From 1ebc3636346a01f6c5709099d34c57f2cb00d7ea Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 8 Aug 2024 11:27:54 +0200 Subject: [PATCH 001/145] initial commit --- .../HPC chatbot preprocessor/.idea/.gitignore | 8 + .../.idea/HPC chatbot preprocessor.iml | 10 + .../inspectionProfiles/Project_Default.xml | 25 ++ .../inspectionProfiles/profiles_settings.xml | 6 + .../HPC chatbot preprocessor/.idea/misc.xml | 7 + .../.idea/modules.xml | 8 + .../HPC chatbot preprocessor/.idea/vcs.xml | 6 + scripts/HPC chatbot preprocessor/main.py | 375 ++++++++++++++++++ .../HPC chatbot preprocessor/start_checker.py | 17 + 9 files changed, 462 insertions(+) create mode 100644 scripts/HPC chatbot preprocessor/.idea/.gitignore create mode 100644 scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml create mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml create mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml create mode 100644 scripts/HPC chatbot preprocessor/.idea/misc.xml create mode 100644 scripts/HPC chatbot preprocessor/.idea/modules.xml create mode 100644 scripts/HPC chatbot preprocessor/.idea/vcs.xml create mode 100644 scripts/HPC chatbot preprocessor/main.py create mode 100644 scripts/HPC chatbot preprocessor/start_checker.py diff --git a/scripts/HPC chatbot preprocessor/.idea/.gitignore b/scripts/HPC chatbot preprocessor/.idea/.gitignore new file mode 100644 index 000000000000..13566b81b018 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml b/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml new file mode 100644 index 000000000000..2c80e1269497 --- /dev/null +++ 
b/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000000..fc946d9cefc8 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,25 @@ + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000000..105ce2da2d64 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/misc.xml b/scripts/HPC chatbot preprocessor/.idea/misc.xml new file mode 100644 index 000000000000..54cda8fd6dd9 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/modules.xml b/scripts/HPC chatbot preprocessor/.idea/modules.xml new file mode 100644 index 000000000000..58e027d745f9 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/vcs.xml b/scripts/HPC chatbot preprocessor/.idea/vcs.xml new file mode 100644 index 000000000000..b2bdec2d71b6 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py new file mode 100644 index 000000000000..7f74fb12fb25 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/main.py @@ -0,0 +1,375 @@ 
+import os +import re +import shutil + +# test_number = int(input("Which test should be run?")) +# +# # Test for strip_markdown (somewhat successful, see findings file) +# +# if test_number == 1: +# import strip_markdown +# +# strip_markdown.strip_markdown_file("C:\\HPC werk\\Chatbot\\md_to_plaintext_test.md") +# +# # Test if copy of document doesn't change original document (successful) +# if test_number == 2: +# import shutil +# +# shutil.copyfile("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test.txt", +# "C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt") +# with open("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt", 'w') as file: +# file.write('hello') + +# Test with actual document + +# make a copy of one of the md files to test some things +shutil.copyfile("C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\getting_started.md", + "C:\\HPC_werk\\Chatbot\\getting_started_copy.md") + +################### define global variables ################### +# variable for the filename (which will be changed into something else in the final version) +filename = "getting_started_copy.md" + +# variable for the main title (needed for reference links) +main_title = filename[:-3] + +# variable that keeps track of the directories that are used to write in at different levels +root_dir_generic = "C:\\HPC_werk\\Chatbot\\parsed_mds\\generic\\" +root_dir_os_specific_linux = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\linux\\" +root_dir_os_specific_windows = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\windows\\" +root_dir_os_specific_macos = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\macos\\" +curr_dirs = [filename[:-3] for i in range(4)] + +# variable to keep track whether we're dealing with OS-specific info or not +OS_specific = False + +# pattern for the regex if-statement to filter out markdown titles +if_pattern = r'^#+ ' + +# variable that keeps track of the latest non-zero level title and corresponding directory +last_title_level = 1 
+last_title = None +last_directory = None +last_was_title = False + +# list to keep track of links in the text +links_generic = [] +links_linux = [] +links_windows = [] +links_macos = [] + +# dictionaries to keep track of current OS and location +active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} +active_site_if_states = {"Gent": "inactive", "not-Gent": "inactive"} + +# variable to keep track of the type of if-statement +if_type = "OS" + +# variable to keep track of the macro-replacements at the top of markdown files +replacements = {} + +# variable that is used to detect whether the first title has been encountered yet +after_first_title = False + + +################### define functions ################### + +# function that removes the previous file structure before starting the process of making a new one +def remove_directory_tree(old_directory): + if os.path.exists(old_directory): + shutil.rmtree(old_directory) + + +# function that checks the first lines of a file until a title is found and saves the macro-replacements to the list +def save_replacements(curr_line): + global replacements + match = re.search(r'\{% set (.*?)="(.*?)" %}', curr_line) + replacements[match.group(1)] = match.group(2) + + +# function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title) +def check_for_title_logic(curr_line): + global curr_dirs + match = re.match(if_pattern, curr_line) + if match and len(match.group(0)) <= 4: + return len(match.group(0)) - 1 + else: + return 0 + + +# function that resets the contents of the link_lists +def reset_link_lists(): + global links_generic, links_linux, links_windows, links_macos + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + +# function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables +def check_for_title(curr_line): + 
global curr_dirs, last_title + logic_output = check_for_title_logic(curr_line) + if logic_output == 0: + return 0, None, None + else: + if last_title is not None: + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) + reset_link_lists() + + curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + curr_line[logic_output + 1:-1].replace(' ', '-') + + create_directory(root_dir_generic + curr_dirs[logic_output]) + create_directory(root_dir_os_specific_linux + curr_dirs[logic_output]) + create_directory(root_dir_os_specific_windows + curr_dirs[logic_output]) + create_directory(root_dir_os_specific_macos + curr_dirs[logic_output]) + + update_lower_curr_dir(curr_dirs[logic_output], logic_output) + return logic_output, curr_line[logic_output + 1:-1].replace(' ', '-'), curr_dirs[logic_output] + + +# function that creates directories if needed +def create_directory(new_directory): + if not os.path.exists(new_directory): + os.mkdir(new_directory) + + +# function that updates the curr_dir variables when needed +def update_lower_curr_dir(curr_directory, level): + global curr_dirs + for i in range(level + 1, 4): + curr_dirs[i] = curr_directory + + +# function that replaces certain markdown structures with the equivalent used on the website +def replace_markdown_markers(curr_line, linklist): + + # replace {{hpcinfra}} + curr_line = re.sub(r'\{\{\s*hpcinfra\s*}}', "HPC-UGent infrastructure", curr_line) + + # replace other replacement macros + for macro in replacements.keys(): + curr_line = re.sub(r'\{\{\s*' + re.escape(macro) + r'\s*}}', replacements[macro], curr_line) + + # 
replace links with a reference + matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) + if matches: + for match in matches: + print(f"[{match[0]}]({match[1]})") + curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") + linklist.append(match[1]) + + return curr_line, linklist + + +# function that checks for if-statements +def check_if_statements(curr_line): + global if_type + + # check whether the first part of the line contains information wrt if-statements + match = re.search(r'^\{%-\s([^%]*)%}(.*)', curr_line) + + # check whether the line contains information wrt if-statements that is not in its first part + match_large = re.search(r'^(.*)(\{%-\s[^%]*%})(.*)', curr_line) + + if match: + print("################################################################################") + content = match.group(1) + print(content) + + # new if-statement wrt OS + if re.match(r'if OS == ', content): + OS = content[9:-1] + + # set new active OS + active_OS_if_states[OS] = "active" + + # set other active ones on inactive + for other_OS in active_OS_if_states.keys(): + if other_OS != OS and active_OS_if_states[other_OS] == "active": + active_OS_if_states[other_OS] = "inactive" + + if_type = "OS" + + # new if-statement wrt site + elif re.match(r'if site == ', content): + if re.search(r'(?i)gent', content): + active_site_if_states["Gent"] = "active" + active_site_if_states["not-Gent"] = "inactive" + else: + active_site_if_states["not-Gent"] = "active" + if active_site_if_states["Gent"] == "active": + active_site_if_states["Gent"] = "inactive" + if_type = "site" + + # endif statement wrt OS + elif re.match(r'endif ', content) and if_type == "OS": + if str(1) in active_OS_if_states.values(): + active_OS_if_states[list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" + else: + for key in active_OS_if_states.keys(): + active_OS_if_states[key] = "inactive" + + # endif statement wrt site + elif 
re.match(r'endif ', content) and if_type == "site": + for key in active_site_if_states.keys(): + active_site_if_states[key] = "inactive" + + # else statement wrt OS + elif re.match(r'else ', content) and if_type == "OS": + + i = 0 + for i in range(3): + if str(i) not in active_OS_if_states.values(): + break + + # set the previously active one on inactive until the next endif + key_list = list(active_OS_if_states.keys()) + position = list(active_OS_if_states.values()).index("active") + active_OS_if_states[key_list[position]] = str(i) + + # set inactive ones on active + while "inactive" in active_OS_if_states.values(): + position = list(active_OS_if_states.values()).index("inactive") + active_OS_if_states[key_list[position]] = "active" + + # else statement wrt site + elif re.match(r'else ', content) and if_type == "site": + + # change state of "Gent" and set not-Gent on active + if active_site_if_states["Gent"] == "inactive": + active_site_if_states["Gent"] = "active" + elif active_site_if_states["Gent"] == "active": + active_site_if_states["Gent"] = str(0) + active_site_if_states["not-Gent"] = "active" + + print(active_OS_if_states) + print(active_site_if_states) + + if len(match.group(2)) != 0: + extra_message = match.group(2).lstrip() + print(extra_message) + # check_if_statements(extra_message) + print("check_extra_message") + return "check_extra_message", extra_message, None + + else: + print("done") + return "done", None, None + + elif match_large: + print("################################################################################") + print(active_OS_if_states) + print(active_site_if_states) + print(match_large.group(1)) + print(match_large.group(2)) + print("write_text_and_check_extra_message") + return "write_text_and_check_extra_message", match_large.group(2), match_large.group(1) + + else: + return "write_text", None, curr_line + + +# function that writes a line to a file +def write_text_to_file(file_name, curr_line): + global links_generic, 
links_linux, links_windows, links_macos + with open(file_name, "a") as write_file: + if "generic" in file_name: + curr_line, links_generic = replace_markdown_markers(curr_line, links_generic) + elif "linux" in file_name: + curr_line, links_linux = replace_markdown_markers(curr_line, links_linux) + elif "windows" in file_name: + curr_line, links_windows = replace_markdown_markers(curr_line, links_windows) + else: + curr_line, links_macos = replace_markdown_markers(curr_line, links_macos) + write_file.write(curr_line) + + +# function that decides what file to write text to +def choose_and_write_to_file(curr_line): + # check that the line is part of the website for gent + if active_site_if_states["Gent"] == "active" or active_site_if_states["Gent"] == "inactive" and active_site_if_states["not-Gent"] == "inactive": + if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": + write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["linux"] == "active": + write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["windows"] == "active": + write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["macos"] == "active": + write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line) + + +# function that adds a reference link at the end of every txt file +def add_reference_link(file_location, reference_link): + with open(file_location, 'a') as write_file: + write_file.write("\nreference: " + reference_link + "\n") + + +# function that adds the links that should be at the end of a file +def write_end_of_file(file_location, OS, linklist): + if len(OS) > 0: + OS = OS + "/" + + # add the links from within the document + with open(file_location, 'a') 
as write_file: + for i, link in enumerate(linklist): + write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n") + + # finally add the reference link + add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + main_title + "/#" + last_title.lower()) + + +################### actually parse the md file ################### + +# remove the old directories if needed +remove_directory_tree(root_dir_generic) +remove_directory_tree(root_dir_os_specific_linux) +remove_directory_tree(root_dir_os_specific_windows) +remove_directory_tree(root_dir_os_specific_macos) + +# create directories for the source markdown file +create_directory(root_dir_generic) +create_directory(root_dir_os_specific_linux) +create_directory(root_dir_os_specific_windows) +create_directory(root_dir_os_specific_macos) +create_directory(root_dir_generic + curr_dirs[0]) +create_directory(root_dir_os_specific_linux + curr_dirs[0]) +create_directory(root_dir_os_specific_windows + curr_dirs[0]) +create_directory(root_dir_os_specific_macos + curr_dirs[0]) + +# open the file and store line by line in the right file +with open("C:\\HPC_werk\\Chatbot\\getting_started_copy.md", 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + # line is a title with a maximum depth of 3 + if title_level > 0: + last_title_level = title_level + last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + else: + if after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": + choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) + else: + save_replacements(line) + +# 
write end of file for the last file +write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) +write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) +write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) +write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py new file mode 100644 index 000000000000..50b61cd52133 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/start_checker.py @@ -0,0 +1,17 @@ +import os + +directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC" + +for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + # if filename.endswith("xdmod.md"): + # break + if filename.endswith(".md"): + lines_until_title = 0 + with open(directory + "\\" + filename, "r") as file: + for line in file: + if line[0] == "#": + break + lines_until_title += 1 + print(filename + " : " + str(lines_until_title)) + break From 10edb2050da2ffc7412c7123c190cb4991682daf Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 8 Aug 2024 17:09:18 +0200 Subject: [PATCH 002/145] some cleanup --- .../copies/getting_started_copy.md | 268 ++++++++++++++++++ .../Getting-Access/Getting-Access.txt | 25 ++ .../Getting-Connected/Getting-Connected.txt | 19 ++ .../Getting-Started/Getting-Started.txt | 11 + .../Inspect-your-results.txt | 56 ++++ .../Getting-Started/Next-steps/Next-steps.txt | 15 + .../Submitting-a-job/Submitting-a-job.txt | 60 ++++ .../Transfer-your-files.txt | 21 ++ .../Wait-for-job-to-be-executed.txt | 26 ++ .../Getting-Access/Getting-Access.txt | 2 + .../Getting-Connected/Getting-Connected.txt | 18 ++ .../Getting-Started/Getting-Started.txt | 2 + .../Inspect-your-results.txt | 2 + 
.../Getting-Started/Next-steps/Next-steps.txt | 2 + .../Submitting-a-job/Submitting-a-job.txt | 2 + .../Transfer-your-files.txt | 21 ++ .../Wait-for-job-to-be-executed.txt | 2 + .../Getting-Access/Getting-Access.txt | 2 + .../Getting-Connected/Getting-Connected.txt | 13 + .../Getting-Started/Getting-Started.txt | 2 + .../Inspect-your-results.txt | 2 + .../Getting-Started/Next-steps/Next-steps.txt | 2 + .../Submitting-a-job/Submitting-a-job.txt | 2 + .../Transfer-your-files.txt | 21 ++ .../Wait-for-job-to-be-executed.txt | 2 + .../Getting-Access/Getting-Access.txt | 2 + .../Getting-Connected/Getting-Connected.txt | 13 + .../Getting-Started/Getting-Started.txt | 2 + .../Inspect-your-results.txt | 2 + .../Getting-Started/Next-steps/Next-steps.txt | 2 + .../Submitting-a-job/Submitting-a-job.txt | 2 + .../Transfer-your-files.txt | 15 + .../Wait-for-job-to-be-executed.txt | 2 + .../HPC chatbot preprocessor/jinja_parser.py | 3 + scripts/HPC chatbot preprocessor/main.py | 26 +- .../HPC chatbot preprocessor/start_checker.py | 2 + 36 files changed, 662 insertions(+), 7 deletions(-) create mode 100644 scripts/HPC chatbot preprocessor/copies/getting_started_copy.md create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt create mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt create mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt create mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt create mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt create mode 100644 scripts/HPC chatbot preprocessor/jinja_parser.py diff --git a/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md b/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md new file mode 100644 index 000000000000..8fe33ebc513d --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md @@ -0,0 +1,268 @@ +{% set exampleloc="mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist" %} +# Getting Started + +Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the {{hpcinfra}} and submitting your very first job. We'll also walk you through the process step by step using a practical example. + +In addition to this chapter, you might find the [recording of the *Introduction to HPC-UGent* training session](https://www.ugent.be/hpc/en/training/introhpcugent-recording) to be a useful resource. + +Before proceeding, read [the introduction to HPC](introduction.md) to gain an understanding of the {{ hpcinfra }} and related terminology. + +### Getting Access + +To get access to the {{hpcinfra}}, visit [Getting an HPC Account](account.md). + +If you have not used Linux before, +{%- if site == 'Gent' %} +now would be a good time to follow our [Linux Tutorial](linux-tutorial/index.md). +{%- else %} +please learn some basics first before continuing. (see [Appendix C - Useful Linux Commands](useful_linux_commands.md)) +{%- endif %} + +#### A typical workflow looks like this: + +1. 
Connect to the login nodes +2. Transfer your files to the {{hpcinfra}} +3. Optional: compile your code and test it +4. Create a job script and submit your job +5. Wait for job to be executed +6. Study the results generated by your jobs, either on the cluster or + after downloading them locally. + +We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using [TensorFlow](https://www.tensorflow.org/); +see the [example scripts](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}}). + +### Getting Connected + +There are two options to connect + +- Using a terminal to connect via SSH (for power users) (see [First Time connection to the {{ hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure)) +- [Using the web portal](web_portal.md) + +Considering your operating system is **{{OS}}**, + +{%- if OS == linux %} +it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. + +Assuming you have already generated SSH keys in the previous step ([Getting Access](#getting-access)), and that they are in a default location, you should now be able to login by running the following command: + +
ssh {{userid}}@{{loginnode}}
+ +!!! Warning "Use your own VSC account id" + + Replace {{userid}} with your VSC account id (see https://account.vscentrum.be/) + +!!! Tip + + You can also still use the web portal (see [shell access on web portal](web_portal.md#shell-access)) + +{%- else %} +{%- if OS == windows %} it is recommended to use the web portal. +{%- else %} it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. {%- endif %} + +The [web portal](web_portal.md) offers a convenient way to upload files and gain shell access to the {{hpcinfra}} from a standard web browser (no software installation or configuration required). + +See [shell access](web_portal.md#shell-access) when using the web portal, or +[connection to the {{hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure) when using a terminal. + +Make sure you can get shell access to the {{hpcinfra}} before proceeding with the next steps. + +{%- endif %} + +!!! Info + + When having problems, see the [connection issues section on the troubleshooting page](troubleshooting.md#sec:connecting-issues). + + +### Transfer your files + +Now that you can log in, it is time to transfer files from your local computer to your **home directory** on the {{hpcinfra}}. + +Download [tensorflow_mnist.py](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py) +and [run.sh](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh) example scripts to your computer (from [here](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}})). + +{%- if OS == windows %} + +The [HPC-UGent web portal](https://login.hpc.ugent.be) provides a file browser that allows uploading files. +For more information see the [file browser section](web_portal.md#file-browser). + +Upload both files (`run.sh` and `tensorflow_mnist.py`) to your **home directory** and go back to your shell. + +!!! 
Info + + As an alternative, you can use WinSCP (see [our section](connecting.md#winscp)) + +{%- else %} + +On your local machine you can run: +
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py
+curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh
+
+ +Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). +
scp tensorflow_mnist.py run.sh {{userid}}@{{ loginnode }}:~ 
+
ssh  {{userid}}@{{ loginnode }} 
+ +!!! Warning "Use your own VSC account id" + + Replace {{userid}} with your VSC account id (see https://account.vscentrum.be/) + +!!! Info + + For more information about transferring files or `scp`, see [transfer files from/to hpc](connecting.md#transfer-files-tofrom-the-hpc). + +{%- endif %} + +When running `ls` in your session on the {{hpcinfra}}, you should see the two files listed in your home directory (`~`): + +```shell +$ ls ~ +run.sh tensorflow_mnist.py +``` + +When you do not see these files, make sure you uploaded the files to your **home directory**. + +### Submitting a job + +Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script. + +A job script is a shell script, a text file that specifies the resources, +the software that is used (via `module load` statements), +and the steps that should be executed to run the calculation. + +Our job script looks like this: + +
-- run.sh --
+ +```bash +#!/bin/bash + +module load TensorFlow/2.11.0-foss-2022a + +python tensorflow_mnist.py + +``` +As you can see this job script will run the Python script named **tensorflow_mnist.py**. + + +The jobs you submit are by default executed on **cluster/{{defaultcluster}}**, you can swap to another cluster by issuing the following command. + +```shell +module swap cluster/{{othercluster}} +``` + +!!! Tip + + When submitting jobs with a limited amount of resources, it is recommended to use the [debug/interactive cluster](interactive_debug.md#interactive-and-debug-cluster): `donphan`. + +{%- if site == 'Gent' %} + + To get a list of all clusters and their hardware, see . + +{%- endif %} + +This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command: + +```shell +$ qsub run.sh +{{jobid}} +``` + +This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job. + +!!! Warning "Make sure you understand what the `module` command does" + + Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, + but our active shell session is still running on the login node. + + It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands you are running are being executed: they will still be run on the login node you are on. + + When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`). + +For detailed information about `module` commands, read the [running batch jobs](running_batch_jobs.md) chapter. 
+ +### Wait for job to be executed + +Your job is put into a queue before being executed, so it may take a while before it actually starts. +(see [when will my job start?](running_batch_jobs.md#when-will-my-job-start) for scheduling policy). + +You can get an overview of the active jobs using the `qstat` command: +
$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:00  Q {{othercluster}}
+
+ +Eventually, after entering `qstat` again you should see that your job has started running: +
$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:01  R {{othercluster}}
+
+ +If you don't see your job in the output of the `qstat` command anymore, your job has likely completed. + +Read [this section](running_batch_jobs.md#monitoring-and-managing-your-jobs) on how to interpret the output. + +### Inspect your results + +When your job finishes it generates 2 output files: + +- One for normal output messages (*stdout* output channel). +- One for warning and error messages (*stderr* output channel). + +By default located in the directory where you issued `qsub`. + +{%- if site == 'Gent' %} + +!!! Info + + For more information about the stdout and stderr output channels, see this [section](linux-tutorial/beyond_the_basics.md#inputoutput). + +{%- endif %} + +In our example when running ls in the current directory you should see 2 new files: + +- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}}; +- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}. + +!!! Info + + run.sh.e{{jobid}} should be empty (no errors or warnings). + +!!! Warning "Use your own job ID" + + Replace {{jobid}} with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`. 
+ +When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this: +``` +Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz +11493376/11490434 [==============================] - 1s 0us/step +Epoch 1/5 +1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133 +Epoch 2/5 +1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571 +Epoch 3/5 +1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675 +Epoch 4/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727 +Epoch 5/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768 +313/313 - 0s - loss: 0.0782 - accuracy: 0.9764 +``` + +Hurray 🎉, we trained a deep learning model and achieved 97.64 percent accuracy. + +!!! Warning + + When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see [GPU clusters](gpu.md). + + For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster. 
+ +### Next steps + +- [Running interactive jobs](running_interactive_jobs.md) +- [Running jobs with input/output data](running_jobs_with_input_output_data.md) +- [Multi core jobs/Parallel Computing](multi_core_jobs.md) +- [Interactive and debug cluster](interactive_debug.md#interactive-and-debug-cluster) + +For more examples see [Program examples](program_examples.md) and [Job script examples](jobscript_examples.md) diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt new file mode 100644 index 000000000000..f95191b96f01 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt @@ -0,0 +1,25 @@ + +To get access to the HPC-UGent infrastructure, visit Getting an HPC Account[1]. + +If you have not used Linux before, +now would be a good time to follow our Linux Tutorial[2]. + +#### A typical workflow looks like this: + +1. Connect to the login nodes +2. Transfer your files to the HPC-UGent infrastructure +3. Optional: compile your code and test it +4. Create a job script and submit your job +5. Wait for job to be executed +6. Study the results generated by your jobs, either on the cluster or + after downloading them locally. + +We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using TensorFlow[3]; +see the example scripts[4]. 
+ +[1]: account.md +[2]: linux-tutorial/index.md +[3]: https://www.tensorflow.org/ +[4]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist + +reference: docs.hpc.ugent.be/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt new file mode 100644 index 000000000000..94f17ac50709 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt @@ -0,0 +1,19 @@ + +There are two options to connect + +- Using a terminal to connect via SSH (for power users) (see First Time connection to the HPC-UGent infrastructure[1]) +- Using the web portal[2] + +Considering your operating system is **{{OS}}**, + + +!!! Info + + When having problems see the connection issues section on the troubleshooting page[3]. + + +[1]: connecting.md#first-time-connection-to-the-hpc-infrastructure +[2]: web_portal.md +[3]: troubleshooting.md#sec:connecting-issues + +reference: docs.hpc.ugent.be/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt new file mode 100644 index 000000000000..3403b57f2c21 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt @@ -0,0 +1,11 @@ + +Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the HPC-UGent infrastructure and submitting your very first job. 
We'll also walk you through the process step by step using a practical example. + +In addition to this chapter, you might find the recording of the *Introduction to HPC-UGent* training session[1] to be a useful resource. + +Before proceeding, read the introduction to HPC[2] to gain an understanding of the HPC-UGent infrastructure and related terminology. + +[1]: https://www.ugent.be/hpc/en/training/introhpcugent-recording +[2]: introduction.md + +reference: docs.hpc.ugent.be/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt new file mode 100644 index 000000000000..417416007f5c --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt @@ -0,0 +1,56 @@ + +When your job finishes it generates 2 output files: + +- One for normal output messages (*stdout* output channel). +- One for warning and error messages (*stderr* output channel). + +By default located in the directory where you issued `qsub`. + + +!!! Info + + For more information about the stdout and stderr output channels, see this section[1]. + + +In our example when running ls in the current directory you should see 2 new files: + +- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}}; +- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}. + +!!! Info + + run.sh.e{{jobid}} should be empty (no errors or warnings). + +!!! Warning "Use your own job ID" + + Replace {{jobid}} with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`. 
+ +When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this: +``` +Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz +11493376/11490434 [==============================] - 1s 0us/step +Epoch 1/5 +1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133 +Epoch 2/5 +1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571 +Epoch 3/5 +1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675 +Epoch 4/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727 +Epoch 5/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768 +313/313 - 0s - loss: 0.0782 - accuracy: 0.9764 +``` + +Hurray 🎉, we trained a deep learning model and achieved 97.64 percent accuracy. + +!!! Warning + + When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see GPU clusters[2]. + + For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster. 
+ +[1]: linux-tutorial/beyond_the_basics.md#inputoutput +[2]: gpu.md + +reference: docs.hpc.ugent.be/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt new file mode 100644 index 000000000000..804b56b8251b --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt @@ -0,0 +1,15 @@ + +- Running interactive jobs[1] +- Running jobs with input/output data[2] +- Multi core jobs/Parallel Computing[3] +- Interactive and debug cluster[4] + +For more examples see Program examples[5] and Job script examples[6] +[1]: running_interactive_jobs.md +[2]: running_jobs_with_input_output_data.md +[3]: multi_core_jobs.md +[4]: interactive_debug.md#interactive-and-debug-cluster +[5]: program_examples.md +[6]: jobscript_examples.md + +reference: docs.hpc.ugent.be/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt new file mode 100644 index 000000000000..edb336fa06ba --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt @@ -0,0 +1,60 @@ + +Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script. + +A job script is a shell script, a text file that specifies the resources, +the software that is used (via `module load` statements), +and the steps that should be executed to run the calculation. + +Our job script looks like this: + +
-- run.sh --
+ +```bash +#!/bin/bash + +module load TensorFlow/2.11.0-foss-2022a + +python tensorflow_mnist.py + +``` +As you can see this job script will run the Python script named **tensorflow_mnist.py**. + + +The jobs you submit are per default executed on **cluser/{{defaultcluster}}**, you can swap to another cluster by issuing the following command. + +```shell +module swap cluster/{{othercluster}} +``` + +!!! Tip + + When submitting jobs with limited amount of resources, it is recommended to use the debug/interactive cluster[1]: `donphan`. + + + To get a list of all clusters and their hardware, see . + + +This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command: + +```shell +$ qsub run.sh +{{jobid}} +``` + +This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job. + +!!! Warning "Make sure you understand what the `module` command does" + + Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, + but our active shell session is still running on the login node. + + It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands your are running are being executed: they will still be run on the login node you are on. + + When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`). + +For detailed information about `module` commands, read the running batch jobs[2] chapter. 
+ +[1]: interactive_debug.md#interactive-and-debug-cluster +[2]: running_batch_jobs.md + +reference: docs.hpc.ugent.be/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt new file mode 100644 index 000000000000..94dc30f67121 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt @@ -0,0 +1,21 @@ + +Now that you can login, it is time to transfer files from your local computer to your **home directory** on the HPC-UGent infrastructure. + +Download tensorflow_mnist.py[1] +and run.sh[2] example scripts to your computer (from here[3]). + + +When running `ls` in your session on the HPC-UGent infrastructure, you should see the two files listed in your home directory (`~`): + +```shell +$ ls ~ +run.sh tensorflow_mnist.py +``` + +When you do not see these files, make sure you uploaded the files to your **home directory**. 
+ +[1]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py +[2]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh +[3]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist + +reference: docs.hpc.ugent.be/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt new file mode 100644 index 000000000000..de177946cf93 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt @@ -0,0 +1,26 @@ + +Your job is put into a queue before being executed, so it may take a while before it actually starts. +(see when will my job start?[1] for scheduling policy). + +You can get an overview of the active jobs using the `qstat` command: +
$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:00  Q {{othercluster}}
+
+ +Eventually, after entering `qstat` again you should see that your job has started running: +
$ qstat
+Job ID     Name             User            Time Use S Queue
+---------- ---------------- --------------- -------- - -------
+{{jobid}}     run.sh           {{userid}}        0:00:01  R {{othercluster}}
+
+ +If you don't see your job in the output of the `qstat` command anymore, your job has likely completed. + +Read this section[2] on how to interpret the output. + +[1]: running_batch_jobs.md#when-will-my-job-start +[2]: running_batch_jobs.md#monitoring-and-managing-your-jobs + +reference: docs.hpc.ugent.be/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt new file mode 100644 index 000000000000..e756b9a3cbea --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt new file mode 100644 index 000000000000..bac5dfcbfbec --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt @@ -0,0 +1,18 @@ +it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. + +Assuming you have already generated SSH keys in the previous step (Getting Access[1]), and that they are in a default location, you should now be able to login by running the following command: + +
ssh {{userid}}@{{loginnode}}
+ +!!! Warning "User your own VSC account id" + + Replace {{userid}} with your VSC account id (see ) + +!!! Tip + + You can also still use the web portal (see shell access on web portal[2]) + +[1]: #getting-access +[2]: web_portal.md#shell-access + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt new file mode 100644 index 000000000000..f0b9d83bed36 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt new file mode 100644 index 000000000000..441b54c70424 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt new file mode 100644 index 000000000000..d72ffccf01ad --- /dev/null +++ b/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt new file mode 100644 index 000000000000..744c2c3db7a7 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt new file mode 100644 index 000000000000..aca6e05d28ce --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt @@ -0,0 +1,21 @@ + +On your local machine you can run: +
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
+curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
+
+ +Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). +
scp tensorflow_mnist.py run.sh {{userid}}@{{ loginnode }}:~ 
+
ssh  {{userid}}@{{ loginnode }} 
+ +!!! Warning "User your own VSC account id" + + Replace {{userid}} with your VSC account id (see ) + +!!! Info + + For more information about transfering files or `scp`, see tranfer files from/to hpc[1]. + +[1]: connecting.md#transfer-files-tofrom-the-hpc + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt new file mode 100644 index 000000000000..93e6fdff1713 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Linux/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt new file mode 100644 index 000000000000..8732e5869811 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt new file mode 100644 index 000000000000..2b1de2be8385 --- 
/dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt @@ -0,0 +1,13 @@ +it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. +The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). + +See shell access[2] when using the web portal, or +connection to the HPC-UGent infrastructure[3] when using a terminal. + +Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps. + +[1]: web_portal.md +[2]: web_portal.md#shell-access +[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt new file mode 100644 index 000000000000..4e60f862a0a1 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt new file mode 100644 index 000000000000..f7ae9f96226f --- /dev/null +++ b/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt new file mode 100644 index 000000000000..71f384bcf17c --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt new file mode 100644 index 000000000000..d72ba48195a5 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt new file mode 100644 index 000000000000..fce05042ab2c --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt @@ -0,0 +1,21 @@ + +On your local machine you 
can run: +
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
+curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
+
+ +Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). +
scp tensorflow_mnist.py run.sh {{userid}}@{{ loginnode }}:~ 
+
ssh  {{userid}}@{{ loginnode }} 
+ +!!! Warning "User your own VSC account id" + + Replace {{userid}} with your VSC account id (see ) + +!!! Info + + For more information about transfering files or `scp`, see tranfer files from/to hpc[1]. + +[1]: connecting.md#transfer-files-tofrom-the-hpc + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt new file mode 100644 index 000000000000..2ef8770504b5 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/macOS/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt new file mode 100644 index 000000000000..874af3657046 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt new file mode 100644 index 
000000000000..ce0b873b2b0e --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt @@ -0,0 +1,13 @@ +it is recommended to use the web portal. +The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). + +See shell access[2] when using the web portal, or +connection to the HPC-UGent infrastructure[3] when using a terminal. + +Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps. + +[1]: web_portal.md +[2]: web_portal.md#shell-access +[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt new file mode 100644 index 000000000000..44d1f17b73be --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt new file mode 100644 index 000000000000..730fbbc3b740 --- /dev/null +++ b/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt new file mode 100644 index 000000000000..55df915125a7 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt new file mode 100644 index 000000000000..f67d48ece4a1 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt new file mode 100644 index 000000000000..dce86fc7cf3e --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt @@ -0,0 +1,15 @@ + 
+The HPC-UGent web portal[1] provides a file browser that allows uploading files. +For more information see the file browser section[2]. + +Upload both files (`run.sh` and `tensorflow-mnist.py`) to your **home directory** and go back to your shell. + +!!! Info + + As an alternative, you can use WinSCP (see our section[3]) + +[1]: https://login.hpc.ugent.be +[2]: web_portal.md#file-browser +[3]: connecting.md#winscp + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt new file mode 100644 index 000000000000..bdd7387e3790 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt @@ -0,0 +1,2 @@ + +reference: docs.hpc.ugent.be/Windows/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py new file mode 100644 index 000000000000..d9fd8f1c5ce1 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/jinja_parser.py @@ -0,0 +1,3 @@ +from jinja2 import Template + +# I shall do this tomorrow, I do not find myself in the possession of enough understanding about jinja to commence with this task today diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 7f74fb12fb25..0331e61cfc5c 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -22,9 +22,13 @@ # Test with actual document +# make a copies directory to store the copies +if not os.path.exists(".\\copies"): + 
os.mkdir(".\\copies") + # make a copy of one of the md files to test some things -shutil.copyfile("C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\getting_started.md", - "C:\\HPC_werk\\Chatbot\\getting_started_copy.md") +shutil.copyfile("..\\..\\mkdocs\\docs\\HPC\\getting_started.md", + ".\\copies\\getting_started_copy.md") ################### define global variables ################### # variable for the filename (which will be changed into something else in the final version) @@ -34,10 +38,10 @@ main_title = filename[:-3] # variable that keeps track of the directories that are used to write in at different levels -root_dir_generic = "C:\\HPC_werk\\Chatbot\\parsed_mds\\generic\\" -root_dir_os_specific_linux = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\linux\\" -root_dir_os_specific_windows = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\windows\\" -root_dir_os_specific_macos = "C:\\HPC_werk\\Chatbot\\parsed_mds\\os_specific\\macos\\" +root_dir_generic = ".\\copies\\parsed_mds\\generic\\" +root_dir_os_specific_linux = ".\\copies\\parsed_mds\\os_specific\\linux\\" +root_dir_os_specific_windows = ".\\copies\\parsed_mds\\os_specific\\windows\\" +root_dir_os_specific_macos = ".\\copies\\parsed_mds\\os_specific\\macos\\" curr_dirs = [filename[:-3] for i in range(4)] # variable to keep track whether we're dealing with OS-specific info or not @@ -162,6 +166,12 @@ def replace_markdown_markers(curr_line, linklist): curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") linklist.append(match[1]) + # TODO: + # code-blocks + # tips + # warnings + # etc + return curr_line, linklist @@ -331,7 +341,9 @@ def write_end_of_file(file_location, OS, linklist): remove_directory_tree(root_dir_os_specific_macos) # create directories for the source markdown file +create_directory(".\\copies\\parsed_mds") create_directory(root_dir_generic) +create_directory(".\\copies\\parsed_mds\\os_specific") 
create_directory(root_dir_os_specific_linux) create_directory(root_dir_os_specific_windows) create_directory(root_dir_os_specific_macos) @@ -341,7 +353,7 @@ def write_end_of_file(file_location, OS, linklist): create_directory(root_dir_os_specific_macos + curr_dirs[0]) # open the file and store line by line in the right file -with open("C:\\HPC_werk\\Chatbot\\getting_started_copy.md", 'r') as readfile: +with open(".\\copies\\" + filename, 'r') as readfile: for line in readfile: title_level, title, directory = check_for_title(line) diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py index 50b61cd52133..5661c79ddc95 100644 --- a/scripts/HPC chatbot preprocessor/start_checker.py +++ b/scripts/HPC chatbot preprocessor/start_checker.py @@ -1,3 +1,5 @@ +# THIS IS NOT AN IMPORTANT FILE, DON'T WORRY ABOUT IT, I JUST USED IT TO TEST SOME THINGS + import os directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC" From 85a93ec31da14d3877658a741fd882184695b6cb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 10:39:56 +0200 Subject: [PATCH 003/145] used jinja to replace macros --- .../HPC chatbot preprocessor/if_mangler.py | 47 +++++++ .../HPC chatbot preprocessor/jinja_parser.py | 24 +++- scripts/HPC chatbot preprocessor/main.py | 124 ++++++------------ 3 files changed, 109 insertions(+), 86 deletions(-) create mode 100644 scripts/HPC chatbot preprocessor/if_mangler.py diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py new file mode 100644 index 000000000000..f49ef691fd34 --- /dev/null +++ b/scripts/HPC chatbot preprocessor/if_mangler.py @@ -0,0 +1,47 @@ +import re +import os + + +def create_directory(new_directory): + if not os.path.exists(new_directory): + os.mkdir(new_directory) + +create_directory(".\\if_mangled_files") + +# global variable to keep track of latest if-statement scope +is_os = False + + +def 
mangle_os_ifs(line): + global is_os + + match = re.search(r'\{%-\s[^%]*%}', line) + if_match = re.search(r'\{%-\sif [^%]*%}', line) + if_os_match = re.search(r'\{%-\sif OS == [^%]*%}', line) + + if match: + if if_match: + if if_os_match: + is_os = True + line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] + else: + is_os = False + else: + if is_os: + line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] + + match = re.search(r'\{%-\s[^%]*%}', line) + + while match and is_os: + line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] + match = re.search(r'\{%-\s[^%]*%}', line) + + return line + + +def mangle_ifs(directory, file): + with open(".\\if_mangled_files\\" + file, 'w') as write_file: + with open(directory + "\\" + file, 'r') as read_file: + for line in read_file: + new_line = mangle_os_ifs(line) + write_file.write(new_line) diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py index d9fd8f1c5ce1..1b033bfdc6c5 100644 --- a/scripts/HPC chatbot preprocessor/jinja_parser.py +++ b/scripts/HPC chatbot preprocessor/jinja_parser.py @@ -1,3 +1,25 @@ +import yaml from jinja2 import Template +from if_mangler import mangle_ifs -# I shall do this tomorrow, I do not find myself in the possession of enough understanding about jinja to commence with this task today + +# function that let's jinja do its thing to format the files expect for the os-related if-statements +def jinja_parser(filename): + # Read the YAML file + with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: + words_dict = yaml.safe_load(yml_file) + + # Mangle the OS-related if-statements + mangle_ifs('.\\copies', filename) + + # Read the if-mangled Markdown file + with open('.\\if_mangled_files\\' + filename, 'r') as md_file: + md_content = md_file.read() 
+ + # Use Jinja2 to replace the macros + template = Template(md_content) + rendered_content = template.render(words_dict) + + # Save the rendered content to a new file + with open('.\\copies\\' + filename, 'w') as output_file: + output_file.write(rendered_content) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 0331e61cfc5c..35769de46ab0 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -1,6 +1,7 @@ import os import re import shutil +from jinja_parser import jinja_parser # test_number = int(input("Which test should be run?")) # @@ -26,6 +27,9 @@ if not os.path.exists(".\\copies"): os.mkdir(".\\copies") +if not os.path.exists(".\\parsed_mds"): + os.mkdir(".\\parsed_mds") + # make a copy of one of the md files to test some things shutil.copyfile("..\\..\\mkdocs\\docs\\HPC\\getting_started.md", ".\\copies\\getting_started_copy.md") @@ -38,10 +42,10 @@ main_title = filename[:-3] # variable that keeps track of the directories that are used to write in at different levels -root_dir_generic = ".\\copies\\parsed_mds\\generic\\" -root_dir_os_specific_linux = ".\\copies\\parsed_mds\\os_specific\\linux\\" -root_dir_os_specific_windows = ".\\copies\\parsed_mds\\os_specific\\windows\\" -root_dir_os_specific_macos = ".\\copies\\parsed_mds\\os_specific\\macos\\" +root_dir_generic = ".\\parsed_mds\\generic\\" +root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" +root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" +root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" curr_dirs = [filename[:-3] for i in range(4)] # variable to keep track whether we're dealing with OS-specific info or not @@ -62,17 +66,10 @@ links_windows = [] links_macos = [] -# dictionaries to keep track of current OS and location +# dictionaries to keep track of current OS active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} 
-active_site_if_states = {"Gent": "inactive", "not-Gent": "inactive"} - -# variable to keep track of the type of if-statement -if_type = "OS" - -# variable to keep track of the macro-replacements at the top of markdown files -replacements = {} -# variable that is used to detect whether the first title has been encountered yet +# variable that shows whether the first title has been reached yet after_first_title = False @@ -84,13 +81,6 @@ def remove_directory_tree(old_directory): shutil.rmtree(old_directory) -# function that checks the first lines of a file until a title is found and saves the macro-replacements to the list -def save_replacements(curr_line): - global replacements - match = re.search(r'\{% set (.*?)="(.*?)" %}', curr_line) - replacements[match.group(1)] = match.group(2) - - # function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title) def check_for_title_logic(curr_line): global curr_dirs @@ -151,13 +141,6 @@ def update_lower_curr_dir(curr_directory, level): # function that replaces certain markdown structures with the equivalent used on the website def replace_markdown_markers(curr_line, linklist): - # replace {{hpcinfra}} - curr_line = re.sub(r'\{\{\s*hpcinfra\s*}}', "HPC-UGent infrastructure", curr_line) - - # replace other replacement macros - for macro in replacements.keys(): - curr_line = re.sub(r'\{\{\s*' + re.escape(macro) + r'\s*}}', replacements[macro], curr_line) - # replace links with a reference matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: @@ -177,13 +160,12 @@ def replace_markdown_markers(curr_line, linklist): # function that checks for if-statements def check_if_statements(curr_line): - global if_type # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{%-\s([^%]*)%}(.*)', curr_line) + match = re.search(r'^\{-if-%-\s([^%]*)%-if-}(.*)', curr_line) # check whether the line contains 
information wrt if-statements that is not in its first part - match_large = re.search(r'^(.*)(\{%-\s[^%]*%})(.*)', curr_line) + match_large = re.search(r'^(.*)(\{-if-%-\s[^%]*%-if-})(.*)', curr_line) if match: print("################################################################################") @@ -202,34 +184,16 @@ def check_if_statements(curr_line): if other_OS != OS and active_OS_if_states[other_OS] == "active": active_OS_if_states[other_OS] = "inactive" - if_type = "OS" - - # new if-statement wrt site - elif re.match(r'if site == ', content): - if re.search(r'(?i)gent', content): - active_site_if_states["Gent"] = "active" - active_site_if_states["not-Gent"] = "inactive" - else: - active_site_if_states["not-Gent"] = "active" - if active_site_if_states["Gent"] == "active": - active_site_if_states["Gent"] = "inactive" - if_type = "site" - # endif statement wrt OS - elif re.match(r'endif ', content) and if_type == "OS": + elif re.match(r'endif ', content): if str(1) in active_OS_if_states.values(): active_OS_if_states[list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" else: for key in active_OS_if_states.keys(): active_OS_if_states[key] = "inactive" - # endif statement wrt site - elif re.match(r'endif ', content) and if_type == "site": - for key in active_site_if_states.keys(): - active_site_if_states[key] = "inactive" - # else statement wrt OS - elif re.match(r'else ', content) and if_type == "OS": + elif re.match(r'else ', content): i = 0 for i in range(3): @@ -246,18 +210,7 @@ def check_if_statements(curr_line): position = list(active_OS_if_states.values()).index("inactive") active_OS_if_states[key_list[position]] = "active" - # else statement wrt site - elif re.match(r'else ', content) and if_type == "site": - - # change state of "Gent" and set not-Gent on active - if active_site_if_states["Gent"] == "inactive": - active_site_if_states["Gent"] = "active" - elif active_site_if_states["Gent"] == "active": - 
active_site_if_states["Gent"] = str(0) - active_site_if_states["not-Gent"] = "active" - print(active_OS_if_states) - print(active_site_if_states) if len(match.group(2)) != 0: extra_message = match.group(2).lstrip() @@ -273,7 +226,6 @@ def check_if_statements(curr_line): elif match_large: print("################################################################################") print(active_OS_if_states) - print(active_site_if_states) print(match_large.group(1)) print(match_large.group(2)) print("write_text_and_check_extra_message") @@ -301,15 +253,14 @@ def write_text_to_file(file_name, curr_line): # function that decides what file to write text to def choose_and_write_to_file(curr_line): # check that the line is part of the website for gent - if active_site_if_states["Gent"] == "active" or active_site_if_states["Gent"] == "inactive" and active_site_if_states["not-Gent"] == "inactive": - if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": - write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) - if active_OS_if_states["linux"] == "active": - write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line) - if active_OS_if_states["windows"] == "active": - write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line) - if active_OS_if_states["macos"] == "active": - write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": + write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["linux"] == "active": + write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line) + 
if active_OS_if_states["windows"] == "active": + write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line) + if active_OS_if_states["macos"] == "active": + write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", curr_line) # function that adds a reference link at the end of every txt file @@ -325,6 +276,7 @@ def write_end_of_file(file_location, OS, linklist): # add the links from within the document with open(file_location, 'a') as write_file: + write_file.write("\n\n") for i, link in enumerate(linklist): write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n") @@ -341,9 +293,8 @@ def write_end_of_file(file_location, OS, linklist): remove_directory_tree(root_dir_os_specific_macos) # create directories for the source markdown file -create_directory(".\\copies\\parsed_mds") create_directory(root_dir_generic) -create_directory(".\\copies\\parsed_mds\\os_specific") +create_directory(".\\parsed_mds\\os_specific") create_directory(root_dir_os_specific_linux) create_directory(root_dir_os_specific_windows) create_directory(root_dir_os_specific_macos) @@ -352,6 +303,9 @@ def write_end_of_file(file_location, OS, linklist): create_directory(root_dir_os_specific_windows + curr_dirs[0]) create_directory(root_dir_os_specific_macos + curr_dirs[0]) +# process the jinja macros +jinja_parser(filename) + # open the file and store line by line in the right file with open(".\\copies\\" + filename, 'r') as readfile: @@ -366,22 +320,22 @@ def write_end_of_file(file_location, OS, linklist): after_first_title = True # line is not a title - else: - if after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - 
next_action = check_if_statements(next_action[1]) - - if next_action[0] == "write_text": + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": choose_and_write_to_file(next_action[2]) - else: - save_replacements(line) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) # write end of file for the last file write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) + + +# TODO: directory cleanup From dfff5fabae20307d13cef4f80d22943f7eac87f1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 14:34:01 +0200 Subject: [PATCH 004/145] adapt if-mangler to accommodate for nested if-clauses --- .../HPC chatbot preprocessor/if_mangler.py | 64 +++++++++++-------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py index f49ef691fd34..9980a2e83e87 100644 --- a/scripts/HPC chatbot preprocessor/if_mangler.py +++ b/scripts/HPC chatbot preprocessor/if_mangler.py @@ -1,41 +1,53 @@ import re -import os - - -def create_directory(new_directory): - if not os.path.exists(new_directory): - os.mkdir(new_directory) - -create_directory(".\\if_mangled_files") # global variable to keep track of latest if-statement scope -is_os = False +is_os = 0 # Can be 0, 1 
or 2 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if} def mangle_os_ifs(line): global is_os - match = re.search(r'\{%-\s[^%]*%}', line) - if_match = re.search(r'\{%-\sif [^%]*%}', line) - if_os_match = re.search(r'\{%-\sif OS == [^%]*%}', line) + match = re.search(r'\{%(.*?)%}(.*)', line) - if match: - if if_match: - if if_os_match: - is_os = True - line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] - else: - is_os = False - else: - if is_os: - line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] + start_index = 0 + added_length = 0 - match = re.search(r'\{%-\s[^%]*%}', line) + while match: - while match and is_os: - line = line[:match.start() + 1] + "-if-" + line[match.start() + 1:match.end() - 1] + "-if-" + line[match.end() - 1:] - match = re.search(r'\{%-\s[^%]*%}', line) + constr_match = re.search(r'\{%.*?%}', match.string) + if_match = re.search(r'if ', match.group(1)) + if_os_match = re.search(r'if OS == ', match.group(1)) + endif_match = re.search(r'endif', match.group(1)) + if endif_match: + if is_os == 2: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 0 + elif is_os == 1: + is_os = 2 + elif if_match: + if if_os_match: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 2 + else: + if is_os == 2: + is_os = 1 + else: + is_os = 0 + else: + if is_os == 2: + line = line[:constr_match.start() + start_index + 
added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + start_index += constr_match.end() + match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) return line From 649ddec3fcad3655445aa930f75bf4dd82a9504f Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 14:34:43 +0200 Subject: [PATCH 005/145] adapt the parser to take all files as input, not all files get parsed successfully yet --- scripts/HPC chatbot preprocessor/main.py | 282 ++++++++++++----------- 1 file changed, 150 insertions(+), 132 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 35769de46ab0..86bd2ed9c3f5 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -3,78 +3,48 @@ import shutil from jinja_parser import jinja_parser -# test_number = int(input("Which test should be run?")) -# -# # Test for strip_markdown (somewhat successful, see findings file) -# -# if test_number == 1: -# import strip_markdown -# -# strip_markdown.strip_markdown_file("C:\\HPC werk\\Chatbot\\md_to_plaintext_test.md") -# -# # Test if copy of document doesn't change original document (successful) -# if test_number == 2: -# import shutil -# -# shutil.copyfile("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test.txt", -# "C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt") -# with open("C:\\HPC_werk\\Chatbot\\md_to_plaintext_test_copy.txt", 'w') as file: -# file.write('hello') - -# Test with actual document - -# make a copies directory to store the copies +# variables for analytics +succeeded = 0 +failed = 0 + +# make the necessary directories if not os.path.exists(".\\copies"): os.mkdir(".\\copies") if not os.path.exists(".\\parsed_mds"): os.mkdir(".\\parsed_mds") -# make a copy of one of the md files to test some things 
-shutil.copyfile("..\\..\\mkdocs\\docs\\HPC\\getting_started.md", - ".\\copies\\getting_started_copy.md") +if not os.path.exists(".\\if_mangled_files"): + os.mkdir(".\\if_mangled_files") + +# copy the examples to the right location wrt the script in order to allow jinja to work +if not os.path.exists(".\\examples"): + shutil.copytree("..\\..\\mkdocs\\docs\\HPC\\examples", ".\\examples") ################### define global variables ################### -# variable for the filename (which will be changed into something else in the final version) -filename = "getting_started_copy.md" -# variable for the main title (needed for reference links) -main_title = filename[:-3] +# variable that keeps track of the source directories +source_directories = ["..\\..\\mkdocs\\docs\\HPC\\", "..\\..\\mkdocs\\docs\\HPC\\linux-tutorial"] # variable that keeps track of the directories that are used to write in at different levels root_dir_generic = ".\\parsed_mds\\generic\\" root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" -curr_dirs = [filename[:-3] for i in range(4)] - -# variable to keep track whether we're dealing with OS-specific info or not -OS_specific = False - -# pattern for the regex if-statement to filter out markdown titles -if_pattern = r'^#+ ' - -# variable that keeps track of the latest non-zero level title and corresponding directory -last_title_level = 1 -last_title = None -last_directory = None -last_was_title = False -# list to keep track of links in the text -links_generic = [] -links_linux = [] -links_windows = [] -links_macos = [] +# list of all the filenames +filenames = {} +for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + filenames[file] = 
os.path.join(source_directory, file) -# dictionaries to keep track of current OS -active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} -# variable that shows whether the first title has been reached yet -after_first_title = False +# filenames = {'account.md': '..\\..\\mkdocs\\docs\\HPC\\account.md'} ################### define functions ################### - # function that removes the previous file structure before starting the process of making a new one def remove_directory_tree(old_directory): if os.path.exists(old_directory): @@ -84,7 +54,7 @@ def remove_directory_tree(old_directory): # function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title) def check_for_title_logic(curr_line): global curr_dirs - match = re.match(if_pattern, curr_line) + match = re.match(r'^#+ ', curr_line) if match and len(match.group(0)) <= 4: return len(match.group(0)) - 1 else: @@ -109,12 +79,16 @@ def check_for_title(curr_line): else: if last_title is not None: write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + links_macos) reset_link_lists() - curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + curr_line[logic_output + 
1:-1].replace(' ', '-') + curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + make_valid_title( + curr_line[logic_output + 1:-1].replace(' ', '-')) create_directory(root_dir_generic + curr_dirs[logic_output]) create_directory(root_dir_os_specific_linux + curr_dirs[logic_output]) @@ -122,7 +96,7 @@ def check_for_title(curr_line): create_directory(root_dir_os_specific_macos + curr_dirs[logic_output]) update_lower_curr_dir(curr_dirs[logic_output], logic_output) - return logic_output, curr_line[logic_output + 1:-1].replace(' ', '-'), curr_dirs[logic_output] + return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] # function that creates directories if needed @@ -140,19 +114,16 @@ def update_lower_curr_dir(curr_directory, level): # function that replaces certain markdown structures with the equivalent used on the website def replace_markdown_markers(curr_line, linklist): - # replace links with a reference matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: for match in matches: - print(f"[{match[0]}]({match[1]})") curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") linklist.append(match[1]) - # TODO: - # code-blocks - # tips - # warnings + # TODO: code-blocks + # TODO: tips + # TODO: warnings # etc return curr_line, linklist @@ -160,7 +131,7 @@ def replace_markdown_markers(curr_line, linklist): # function that checks for if-statements def check_if_statements(curr_line): - + # TODO: adapt regex for annoying inconsistencies # check whether the first part of the line contains information wrt if-statements match = re.search(r'^\{-if-%-\s([^%]*)%-if-}(.*)', curr_line) @@ -168,9 +139,7 @@ def check_if_statements(curr_line): match_large = re.search(r'^(.*)(\{-if-%-\s[^%]*%-if-})(.*)', curr_line) if match: - print("################################################################################") content = match.group(1) - print(content) # new 
if-statement wrt OS if re.match(r'if OS == ', content): @@ -187,7 +156,8 @@ def check_if_statements(curr_line): # endif statement wrt OS elif re.match(r'endif ', content): if str(1) in active_OS_if_states.values(): - active_OS_if_states[list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" + active_OS_if_states[ + list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" else: for key in active_OS_if_states.keys(): active_OS_if_states[key] = "inactive" @@ -210,25 +180,14 @@ def check_if_statements(curr_line): position = list(active_OS_if_states.values()).index("inactive") active_OS_if_states[key_list[position]] = "active" - print(active_OS_if_states) - if len(match.group(2)) != 0: extra_message = match.group(2).lstrip() - print(extra_message) - # check_if_statements(extra_message) - print("check_extra_message") return "check_extra_message", extra_message, None else: - print("done") return "done", None, None elif match_large: - print("################################################################################") - print(active_OS_if_states) - print(match_large.group(1)) - print(match_large.group(2)) - print("write_text_and_check_extra_message") return "write_text_and_check_extra_message", match_large.group(2), match_large.group(1) else: @@ -253,7 +212,8 @@ def write_text_to_file(file_name, curr_line): # function that decides what file to write text to def choose_and_write_to_file(curr_line): # check that the line is part of the website for gent - if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": + if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ + active_OS_if_states["macos"] == "inactive": write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) if active_OS_if_states["linux"] == "active": 
write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line) @@ -284,58 +244,116 @@ def write_end_of_file(file_location, OS, linklist): add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + main_title + "/#" + last_title.lower()) -################### actually parse the md file ################### - -# remove the old directories if needed -remove_directory_tree(root_dir_generic) -remove_directory_tree(root_dir_os_specific_linux) -remove_directory_tree(root_dir_os_specific_windows) -remove_directory_tree(root_dir_os_specific_macos) - -# create directories for the source markdown file -create_directory(root_dir_generic) -create_directory(".\\parsed_mds\\os_specific") -create_directory(root_dir_os_specific_linux) -create_directory(root_dir_os_specific_windows) -create_directory(root_dir_os_specific_macos) -create_directory(root_dir_generic + curr_dirs[0]) -create_directory(root_dir_os_specific_linux + curr_dirs[0]) -create_directory(root_dir_os_specific_windows + curr_dirs[0]) -create_directory(root_dir_os_specific_macos + curr_dirs[0]) - -# process the jinja macros -jinja_parser(filename) - -# open the file and store line by line in the right file -with open(".\\copies\\" + filename, 'r') as readfile: - - for line in readfile: - title_level, title, directory = check_for_title(line) - - # line is a title with a maximum depth of 3 - if title_level > 0: - last_title_level = title_level - last_title = title - last_directory = directory - after_first_title = True - - # line is not a title - elif after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) - - if next_action[0] == 
"write_text": - choose_and_write_to_file(next_action[2]) - -# write end of file for the last file -write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) -write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) -write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", links_windows) -write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) - +# function that makes sure all titles can be used as valid filenames +def make_valid_title(s): + # Define a regex pattern for invalid characters on both Windows and Linux + invalid_chars = r'[<>:"/\\|?*\0()]' + + # Remove invalid characters + valid_filename = re.sub(invalid_chars, '', s) + + # Strip leading/trailing whitespace + valid_filename = valid_filename.strip() + + return valid_filename + + +for filename in filenames.keys(): + try: + # make a copy of one of the md files to test some things + shutil.copyfile(filenames[filename], + ".\\copies\\" + filename) + + ################### define/reset loop specific variables ################### + + # variable for the main title (needed for reference links) + main_title = filename[:-3] + + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for i in range(4)] + + # variable to keep track whether we're dealing with OS-specific info or not + OS_specific = False + + # variable that keeps track of the latest non-zero level title and corresponding directory + last_title_level = 1 + last_title = None + last_directory = None + last_was_title = False + + # list to keep track of links in the text + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + # dictionaries to keep track of current OS + active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + + # 
variable that shows whether the first title has been reached yet + after_first_title = False + + ################### actually parse the md file ################### + + # remove the old directories if needed + remove_directory_tree(root_dir_generic) + remove_directory_tree(root_dir_os_specific_linux) + remove_directory_tree(root_dir_os_specific_windows) + remove_directory_tree(root_dir_os_specific_macos) + + # create directories for the source markdown file + create_directory(root_dir_generic) + create_directory(".\\parsed_mds\\os_specific") + create_directory(root_dir_os_specific_linux) + create_directory(root_dir_os_specific_windows) + create_directory(root_dir_os_specific_macos) + create_directory(root_dir_generic + curr_dirs[0]) + create_directory(root_dir_os_specific_linux + curr_dirs[0]) + create_directory(root_dir_os_specific_windows + curr_dirs[0]) + create_directory(root_dir_os_specific_macos + curr_dirs[0]) + + # process the jinja macros + jinja_parser(filename) + + # open the file and store line by line in the right file + with open(".\\copies\\" + filename, 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + # line is a title with a maximum depth of 3 + if title_level > 0: + last_title_level = title_level + last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": + choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) + + # write end of file for the last file + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title 
+ ".txt", "", links_generic) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + links_macos) + print("Parsing succeeded for file: " + filename) + succeeded += 1 + except: + print("Parsing failed for file: " + filename) + failed += 1 + +print("Success ratio: " + str(succeeded/(succeeded + failed) * 100) + "%") # TODO: directory cleanup From 2116d6e2412e56c48af0e2f032535f687836859c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 16:42:44 +0200 Subject: [PATCH 006/145] adapt the parser to take all files as input, not all files get parsed successfully yet --- scripts/HPC chatbot preprocessor/main.py | 38 +++++++++++++++--------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 86bd2ed9c3f5..b2900334ef87 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -17,10 +17,6 @@ if not os.path.exists(".\\if_mangled_files"): os.mkdir(".\\if_mangled_files") -# copy the examples to the right location wrt the script in order to allow jinja to work -if not os.path.exists(".\\examples"): - shutil.copytree("..\\..\\mkdocs\\docs\\HPC\\examples", ".\\examples") - ################### define global variables ################### # variable that keeps track of the source directories @@ -40,8 +36,10 @@ for file in files: filenames[file] = os.path.join(source_directory, file) +# TODO: find solution for duplicate filenames between linux tutorial and normal files -# filenames = {'account.md': '..\\..\\mkdocs\\docs\\HPC\\account.md'} +# TODO: problem-files (other layout than normal markdown-files) +problem_files = 
["linux_tutorial\\getting_started.md", "linux_tutorial\\navigating.md"] ################### define functions ################### @@ -72,9 +70,9 @@ def reset_link_lists(): # function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables def check_for_title(curr_line): - global curr_dirs, last_title + global curr_dirs, last_title, in_code_block logic_output = check_for_title_logic(curr_line) - if logic_output == 0: + if logic_output == 0 or in_code_block: return 0, None, None else: if last_title is not None: @@ -99,6 +97,13 @@ def check_for_title(curr_line): return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] +# function used to detect codeblocks and make sure the comments don't get detected as titles +def detect_in_code_block(curr_line): + global in_code_block + if '```' in curr_line or (('
' in curr_line) ^ ('
' in curr_line)): + in_code_block = not in_code_block + + # function that creates directories if needed def create_directory(new_directory): if not os.path.exists(new_directory): @@ -227,6 +232,7 @@ def choose_and_write_to_file(curr_line): def add_reference_link(file_location, reference_link): with open(file_location, 'a') as write_file: write_file.write("\nreference: " + reference_link + "\n") + # TODO: fix trailing spaces in filename # function that adds the links that should be at the end of a file @@ -260,6 +266,7 @@ def make_valid_title(s): for filename in filenames.keys(): try: + # if True: # make a copy of one of the md files to test some things shutil.copyfile(filenames[filename], ".\\copies\\" + filename) @@ -293,13 +300,10 @@ def make_valid_title(s): # variable that shows whether the first title has been reached yet after_first_title = False - ################### actually parse the md file ################### + # variable that is used to be sure that we are detecting titles and not comments from codeblocks + in_code_block = False - # remove the old directories if needed - remove_directory_tree(root_dir_generic) - remove_directory_tree(root_dir_os_specific_linux) - remove_directory_tree(root_dir_os_specific_windows) - remove_directory_tree(root_dir_os_specific_macos) + ################### actually parse the md file ################### # create directories for the source markdown file create_directory(root_dir_generic) @@ -321,6 +325,8 @@ def make_valid_title(s): for line in readfile: title_level, title, directory = check_for_title(line) + detect_in_code_block(line) + # line is a title with a maximum depth of 3 if title_level > 0: last_title_level = title_level @@ -341,6 +347,9 @@ def make_valid_title(s): choose_and_write_to_file(next_action[2]) # write end of file for the last file + # print(root_dir_generic) + # print(last_directory) + # print(filename) write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) 
write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", links_linux) @@ -348,12 +357,13 @@ def make_valid_title(s): links_windows) write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) - print("Parsing succeeded for file: " + filename) succeeded += 1 except: print("Parsing failed for file: " + filename) failed += 1 print("Success ratio: " + str(succeeded/(succeeded + failed) * 100) + "%") +print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") # TODO: directory cleanup +# TODO: reconsider maximum depth to be detected as title From 159aa62af18dd76b5567c00a98a08c16081d9773 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 16:43:09 +0200 Subject: [PATCH 007/145] small update, not important --- .../HPC chatbot preprocessor/start_checker.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py index 5661c79ddc95..b328e7ab80c3 100644 --- a/scripts/HPC chatbot preprocessor/start_checker.py +++ b/scripts/HPC chatbot preprocessor/start_checker.py @@ -17,3 +17,19 @@ lines_until_title += 1 print(filename + " : " + str(lines_until_title)) break + +directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\linux-tutorial" + +for dirpath, dirnames, filenames in os.walk(directory): + for filename in filenames: + # if filename.endswith("xdmod.md"): + # break + if filename.endswith(".md"): + lines_until_title = 0 + with open(directory + "\\" + filename, "r") as file: + for line in file: + if line[0] == "#": + break + lines_until_title += 1 + print(filename + " : " + str(lines_until_title)) + break From 75765e555edb9bc67ebcaf0136ac5efc8d0461ad Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 16:43:47 +0200 Subject: 
[PATCH 008/145] change to the templates --- scripts/HPC chatbot preprocessor/jinja_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py index 1b033bfdc6c5..612c20dd06b2 100644 --- a/scripts/HPC chatbot preprocessor/jinja_parser.py +++ b/scripts/HPC chatbot preprocessor/jinja_parser.py @@ -1,5 +1,5 @@ import yaml -from jinja2 import Template +from jinja2 import Template, FileSystemLoader, Environment, ChoiceLoader from if_mangler import mangle_ifs @@ -17,7 +17,9 @@ def jinja_parser(filename): md_content = md_file.read() # Use Jinja2 to replace the macros - template = Template(md_content) + templateloader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) + templateEnv = Environment(loader=templateloader) + template = templateEnv.get_template(filename) rendered_content = template.render(words_dict) # Save the rendered content to a new file From 57d9cfe5f25c66f8a6c2721fca9eaac1e6eea25d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 9 Aug 2024 16:44:18 +0200 Subject: [PATCH 009/145] change to accommodate for more nested if-clauses --- .../HPC chatbot preprocessor/if_mangler.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py index 9980a2e83e87..8dd0d0994523 100644 --- a/scripts/HPC chatbot preprocessor/if_mangler.py +++ b/scripts/HPC chatbot preprocessor/if_mangler.py @@ -1,7 +1,7 @@ import re # global variable to keep track of latest if-statement scope -is_os = 0 # Can be 0, 1 or 2 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if} +is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} def mangle_os_ifs(line): @@ -26,22 
+26,35 @@ def mangle_os_ifs(line): constr_match.end() + start_index + added_length - 1:] added_length += 8 is_os = 0 - elif is_os == 1: - is_os = 2 - elif if_match: - if if_os_match: + if is_os == 3: line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ constr_match.end() + start_index + added_length - 1:] added_length += 8 is_os = 2 + elif is_os == 1: + is_os = 2 + elif if_match: + if if_os_match: + if is_os == 2: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 3 + else: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 2 else: if is_os == 2: is_os = 1 else: is_os = 0 else: - if is_os == 2: + if is_os == 2 or is_os == 3: line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ constr_match.end() + start_index + added_length - 1:] From 75d345b1bd41325c5a4242251c093c6b396d3e21 Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Fri, 9 Aug 2024 16:45:53 +0200 Subject: [PATCH 010/145] Delete scripts/HPC chatbot preprocessor/start_checker.py This file is just used to test some things locally and not part of the parser --- .../HPC chatbot preprocessor/start_checker.py | 35 ------------------- 1 file changed, 35 deletions(-) delete mode 
100644 scripts/HPC chatbot preprocessor/start_checker.py diff --git a/scripts/HPC chatbot preprocessor/start_checker.py b/scripts/HPC chatbot preprocessor/start_checker.py deleted file mode 100644 index b328e7ab80c3..000000000000 --- a/scripts/HPC chatbot preprocessor/start_checker.py +++ /dev/null @@ -1,35 +0,0 @@ -# THIS IS NOT AN IMPORTANT FILE, DON'T WORRY ABOUT IT, I JUST USED IT TO TEST SOME THINGS - -import os - -directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC" - -for dirpath, dirnames, filenames in os.walk(directory): - for filename in filenames: - # if filename.endswith("xdmod.md"): - # break - if filename.endswith(".md"): - lines_until_title = 0 - with open(directory + "\\" + filename, "r") as file: - for line in file: - if line[0] == "#": - break - lines_until_title += 1 - print(filename + " : " + str(lines_until_title)) - break - -directory = "C:\\HPC_werk\\Documentation\\local\\vsc_user_docs\\mkdocs\\docs\\HPC\\linux-tutorial" - -for dirpath, dirnames, filenames in os.walk(directory): - for filename in filenames: - # if filename.endswith("xdmod.md"): - # break - if filename.endswith(".md"): - lines_until_title = 0 - with open(directory + "\\" + filename, "r") as file: - for line in file: - if line[0] == "#": - break - lines_until_title += 1 - print(filename + " : " + str(lines_until_title)) - break From ff7a9fc381399402c36670ef6ddb5bfb245b1dd4 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 12 Aug 2024 11:24:46 +0200 Subject: [PATCH 011/145] make sure files with duplicate names between normal files and linux-tutorial are both read and saved properly --- .../HPC chatbot preprocessor/if_mangler.py | 2 +- .../HPC chatbot preprocessor/jinja_parser.py | 24 +- scripts/HPC chatbot preprocessor/main.py | 246 ++++++++++-------- 3 files changed, 149 insertions(+), 123 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py index 8dd0d0994523..46b121610c95 100644 
--- a/scripts/HPC chatbot preprocessor/if_mangler.py +++ b/scripts/HPC chatbot preprocessor/if_mangler.py @@ -66,7 +66,7 @@ def mangle_os_ifs(line): def mangle_ifs(directory, file): with open(".\\if_mangled_files\\" + file, 'w') as write_file: - with open(directory + "\\" + file, 'r') as read_file: + with open(directory, 'r') as read_file: for line in read_file: new_line = mangle_os_ifs(line) write_file.write(new_line) diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py index 612c20dd06b2..603a453ecf7d 100644 --- a/scripts/HPC chatbot preprocessor/jinja_parser.py +++ b/scripts/HPC chatbot preprocessor/jinja_parser.py @@ -4,24 +4,28 @@ # function that let's jinja do its thing to format the files expect for the os-related if-statements -def jinja_parser(filename): +def jinja_parser(filename, copy_location): # Read the YAML file with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: words_dict = yaml.safe_load(yml_file) - # Mangle the OS-related if-statements - mangle_ifs('.\\copies', filename) + # ugly fix for index.md error + additional_context = { + 'config': { + 'repo_url': 'https://github.com/hpcugent/vsc_user_docs' + } + } + combined_context = {**words_dict, **additional_context} - # Read the if-mangled Markdown file - with open('.\\if_mangled_files\\' + filename, 'r') as md_file: - md_content = md_file.read() + # Mangle the OS-related if-statements + mangle_ifs(copy_location, filename) # Use Jinja2 to replace the macros - templateloader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) - templateEnv = Environment(loader=templateloader) + template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) + templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) - rendered_content = 
template.render(words_dict) + rendered_content = template.render(combined_context) # Save the rendered content to a new file - with open('.\\copies\\' + filename, 'w') as output_file: + with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: output_file.write(rendered_content) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index b2900334ef87..8351979a8658 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -7,34 +7,22 @@ succeeded = 0 failed = 0 -# make the necessary directories -if not os.path.exists(".\\copies"): - os.mkdir(".\\copies") - -if not os.path.exists(".\\parsed_mds"): - os.mkdir(".\\parsed_mds") - -if not os.path.exists(".\\if_mangled_files"): - os.mkdir(".\\if_mangled_files") - ################### define global variables ################### # variable that keeps track of the source directories source_directories = ["..\\..\\mkdocs\\docs\\HPC\\", "..\\..\\mkdocs\\docs\\HPC\\linux-tutorial"] -# variable that keeps track of the directories that are used to write in at different levels -root_dir_generic = ".\\parsed_mds\\generic\\" -root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" -root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" -root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" - # list of all the filenames -filenames = {} +filenames_generic = {} +filenames_linux = {} for source_directory in source_directories: all_items = os.listdir(source_directory) files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] for file in files: - filenames[file] = os.path.join(source_directory, file) + if "linux-tutorial" in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) # TODO: find solution for duplicate filenames between linux tutorial 
and normal files @@ -147,7 +135,7 @@ def check_if_statements(curr_line): content = match.group(1) # new if-statement wrt OS - if re.match(r'if OS == ', content): + if re.search(r'if OS == ', content): OS = content[9:-1] # set new active OS @@ -159,7 +147,7 @@ def check_if_statements(curr_line): active_OS_if_states[other_OS] = "inactive" # endif statement wrt OS - elif re.match(r'endif ', content): + elif re.search(r'endif ', content): if str(1) in active_OS_if_states.values(): active_OS_if_states[ list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" @@ -168,7 +156,7 @@ def check_if_statements(curr_line): active_OS_if_states[key] = "inactive" # else statement wrt OS - elif re.match(r'else ', content): + elif re.search(r'else ', content): i = 0 for i in range(3): @@ -264,106 +252,140 @@ def make_valid_title(s): return valid_filename -for filename in filenames.keys(): - try: - # if True: - # make a copy of one of the md files to test some things - shutil.copyfile(filenames[filename], - ".\\copies\\" + filename) - - ################### define/reset loop specific variables ################### - - # variable for the main title (needed for reference links) - main_title = filename[:-3] - - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for i in range(4)] - - # variable to keep track whether we're dealing with OS-specific info or not - OS_specific = False - - # variable that keeps track of the latest non-zero level title and corresponding directory - last_title_level = 1 - last_title = None - last_directory = None - last_was_title = False - - # list to keep track of links in the text - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - - # dictionaries to keep track of current OS - active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} +# remove the directories from a previous run of the parser 
+remove_directory_tree(".\\parsed_mds") +remove_directory_tree(".\\copies") +remove_directory_tree(".\\if_mangled_files") - # variable that shows whether the first title has been reached yet - after_first_title = False - - # variable that is used to be sure that we are detecting titles and not comments from codeblocks - in_code_block = False +# make the necessary directories +if not os.path.exists(".\\copies"): + os.mkdir(".\\copies") - ################### actually parse the md file ################### +if not os.path.exists(".\\copies\\linux"): + os.mkdir(".\\copies\\linux") - # create directories for the source markdown file - create_directory(root_dir_generic) - create_directory(".\\parsed_mds\\os_specific") - create_directory(root_dir_os_specific_linux) - create_directory(root_dir_os_specific_windows) - create_directory(root_dir_os_specific_macos) - create_directory(root_dir_generic + curr_dirs[0]) - create_directory(root_dir_os_specific_linux + curr_dirs[0]) - create_directory(root_dir_os_specific_windows + curr_dirs[0]) - create_directory(root_dir_os_specific_macos + curr_dirs[0]) +if not os.path.exists(".\\parsed_mds"): + os.mkdir(".\\parsed_mds") - # process the jinja macros - jinja_parser(filename) +if not os.path.exists(".\\if_mangled_files"): + os.mkdir(".\\if_mangled_files") - # open the file and store line by line in the right file - with open(".\\copies\\" + filename, 'r') as readfile: +for filenames in [filenames_generic, filenames_linux]: + for filename in filenames.keys(): + try: + # if True: + # make a copy of one of the md files to test some things + if "linux-tutorial" in filenames[filename]: + copy_file = ".\\copies\\linux\\" + filename + else: + copy_file = ".\\copies\\" + filename + shutil.copyfile(filenames[filename], copy_file) - for line in readfile: - title_level, title, directory = check_for_title(line) + ################### define/reset loop specific variables ################### - detect_in_code_block(line) + # variable that keeps 
track of the directories that are used to write in at different levels + if "linux-tutorial" in filenames[filename]: + root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" + else: + root_dir_generic = ".\\parsed_mds\\generic\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" + + # variable for the main title (needed for reference links) + main_title = filename[:-3] + + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for i in range(4)] + + # variable to keep track whether we're dealing with OS-specific info or not + OS_specific = False + + # variable that keeps track of the latest non-zero level title and corresponding directory + last_title_level = 1 + last_title = None + last_directory = None + last_was_title = False + + # list to keep track of links in the text + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + # dictionaries to keep track of current OS + active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + + # variable that shows whether the first title has been reached yet + after_first_title = False + + # variable that is used to be sure that we are detecting titles and not comments from codeblocks + in_code_block = False + + ################### actually parse the md file ################### + + # create directories for the source markdown file + create_directory(root_dir_generic) + create_directory(".\\parsed_mds\\os_specific") + create_directory(root_dir_os_specific_linux) + 
create_directory(root_dir_os_specific_windows) + create_directory(root_dir_os_specific_macos) + create_directory(root_dir_generic + curr_dirs[0]) + create_directory(root_dir_os_specific_linux + curr_dirs[0]) + create_directory(root_dir_os_specific_windows + curr_dirs[0]) + create_directory(root_dir_os_specific_macos + curr_dirs[0]) + + # process the jinja macros + jinja_parser(filename, copy_file) + + # open the file and store line by line in the right file + with open(copy_file, 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + detect_in_code_block(line) + + # line is a title with a maximum depth of 3 + if title_level > 0: + last_title_level = title_level + last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[ + 0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": + choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) - # line is a title with a maximum depth of 3 - if title_level > 0: - last_title_level = title_level - last_title = title - last_directory = directory - after_first_title = True + # write end of file for the last file + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + links_macos) + 
succeeded += 1 + except: + print("Parsing failed for file: " + filename) + failed += 1 - # line is not a title - elif after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) - - if next_action[0] == "write_text": - choose_and_write_to_file(next_action[2]) - - # write end of file for the last file - # print(root_dir_generic) - # print(last_directory) - # print(filename) - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos) - succeeded += 1 - except: - print("Parsing failed for file: " + filename) - failed += 1 - -print("Success ratio: " + str(succeeded/(succeeded + failed) * 100) + "%") -print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") +print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%") +print( + "Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") # TODO: directory cleanup # TODO: reconsider maximum depth to be detected as title +# TODO: adapt script to be used from command line From 7d279d6a7f1992275eae487c0893befc4a48d6f9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 12 Aug 2024 
11:54:48 +0200 Subject: [PATCH 012/145] fixed the problem of some files being written in reST instead of markdown --- scripts/HPC chatbot preprocessor/main.py | 25 +++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 8351979a8658..12f222b82f0b 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -1,6 +1,8 @@ import os import re import shutil +import pypandoc + from jinja_parser import jinja_parser # variables for analytics @@ -24,10 +26,8 @@ else: filenames_generic[file] = os.path.join(source_directory, file) -# TODO: find solution for duplicate filenames between linux tutorial and normal files - -# TODO: problem-files (other layout than normal markdown-files) -problem_files = ["linux_tutorial\\getting_started.md", "linux_tutorial\\navigating.md"] +# some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc +problem_files = ["getting_started.md", "navigating.md"] ################### define functions ################### @@ -272,8 +272,8 @@ def make_valid_title(s): for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): - try: - # if True: + # try: + if True: # make a copy of one of the md files to test some things if "linux-tutorial" in filenames[filename]: copy_file = ".\\copies\\linux\\" + filename @@ -341,6 +341,10 @@ def make_valid_title(s): # process the jinja macros jinja_parser(filename, copy_file) + # convert the files without proper markdown layout into markdown using pandoc + if "linux-tutorial" in filenames[filename] and filename in problem_files: + pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) + # open the file and store line by line in the right file with open(copy_file, 'r') as readfile: @@ -378,13 +382,12 @@ def make_valid_title(s): 
write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos) succeeded += 1 - except: - print("Parsing failed for file: " + filename) - failed += 1 + # except: + # print("Parsing failed for file: " + filename) + # failed += 1 print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%") -print( - "Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") +print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") # TODO: directory cleanup # TODO: reconsider maximum depth to be detected as title From 8047572387eb08e278bde89f9d688b74c817b7d0 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 12 Aug 2024 13:33:14 +0200 Subject: [PATCH 013/145] some small fixes --- scripts/HPC chatbot preprocessor/main.py | 78 +++++++++++++----------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 12f222b82f0b..bf16c95ea353 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -37,11 +37,11 @@ def remove_directory_tree(old_directory): shutil.rmtree(old_directory) -# function that checks whether the current line has a title of level 3 at maximum (returns the level of the title or 0 if the line is not a title) +# function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title) def check_for_title_logic(curr_line): global curr_dirs match = re.match(r'^#+ ', curr_line) - if match and len(match.group(0)) <= 4: + if match and len(match.group(0)) <= 5: return len(match.group(0)) - 1 else: return 0 @@ -64,13 +64,13 @@ def check_for_title(curr_line): return 0, None, None else: if last_title is not None: - 
write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux) + links_linux, is_linux_tutorial) write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows) + links_windows, is_linux_tutorial) write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos) + links_macos, is_linux_tutorial) reset_link_lists() curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + make_valid_title( @@ -124,12 +124,11 @@ def replace_markdown_markers(curr_line, linklist): # function that checks for if-statements def check_if_statements(curr_line): - # TODO: adapt regex for annoying inconsistencies # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{-if-%-\s([^%]*)%-if-}(.*)', curr_line) + match = re.search(r'^\{-if-%([^%]*)%-if-}(.*)', curr_line) # check whether the line contains information wrt if-statements that is not in its first part - match_large = re.search(r'^(.*)(\{-if-%-\s[^%]*%-if-})(.*)', curr_line) + match_large = re.search(r'^(.*)(\{-if-%[^%]*%-if-})(.*)', curr_line) if match: content = match.group(1) @@ -147,7 +146,7 @@ def check_if_statements(curr_line): active_OS_if_states[other_OS] = "inactive" # endif statement wrt OS - elif re.search(r'endif ', content): + elif re.search(r'endif', content): if str(1) in active_OS_if_states.values(): active_OS_if_states[ list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" @@ -156,7 +155,7 @@ def check_if_statements(curr_line): active_OS_if_states[key] = "inactive" # else statement wrt OS - elif re.search(r'else ', content): + elif 
re.search(r'else', content): i = 0 for i in range(3): @@ -220,11 +219,10 @@ def choose_and_write_to_file(curr_line): def add_reference_link(file_location, reference_link): with open(file_location, 'a') as write_file: write_file.write("\nreference: " + reference_link + "\n") - # TODO: fix trailing spaces in filename # function that adds the links that should be at the end of a file -def write_end_of_file(file_location, OS, linklist): +def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): if len(OS) > 0: OS = OS + "/" @@ -234,8 +232,13 @@ def write_end_of_file(file_location, OS, linklist): for i, link in enumerate(linklist): write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n") + if is_linux_tutorial_: + linux_part = "linux-tutorial/" + else: + linux_part = "" + # finally add the reference link - add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + main_title + "/#" + last_title.lower()) + add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) # function that makes sure all titles can be used as valid filenames @@ -243,11 +246,14 @@ def make_valid_title(s): # Define a regex pattern for invalid characters on both Windows and Linux invalid_chars = r'[<>:"/\\|?*\0()]' + # get rid of extra information between {} brackets + s = re.sub(r'\{.*?}', '', s) + # Remove invalid characters valid_filename = re.sub(invalid_chars, '', s) # Strip leading/trailing whitespace - valid_filename = valid_filename.strip() + valid_filename = valid_filename.strip().strip('-') return valid_filename @@ -272,19 +278,21 @@ def make_valid_title(s): for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): - # try: - if True: - # make a copy of one of the md files to test some things - if "linux-tutorial" in filenames[filename]: + try: + ################### define/reset loop specific variables 
################### + + # variable that keeps track of whether file is part of the linux tutorial + is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) + + # make a copy of the original file in order to make sure the original does not get altered + if is_linux_tutorial: copy_file = ".\\copies\\linux\\" + filename else: copy_file = ".\\copies\\" + filename shutil.copyfile(filenames[filename], copy_file) - ################### define/reset loop specific variables ################### - # variable that keeps track of the directories that are used to write in at different levels - if "linux-tutorial" in filenames[filename]: + if is_linux_tutorial: root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" @@ -299,7 +307,7 @@ def make_valid_title(s): main_title = filename[:-3] # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for i in range(4)] + curr_dirs = [filename[:-3] for i in range(5)] # variable to keep track whether we're dealing with OS-specific info or not OS_specific = False @@ -353,7 +361,7 @@ def make_valid_title(s): detect_in_code_block(line) - # line is a title with a maximum depth of 3 + # line is a title with a maximum depth of 4 if title_level > 0: last_title_level = title_level last_title = title @@ -364,8 +372,7 @@ def make_valid_title(s): elif after_first_title: # check for if-statements and write the appropriate lines in the right files next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[ - 0] == "check_extra_message": + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": if next_action[0] == "write_text_and_check_extra_message": choose_and_write_to_file(next_action[2]) 
next_action = check_if_statements(next_action[1]) @@ -374,21 +381,22 @@ def make_valid_title(s): choose_and_write_to_file(next_action[2]) # write end of file for the last file - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic) + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux) + links_linux, is_linux_tutorial) write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows) + links_windows, is_linux_tutorial) write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos) + links_macos, is_linux_tutorial) succeeded += 1 - # except: - # print("Parsing failed for file: " + filename) - # failed += 1 + except: + print("Parsing failed for file: " + filename) + failed += 1 print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%") print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") -# TODO: directory cleanup -# TODO: reconsider maximum depth to be detected as title +remove_directory_tree(".\\copies") +remove_directory_tree(".\\if_mangled_files") +# TODO: reconsider maximum depth to be detected as title (now at four) # TODO: adapt script to be used from command line From 7d1c5ed2cfca12d5eb4ecaffa3178e821c63f210 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 10:35:06 +0200 Subject: [PATCH 014/145] remove try-except-structure --- scripts/HPC chatbot preprocessor/main.py | 216 +++++++++++------------ 1 file changed, 105 insertions(+), 111 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index bf16c95ea353..2ed91022b7b1 100644 --- 
a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -278,122 +278,116 @@ def make_valid_title(s): for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): - try: - ################### define/reset loop specific variables ################### + ################### define/reset loop specific variables ################### - # variable that keeps track of whether file is part of the linux tutorial - is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) + # variable that keeps track of whether file is part of the linux tutorial + is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) - # make a copy of the original file in order to make sure the original does not get altered - if is_linux_tutorial: - copy_file = ".\\copies\\linux\\" + filename - else: - copy_file = ".\\copies\\" + filename - shutil.copyfile(filenames[filename], copy_file) - - # variable that keeps track of the directories that are used to write in at different levels - if is_linux_tutorial: - root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" - else: - root_dir_generic = ".\\parsed_mds\\generic\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" - - # variable for the main title (needed for reference links) - main_title = filename[:-3] - - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for i in range(5)] - - # variable to keep track whether we're dealing with OS-specific info or not - OS_specific = False - - # 
variable that keeps track of the latest non-zero level title and corresponding directory - last_title_level = 1 - last_title = None - last_directory = None - last_was_title = False - - # list to keep track of links in the text - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - - # dictionaries to keep track of current OS - active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} - - # variable that shows whether the first title has been reached yet - after_first_title = False - - # variable that is used to be sure that we are detecting titles and not comments from codeblocks - in_code_block = False - - ################### actually parse the md file ################### - - # create directories for the source markdown file - create_directory(root_dir_generic) - create_directory(".\\parsed_mds\\os_specific") - create_directory(root_dir_os_specific_linux) - create_directory(root_dir_os_specific_windows) - create_directory(root_dir_os_specific_macos) - create_directory(root_dir_generic + curr_dirs[0]) - create_directory(root_dir_os_specific_linux + curr_dirs[0]) - create_directory(root_dir_os_specific_windows + curr_dirs[0]) - create_directory(root_dir_os_specific_macos + curr_dirs[0]) - - # process the jinja macros - jinja_parser(filename, copy_file) - - # convert the files without proper markdown layout into markdown using pandoc - if "linux-tutorial" in filenames[filename] and filename in problem_files: - pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) - - # open the file and store line by line in the right file - with open(copy_file, 'r') as readfile: - - for line in readfile: - title_level, title, directory = check_for_title(line) - - detect_in_code_block(line) - - # line is a title with a maximum depth of 4 - if title_level > 0: - last_title_level = title_level - last_title = title - last_directory = directory - after_first_title = True - - # line is not a title - elif 
after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) - - if next_action[0] == "write_text": + # make a copy of the original file in order to make sure the original does not get altered + if is_linux_tutorial: + copy_file = ".\\copies\\linux\\" + filename + else: + copy_file = ".\\copies\\" + filename + shutil.copyfile(filenames[filename], copy_file) + + # variable that keeps track of the directories that are used to write in at different levels + if is_linux_tutorial: + root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" + else: + root_dir_generic = ".\\parsed_mds\\generic\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" + + # variable for the main title (needed for reference links) + main_title = filename[:-3] + + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for i in range(5)] + + # variable to keep track whether we're dealing with OS-specific info or not + OS_specific = False + + # variable that keeps track of the latest non-zero level title and corresponding directory + last_title_level = 1 + last_title = None + last_directory = None + last_was_title = False + + # list to keep track of links in the text + 
links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + # dictionaries to keep track of current OS + active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + + # variable that shows whether the first title has been reached yet + after_first_title = False + + # variable that is used to be sure that we are detecting titles and not comments from codeblocks + in_code_block = False + + ################### actually parse the md file ################### + + # create directories for the source markdown file + create_directory(root_dir_generic) + create_directory(".\\parsed_mds\\os_specific") + create_directory(root_dir_os_specific_linux) + create_directory(root_dir_os_specific_windows) + create_directory(root_dir_os_specific_macos) + create_directory(root_dir_generic + curr_dirs[0]) + create_directory(root_dir_os_specific_linux + curr_dirs[0]) + create_directory(root_dir_os_specific_windows + curr_dirs[0]) + create_directory(root_dir_os_specific_macos + curr_dirs[0]) + + # process the jinja macros + jinja_parser(filename, copy_file) + + # convert the files without proper markdown layout into markdown using pandoc + if "linux-tutorial" in filenames[filename] and filename in problem_files: + pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) + + # open the file and store line by line in the right file + with open(copy_file, 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + detect_in_code_block(line) + + # line is a title with a maximum depth of 4 + if title_level > 0: + last_title_level = title_level + last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == 
"check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) - # write end of file for the last file - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos, is_linux_tutorial) - succeeded += 1 - except: - print("Parsing failed for file: " + filename) - failed += 1 + if next_action[0] == "write_text": + choose_and_write_to_file(next_action[2]) + + # write end of file for the last file + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + links_macos, is_linux_tutorial) -print("Success ratio: " + str(succeeded / (succeeded + failed) * 100) + "%") print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") remove_directory_tree(".\\copies") From 984b0cd3868b38c59e72c56fd75f04c6e4918b18 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 12:23:04 +0200 Subject: [PATCH 015/145] collapse all code into one file --- scripts/HPC chatbot 
preprocessor/main.py | 105 ++++++++++++++++++++++- 1 file changed, 101 insertions(+), 4 deletions(-) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/main.py index 2ed91022b7b1..b6e42e99ea04 100644 --- a/scripts/HPC chatbot preprocessor/main.py +++ b/scripts/HPC chatbot preprocessor/main.py @@ -2,8 +2,8 @@ import re import shutil import pypandoc - -from jinja_parser import jinja_parser +import yaml +from jinja2 import FileSystemLoader, Environment, ChoiceLoader # variables for analytics succeeded = 0 @@ -29,6 +29,9 @@ # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc problem_files = ["getting_started.md", "navigating.md"] +# global variable to keep track of latest if-statement scope +is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} + ################### define functions ################### # function that removes the previous file structure before starting the process of making a new one @@ -122,6 +125,102 @@ def replace_markdown_markers(curr_line, linklist): return curr_line, linklist +# function that let's jinja do its thing to format the files expect for the os-related if-statements +def jinja_parser(filename, copy_location): + # Read the YAML file + with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: + words_dict = yaml.safe_load(yml_file) + + # ugly fix for index.md error + additional_context = { + 'config': { + 'repo_url': 'https://github.com/hpcugent/vsc_user_docs' + } + } + combined_context = {**words_dict, **additional_context} + + # Mangle the OS-related if-statements + mangle_ifs(copy_location, filename) + + # Use Jinja2 to replace the macros + template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) + templateEnv = Environment(loader=template_loader) + 
template = templateEnv.get_template(filename) + rendered_content = template.render(combined_context) + + # Save the rendered content to a new file + with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: + output_file.write(rendered_content) + + +def mangle_os_ifs(line): + global is_os + + match = re.search(r'\{%(.*?)%}(.*)', line) + + start_index = 0 + added_length = 0 + + while match: + + constr_match = re.search(r'\{%.*?%}', match.string) + if_match = re.search(r'if ', match.group(1)) + if_os_match = re.search(r'if OS == ', match.group(1)) + endif_match = re.search(r'endif', match.group(1)) + + if endif_match: + if is_os == 2: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 0 + if is_os == 3: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 2 + elif is_os == 1: + is_os = 2 + elif if_match: + if if_os_match: + if is_os == 2: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 3 + else: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + is_os = 2 + else: + if is_os == 
2: + is_os = 1 + else: + is_os = 0 + else: + if is_os == 2 or is_os == 3: + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ + constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ + constr_match.end() + start_index + added_length - 1:] + added_length += 8 + start_index += constr_match.end() + match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) + return line + + +def mangle_ifs(directory, file): + with open(".\\if_mangled_files\\" + file, 'w') as write_file: + with open(directory, 'r') as read_file: + for line in read_file: + new_line = mangle_os_ifs(line) + write_file.write(new_line) + + # function that checks for if-statements def check_if_statements(curr_line): # check whether the first part of the line contains information wrt if-statements @@ -388,8 +487,6 @@ def make_valid_title(s): write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", links_macos, is_linux_tutorial) -print("Although this ratio should be taken with a grain of salt as a number of other fixes need to be implemented as well, they just don't cause any errors.") - remove_directory_tree(".\\copies") remove_directory_tree(".\\if_mangled_files") # TODO: reconsider maximum depth to be detected as title (now at four) From 8f5eeaa5454860326bf3a02d15a63c5622ab7aee Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 12:26:02 +0200 Subject: [PATCH 016/145] Rename file --- scripts/HPC chatbot preprocessor/{main.py => chatbot_parser.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/HPC chatbot preprocessor/{main.py => chatbot_parser.py} (100%) diff --git a/scripts/HPC chatbot preprocessor/main.py b/scripts/HPC chatbot preprocessor/chatbot_parser.py similarity index 100% rename from scripts/HPC chatbot preprocessor/main.py rename to scripts/HPC chatbot preprocessor/chatbot_parser.py From 2b97b7a31d9ba151f1747a152736dac4906af466 
Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 12:30:36 +0200 Subject: [PATCH 017/145] cleanup repository --- .../HPC chatbot preprocessor/.idea/.gitignore | 8 - .../.idea/HPC chatbot preprocessor.iml | 10 - .../inspectionProfiles/Project_Default.xml | 25 -- .../inspectionProfiles/profiles_settings.xml | 6 - .../HPC chatbot preprocessor/.idea/misc.xml | 7 - .../.idea/modules.xml | 8 - .../HPC chatbot preprocessor/.idea/vcs.xml | 6 - .../copies/getting_started_copy.md | 268 ------------------ .../Getting-Access/Getting-Access.txt | 25 -- .../Getting-Connected/Getting-Connected.txt | 19 -- .../Getting-Started/Getting-Started.txt | 11 - .../Inspect-your-results.txt | 56 ---- .../Getting-Started/Next-steps/Next-steps.txt | 15 - .../Submitting-a-job/Submitting-a-job.txt | 60 ---- .../Transfer-your-files.txt | 21 -- .../Wait-for-job-to-be-executed.txt | 26 -- .../Getting-Access/Getting-Access.txt | 2 - .../Getting-Connected/Getting-Connected.txt | 18 -- .../Getting-Started/Getting-Started.txt | 2 - .../Inspect-your-results.txt | 2 - .../Getting-Started/Next-steps/Next-steps.txt | 2 - .../Submitting-a-job/Submitting-a-job.txt | 2 - .../Transfer-your-files.txt | 21 -- .../Wait-for-job-to-be-executed.txt | 2 - .../Getting-Access/Getting-Access.txt | 2 - .../Getting-Connected/Getting-Connected.txt | 13 - .../Getting-Started/Getting-Started.txt | 2 - .../Inspect-your-results.txt | 2 - .../Getting-Started/Next-steps/Next-steps.txt | 2 - .../Submitting-a-job/Submitting-a-job.txt | 2 - .../Transfer-your-files.txt | 21 -- .../Wait-for-job-to-be-executed.txt | 2 - .../Getting-Access/Getting-Access.txt | 2 - .../Getting-Connected/Getting-Connected.txt | 13 - .../Getting-Started/Getting-Started.txt | 2 - .../Inspect-your-results.txt | 2 - .../Getting-Started/Next-steps/Next-steps.txt | 2 - .../Submitting-a-job/Submitting-a-job.txt | 2 - .../Transfer-your-files.txt | 15 - .../Wait-for-job-to-be-executed.txt | 2 - .../HPC chatbot preprocessor/if_mangler.py | 72 ----- 
.../HPC chatbot preprocessor/jinja_parser.py | 31 -- 42 files changed, 811 deletions(-) delete mode 100644 scripts/HPC chatbot preprocessor/.idea/.gitignore delete mode 100644 scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/misc.xml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/modules.xml delete mode 100644 scripts/HPC chatbot preprocessor/.idea/vcs.xml delete mode 100644 scripts/HPC chatbot preprocessor/copies/getting_started_copy.md delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt delete mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt delete mode 100644 scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt delete mode 100644 scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt delete mode 100644 scripts/HPC chatbot 
preprocessor/if_mangler.py delete mode 100644 scripts/HPC chatbot preprocessor/jinja_parser.py diff --git a/scripts/HPC chatbot preprocessor/.idea/.gitignore b/scripts/HPC chatbot preprocessor/.idea/.gitignore deleted file mode 100644 index 13566b81b018..000000000000 --- a/scripts/HPC chatbot preprocessor/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml b/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml deleted file mode 100644 index 2c80e1269497..000000000000 --- a/scripts/HPC chatbot preprocessor/.idea/HPC chatbot preprocessor.iml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index fc946d9cefc8..000000000000 --- a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml b/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2da2d64..000000000000 --- a/scripts/HPC chatbot preprocessor/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/misc.xml b/scripts/HPC chatbot preprocessor/.idea/misc.xml deleted file mode 100644 index 54cda8fd6dd9..000000000000 --- a/scripts/HPC chatbot preprocessor/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot 
preprocessor/.idea/modules.xml b/scripts/HPC chatbot preprocessor/.idea/modules.xml deleted file mode 100644 index 58e027d745f9..000000000000 --- a/scripts/HPC chatbot preprocessor/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/.idea/vcs.xml b/scripts/HPC chatbot preprocessor/.idea/vcs.xml deleted file mode 100644 index b2bdec2d71b6..000000000000 --- a/scripts/HPC chatbot preprocessor/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md b/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md deleted file mode 100644 index 8fe33ebc513d..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/getting_started_copy.md +++ /dev/null @@ -1,268 +0,0 @@ -{% set exampleloc="mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist" %} -# Getting Started - -Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the {{hpcinfra}} and submitting your very first job. We'll also walk you through the process step by step using a practical example. - -In addition to this chapter, you might find the [recording of the *Introduction to HPC-UGent* training session](https://www.ugent.be/hpc/en/training/introhpcugent-recording) to be a useful resource. - -Before proceeding, read [the introduction to HPC](introduction.md) to gain an understanding of the {{ hpcinfra }} and related terminology. - -### Getting Access - -To get access to the {{hpcinfra}}, visit [Getting an HPC Account](account.md). - -If you have not used Linux before, -{%- if site == 'Gent' %} -now would be a good time to follow our [Linux Tutorial](linux-tutorial/index.md). -{%- else %} -please learn some basics first before continuing. 
(see [Appendix C - Useful Linux Commands](useful_linux_commands.md)) -{%- endif %} - -#### A typical workflow looks like this: - -1. Connect to the login nodes -2. Transfer your files to the {{hpcinfra}} -3. Optional: compile your code and test it -4. Create a job script and submit your job -5. Wait for job to be executed -6. Study the results generated by your jobs, either on the cluster or - after downloading them locally. - -We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using [TensorFlow](https://www.tensorflow.org/); -see the [example scripts](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}}). - -### Getting Connected - -There are two options to connect - -- Using a terminal to connect via SSH (for power users) (see [First Time connection to the {{ hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure)) -- [Using the web portal](web_portal.md) - -Considering your operating system is **{{OS}}**, - -{%- if OS == linux %} -it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. - -Assuming you have already generated SSH keys in the previous step ([Getting Access](#getting-access)), and that they are in a default location, you should now be able to login by running the following command: - -
ssh {{userid}}@{{loginnode}}
- -!!! Warning "User your own VSC account id" - - Replace {{userid}} with your VSC account id (see ) - -!!! Tip - - You can also still use the web portal (see [shell access on web portal](web_portal.md#shell-access)) - -{%- else %} -{%- if OS == windows %} it is recommended to use the web portal. -{%- else %} it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. {%- endif %} - -The [web portal](web_portal.md) offers a convenient way to upload files and gain shell access to the {{hpcinfra}} from a standard web browser (no software installation or configuration required). - -See [shell access](web_portal.md#shell-access) when using the web portal, or -[connection to the {{hpcinfra}}](connecting.md#first-time-connection-to-the-hpc-infrastructure) when using a terminal. - -Make sure you can get to a shell access to the {{hpcinfra}} before proceeding with the next steps. - -{%- endif %} - -!!! Info - - When having problems see the [connection issues section on the troubleshooting page](troubleshooting.md#sec:connecting-issues). - - -### Transfer your files - -Now that you can login, it is time to transfer files from your local computer to your **home directory** on the {{hpcinfra}}. - -Download [tensorflow_mnist.py](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py) -and [run.sh](https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh) example scripts to your computer (from [here](https://github.com/hpcugent/vsc_user_docs/tree/main/{{exampleloc}})). - -{%- if OS == windows %} - -The [HPC-UGent web portal](https://login.hpc.ugent.be) provides a file browser that allows uploading files. -For more information see the [file browser section](web_portal.md#file-browser). - -Upload both files (`run.sh` and `tensorflow-mnist.py`) to your **home directory** and go back to your shell. - -!!! 
Info - - As an alternative, you can use WinSCP (see [our section](connecting.md#winscp)) - -{%- else %} - -On your local machine you can run: -
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/tensorflow_mnist.py
-curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/{{exampleloc}}/run.sh
-
- -Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). -
scp tensorflow_mnist.py run.sh {{userid}}@{{ loginnode }}:~ 
-
ssh  {{userid}}@{{ loginnode }} 
- -!!! Warning "User your own VSC account id" - - Replace {{userid}} with your VSC account id (see ) - -!!! Info - - For more information about transfering files or `scp`, see [tranfer files from/to hpc](connecting.md#transfer-files-tofrom-the-hpc). - -{%- endif %} - -When running `ls` in your session on the {{hpcinfra}}, you should see the two files listed in your home directory (`~`): - -```shell -$ ls ~ -run.sh tensorflow_mnist.py -``` - -When you do not see these files, make sure you uploaded the files to your **home directory**. - -### Submitting a job - -Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script. - -A job script is a shell script, a text file that specifies the resources, -the software that is used (via `module load` statements), -and the steps that should be executed to run the calculation. - -Our job script looks like this: - -
-- run.sh --
- -```bash -#!/bin/bash - -module load TensorFlow/2.11.0-foss-2022a - -python tensorflow_mnist.py - -``` -As you can see this job script will run the Python script named **tensorflow_mnist.py**. - - -The jobs you submit are per default executed on **cluser/{{defaultcluster}}**, you can swap to another cluster by issuing the following command. - -```shell -module swap cluster/{{othercluster}} -``` - -!!! Tip - - When submitting jobs with limited amount of resources, it is recommended to use the [debug/interactive cluster](interactive_debug.md#interactive-and-debug-cluster): `donphan`. - -{%- if site == 'Gent' %} - - To get a list of all clusters and their hardware, see . - -{%- endif %} - -This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command: - -```shell -$ qsub run.sh -{{jobid}} -``` - -This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job. - -!!! Warning "Make sure you understand what the `module` command does" - - Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, - but our active shell session is still running on the login node. - - It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands your are running are being executed: they will still be run on the login node you are on. - - When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`). - -For detailed information about `module` commands, read the [running batch jobs](running_batch_jobs.md) chapter. 
- -### Wait for job to be executed - -Your job is put into a queue before being executed, so it may take a while before it actually starts. -(see [when will my job start?](running_batch_jobs.md#when-will-my-job-start) for scheduling policy). - -You can get an overview of the active jobs using the `qstat` command: -
$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:00  Q {{othercluster}}
-
- -Eventually, after entering `qstat` again you should see that your job has started running: -
$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:01  R {{othercluster}}
-
- -If you don't see your job in the output of the `qstat` command anymore, your job has likely completed. - -Read [this section](running_batch_jobs.md#monitoring-and-managing-your-jobs) on how to interpret the output. - -### Inspect your results - -When your job finishes it generates 2 output files: - -- One for normal output messages (*stdout* output channel). -- One for warning and error messages (*stderr* output channel). - -By default located in the directory where you issued `qsub`. - -{%- if site == 'Gent' %} - -!!! Info - - For more information about the stdout and stderr output channels, see this [section](linux-tutorial/beyond_the_basics.md#inputoutput). - -{%- endif %} - -In our example when running ls in the current directory you should see 2 new files: - -- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}}; -- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}. - -!!! Info - - run.sh.e{{jobid}} should be empty (no errors or warnings). - -!!! Warning "Use your own job ID" - - Replace {{jobid}} with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`. 
- -When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this: -``` -Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz -11493376/11490434 [==============================] - 1s 0us/step -Epoch 1/5 -1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133 -Epoch 2/5 -1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571 -Epoch 3/5 -1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675 -Epoch 4/5 -1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727 -Epoch 5/5 -1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768 -313/313 - 0s - loss: 0.0782 - accuracy: 0.9764 -``` - -Hurray 🎉, we trained a deep learning model and achieved 97.64 percent accuracy. - -!!! Warning - - When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see [GPU clusters](gpu.md). - - For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster. 
- -### Next steps - -- [Running interactive jobs](running_interactive_jobs.md) -- [Running jobs with input/output data](running_jobs_with_input_output_data.md) -- [Multi core jobs/Parallel Computing](multi_core_jobs.md) -- [Interactive and debug cluster](interactive_debug.md#interactive-and-debug-cluster) - -For more examples see [Program examples](program_examples.md) and [Job script examples](jobscript_examples.md) diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt deleted file mode 100644 index f95191b96f01..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt +++ /dev/null @@ -1,25 +0,0 @@ - -To get access to the HPC-UGent infrastructure, visit Getting an HPC Account[1]. - -If you have not used Linux before, -now would be a good time to follow our Linux Tutorial[2]. - -#### A typical workflow looks like this: - -1. Connect to the login nodes -2. Transfer your files to the HPC-UGent infrastructure -3. Optional: compile your code and test it -4. Create a job script and submit your job -5. Wait for job to be executed -6. Study the results generated by your jobs, either on the cluster or - after downloading them locally. - -We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using TensorFlow[3]; -see the example scripts[4]. 
- -[1]: account.md -[2]: linux-tutorial/index.md -[3]: https://www.tensorflow.org/ -[4]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist - -reference: docs.hpc.ugent.be/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt deleted file mode 100644 index 94f17ac50709..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt +++ /dev/null @@ -1,19 +0,0 @@ - -There are two options to connect - -- Using a terminal to connect via SSH (for power users) (see First Time connection to the HPC-UGent infrastructure[1]) -- Using the web portal[2] - -Considering your operating system is **{{OS}}**, - - -!!! Info - - When having problems see the connection issues section on the troubleshooting page[3]. - - -[1]: connecting.md#first-time-connection-to-the-hpc-infrastructure -[2]: web_portal.md -[3]: troubleshooting.md#sec:connecting-issues - -reference: docs.hpc.ugent.be/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt deleted file mode 100644 index 3403b57f2c21..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Getting-Started.txt +++ /dev/null @@ -1,11 +0,0 @@ - -Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the HPC-UGent infrastructure and submitting your very first job. 
We'll also walk you through the process step by step using a practical example. - -In addition to this chapter, you might find the recording of the *Introduction to HPC-UGent* training session[1] to be a useful resource. - -Before proceeding, read the introduction to HPC[2] to gain an understanding of the HPC-UGent infrastructure and related terminology. - -[1]: https://www.ugent.be/hpc/en/training/introhpcugent-recording -[2]: introduction.md - -reference: docs.hpc.ugent.be/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt deleted file mode 100644 index 417416007f5c..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt +++ /dev/null @@ -1,56 +0,0 @@ - -When your job finishes it generates 2 output files: - -- One for normal output messages (*stdout* output channel). -- One for warning and error messages (*stderr* output channel). - -By default located in the directory where you issued `qsub`. - - -!!! Info - - For more information about the stdout and stderr output channels, see this section[1]. - - -In our example when running ls in the current directory you should see 2 new files: - -- **run.sh.o{{jobid}}**, containing *normal output messages* produced by job {{jobid}}; -- **run.sh.e{{jobid}}**, containing *errors and warnings* produced by job {{jobid}}. - -!!! Info - - run.sh.e{{jobid}} should be empty (no errors or warnings). - -!!! Warning "Use your own job ID" - - Replace {{jobid}} with the jobid you got from the `qstat` command (see above) or simply look for added files in your current directory by running `ls`. 
- -When examining the contents of ``run.sh.o{{jobid}}`` you will see something like this: -``` -Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz -11493376/11490434 [==============================] - 1s 0us/step -Epoch 1/5 -1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133 -Epoch 2/5 -1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571 -Epoch 3/5 -1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675 -Epoch 4/5 -1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727 -Epoch 5/5 -1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768 -313/313 - 0s - loss: 0.0782 - accuracy: 0.9764 -``` - -Hurray 🎉, we trained a deep learning model and achieved 97.64 percent accuracy. - -!!! Warning - - When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see GPU clusters[2]. - - For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster. 
- -[1]: linux-tutorial/beyond_the_basics.md#inputoutput -[2]: gpu.md - -reference: docs.hpc.ugent.be/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt deleted file mode 100644 index 804b56b8251b..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt +++ /dev/null @@ -1,15 +0,0 @@ - -- Running interactive jobs[1] -- Running jobs with input/output data[2] -- Multi core jobs/Parallel Computing[3] -- Interactive and debug cluster[4] - -For more examples see Program examples[5] and Job script examples[6] -[1]: running_interactive_jobs.md -[2]: running_jobs_with_input_output_data.md -[3]: multi_core_jobs.md -[4]: interactive_debug.md#interactive-and-debug-cluster -[5]: program_examples.md -[6]: jobscript_examples.md - -reference: docs.hpc.ugent.be/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt deleted file mode 100644 index edb336fa06ba..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt +++ /dev/null @@ -1,60 +0,0 @@ - -Jobs are submitted and executed using job scripts. In our case **run.sh** can be used as a (very minimal) job script. - -A job script is a shell script, a text file that specifies the resources, -the software that is used (via `module load` statements), -and the steps that should be executed to run the calculation. - -Our job script looks like this: - -
-- run.sh --
- -```bash -#!/bin/bash - -module load TensorFlow/2.11.0-foss-2022a - -python tensorflow_mnist.py - -``` -As you can see this job script will run the Python script named **tensorflow_mnist.py**. - - -The jobs you submit are per default executed on **cluser/{{defaultcluster}}**, you can swap to another cluster by issuing the following command. - -```shell -module swap cluster/{{othercluster}} -``` - -!!! Tip - - When submitting jobs with limited amount of resources, it is recommended to use the debug/interactive cluster[1]: `donphan`. - - - To get a list of all clusters and their hardware, see . - - -This job script can now be submitted to the cluster's job system for execution, using the qsub (**q**ueue **sub**mit) command: - -```shell -$ qsub run.sh -{{jobid}} -``` - -This command returns a job identifier (*{{jobid}}*) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job. - -!!! Warning "Make sure you understand what the `module` command does" - - Note that the module commands only modify environment variables. For instance, running `module swap cluster/{{othercluster}}` will update your shell environment so that `qsub` submits a job to the `{{othercluster}}` cluster, - but our active shell session is still running on the login node. - - It is important to understand that while `module` commands affect your session environment, they do ***not*** change where the commands your are running are being executed: they will still be run on the login node you are on. - - When you submit a job script however, the commands ***in*** the job script will be run on a workernode of the cluster the job was submitted to (like `{{othercluster}}`). - -For detailed information about `module` commands, read the running batch jobs[2] chapter. 
- -[1]: interactive_debug.md#interactive-and-debug-cluster -[2]: running_batch_jobs.md - -reference: docs.hpc.ugent.be/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt deleted file mode 100644 index 94dc30f67121..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt +++ /dev/null @@ -1,21 +0,0 @@ - -Now that you can login, it is time to transfer files from your local computer to your **home directory** on the HPC-UGent infrastructure. - -Download tensorflow_mnist.py[1] -and run.sh[2] example scripts to your computer (from here[3]). - - -When running `ls` in your session on the HPC-UGent infrastructure, you should see the two files listed in your home directory (`~`): - -```shell -$ ls ~ -run.sh tensorflow_mnist.py -``` - -When you do not see these files, make sure you uploaded the files to your **home directory**. 
- -[1]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py -[2]: https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh -[3]: https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist - -reference: docs.hpc.ugent.be/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt deleted file mode 100644 index de177946cf93..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/generic/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt +++ /dev/null @@ -1,26 +0,0 @@ - -Your job is put into a queue before being executed, so it may take a while before it actually starts. -(see when will my job start?[1] for scheduling policy). - -You can get an overview of the active jobs using the `qstat` command: -
$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:00  Q {{othercluster}}
-
- -Eventually, after entering `qstat` again you should see that your job has started running: -
$ qstat
-Job ID     Name             User            Time Use S Queue
----------- ---------------- --------------- -------- - -------
-{{jobid}}     run.sh           {{userid}}        0:00:01  R {{othercluster}}
-
- -If you don't see your job in the output of the `qstat` command anymore, your job has likely completed. - -Read this section[2] on how to interpret the output. - -[1]: running_batch_jobs.md#when-will-my-job-start -[2]: running_batch_jobs.md#monitoring-and-managing-your-jobs - -reference: docs.hpc.ugent.be/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt deleted file mode 100644 index e756b9a3cbea..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt deleted file mode 100644 index bac5dfcbfbec..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt +++ /dev/null @@ -1,18 +0,0 @@ -it is recommended to make use of the `ssh` command in a terminal to get the most flexibility. - -Assuming you have already generated SSH keys in the previous step (Getting Access[1]), and that they are in a default location, you should now be able to login by running the following command: - -
ssh {{userid}}@{{loginnode}}
- -!!! Warning "User your own VSC account id" - - Replace {{userid}} with your VSC account id (see ) - -!!! Tip - - You can also still use the web portal (see shell access on web portal[2]) - -[1]: #getting-access -[2]: web_portal.md#shell-access - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt deleted file mode 100644 index f0b9d83bed36..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Getting-Started.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt deleted file mode 100644 index 441b54c70424..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt deleted file mode 100644 index d72ffccf01ad..000000000000 --- a/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt deleted file mode 100644 index 744c2c3db7a7..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt deleted file mode 100644 index aca6e05d28ce..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt +++ /dev/null @@ -1,21 +0,0 @@ - -On your local machine you can run: -
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
-curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
-
- -Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). -
scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ 
-
ssh  {{userid}}@{{ loginnode }} 
- -!!! Warning "User your own VSC account id" - - Replace {{userid}} with your VSC account id (see ) - -!!! Info - - For more information about transfering files or `scp`, see tranfer files from/to hpc[1]. - -[1]: connecting.md#transfer-files-tofrom-the-hpc - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt deleted file mode 100644 index 93e6fdff1713..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/linux/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Linux/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt deleted file mode 100644 index 8732e5869811..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt deleted file mode 100644 index 
2b1de2be8385..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt +++ /dev/null @@ -1,13 +0,0 @@ -it should be easy to make use of the `ssh` command in a terminal, but the web portal will work too. -The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). - -See shell access[2] when using the web portal, or -connection to the HPC-UGent infrastructure[3] when using a terminal. - -Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps. - -[1]: web_portal.md -[2]: web_portal.md#shell-access -[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt deleted file mode 100644 index 4e60f862a0a1..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Getting-Started.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt deleted file mode 100644 index f7ae9f96226f..000000000000 --- a/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt deleted file mode 100644 index 71f384bcf17c..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt deleted file mode 100644 index d72ba48195a5..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt deleted file mode 100644 index fce05042ab2c..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt +++ /dev/null @@ -1,21 +0,0 @@ - 
-On your local machine you can run: -
curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py
-curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh
-
- -Using the `scp` command, the files can be copied from your local host to your *home directory* (`~`) on the remote host (HPC). -
scp tensorflow_mnist.py run.sh {{userid}}{{ loginnode }}:~ 
-
ssh  {{userid}}@{{ loginnode }} 
- -!!! Warning "User your own VSC account id" - - Replace {{userid}} with your VSC account id (see ) - -!!! Info - - For more information about transfering files or `scp`, see tranfer files from/to hpc[1]. - -[1]: connecting.md#transfer-files-tofrom-the-hpc - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt deleted file mode 100644 index 2ef8770504b5..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/macos/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/macOS/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt deleted file mode 100644 index 874af3657046..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Access/Getting-Access.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-access diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt deleted file mode 100644 index 
ce0b873b2b0e..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Connected/Getting-Connected.txt +++ /dev/null @@ -1,13 +0,0 @@ -it is recommended to use the web portal. -The web portal[1] offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). - -See shell access[2] when using the web portal, or -connection to the HPC-UGent infrastructure[3] when using a terminal. - -Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps. - -[1]: web_portal.md -[2]: web_portal.md#shell-access -[3]: connecting.md#first-time-connection-to-the-hpc-infrastructure - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-connected diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt deleted file mode 100644 index 44d1f17b73be..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Getting-Started.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#getting-started diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt deleted file mode 100644 index 730fbbc3b740..000000000000 --- a/scripts/HPC chatbot 
preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Inspect-your-results/Inspect-your-results.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#inspect-your-results diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt deleted file mode 100644 index 55df915125a7..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Next-steps/Next-steps.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#next-steps diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt deleted file mode 100644 index f67d48ece4a1..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Submitting-a-job/Submitting-a-job.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#submitting-a-job diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt deleted file mode 100644 index dce86fc7cf3e..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Transfer-your-files/Transfer-your-files.txt +++ 
/dev/null @@ -1,15 +0,0 @@ - -The HPC-UGent web portal[1] provides a file browser that allows uploading files. -For more information see the file browser section[2]. - -Upload both files (`run.sh` and `tensorflow-mnist.py`) to your **home directory** and go back to your shell. - -!!! Info - - As an alternative, you can use WinSCP (see our section[3]) - -[1]: https://login.hpc.ugent.be -[2]: web_portal.md#file-browser -[3]: connecting.md#winscp - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#transfer-your-files diff --git a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt b/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt deleted file mode 100644 index bdd7387e3790..000000000000 --- a/scripts/HPC chatbot preprocessor/copies/parsed_mds/os_specific/windows/getting_started_copy/Getting-Started/Wait-for-job-to-be-executed/Wait-for-job-to-be-executed.txt +++ /dev/null @@ -1,2 +0,0 @@ - -reference: docs.hpc.ugent.be/Windows/getting_started_copy/#wait-for-job-to-be-executed diff --git a/scripts/HPC chatbot preprocessor/if_mangler.py b/scripts/HPC chatbot preprocessor/if_mangler.py deleted file mode 100644 index 46b121610c95..000000000000 --- a/scripts/HPC chatbot preprocessor/if_mangler.py +++ /dev/null @@ -1,72 +0,0 @@ -import re - -# global variable to keep track of latest if-statement scope -is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} - - -def mangle_os_ifs(line): - global is_os - - match = re.search(r'\{%(.*?)%}(.*)', line) - - start_index = 0 - added_length = 0 - - while match: - - constr_match = re.search(r'\{%.*?%}', match.string) - if_match = re.search(r'if ', match.group(1)) - if_os_match = re.search(r'if OS == ', 
match.group(1)) - endif_match = re.search(r'endif', match.group(1)) - - if endif_match: - if is_os == 2: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 0 - if is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 2 - elif is_os == 1: - is_os = 2 - elif if_match: - if if_os_match: - if is_os == 2: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 3 - else: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 2 - else: - if is_os == 2: - is_os = 1 - else: - is_os = 0 - else: - if is_os == 2 or is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - start_index += constr_match.end() - match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) - return line - - -def mangle_ifs(directory, file): - with open(".\\if_mangled_files\\" + file, 'w') as 
write_file: - with open(directory, 'r') as read_file: - for line in read_file: - new_line = mangle_os_ifs(line) - write_file.write(new_line) diff --git a/scripts/HPC chatbot preprocessor/jinja_parser.py b/scripts/HPC chatbot preprocessor/jinja_parser.py deleted file mode 100644 index 603a453ecf7d..000000000000 --- a/scripts/HPC chatbot preprocessor/jinja_parser.py +++ /dev/null @@ -1,31 +0,0 @@ -import yaml -from jinja2 import Template, FileSystemLoader, Environment, ChoiceLoader -from if_mangler import mangle_ifs - - -# function that let's jinja do its thing to format the files expect for the os-related if-statements -def jinja_parser(filename, copy_location): - # Read the YAML file - with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: - words_dict = yaml.safe_load(yml_file) - - # ugly fix for index.md error - additional_context = { - 'config': { - 'repo_url': 'https://github.com/hpcugent/vsc_user_docs' - } - } - combined_context = {**words_dict, **additional_context} - - # Mangle the OS-related if-statements - mangle_ifs(copy_location, filename) - - # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) - templateEnv = Environment(loader=template_loader) - template = templateEnv.get_template(filename) - rendered_content = template.render(combined_context) - - # Save the rendered content to a new file - with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: - output_file.write(rendered_content) From b595301e5bd4b0c19a1beea04affeedb31e00a8c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 13:12:37 +0200 Subject: [PATCH 018/145] Rename directory --- .../chatbot_parser.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{HPC chatbot preprocessor => HPC_chatbot_preprocessor}/chatbot_parser.py (100%) diff --git a/scripts/HPC chatbot preprocessor/chatbot_parser.py 
b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py similarity index 100% rename from scripts/HPC chatbot preprocessor/chatbot_parser.py rename to scripts/HPC_chatbot_preprocessor/chatbot_parser.py From 90c8ab760b7ff96d1536d5d85e29a36ea8bf90b5 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 13:26:20 +0200 Subject: [PATCH 019/145] add a main function --- .../chatbot_parser.py | 256 +++++++++--------- 1 file changed, 130 insertions(+), 126 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b6e42e99ea04..79951a5d0da8 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -30,7 +30,7 @@ problem_files = ["getting_started.md", "navigating.md"] # global variable to keep track of latest if-statement scope -is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} +is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} ################### define functions ################### @@ -356,138 +356,142 @@ def make_valid_title(s): return valid_filename +def main(): + global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos + # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason + remove_directory_tree(".\\parsed_mds") + remove_directory_tree(".\\copies") + remove_directory_tree(".\\if_mangled_files") -# remove the directories from a previous run of the parser -remove_directory_tree(".\\parsed_mds") -remove_directory_tree(".\\copies") -remove_directory_tree(".\\if_mangled_files") + 
# make the necessary directories + if not os.path.exists(".\\copies"): + os.mkdir(".\\copies") -# make the necessary directories -if not os.path.exists(".\\copies"): - os.mkdir(".\\copies") + if not os.path.exists(".\\copies\\linux"): + os.mkdir(".\\copies\\linux") -if not os.path.exists(".\\copies\\linux"): - os.mkdir(".\\copies\\linux") + if not os.path.exists(".\\parsed_mds"): + os.mkdir(".\\parsed_mds") -if not os.path.exists(".\\parsed_mds"): - os.mkdir(".\\parsed_mds") + if not os.path.exists(".\\if_mangled_files"): + os.mkdir(".\\if_mangled_files") -if not os.path.exists(".\\if_mangled_files"): - os.mkdir(".\\if_mangled_files") + for filenames in [filenames_generic, filenames_linux]: + for filename in filenames.keys(): + ################### define/reset loop specific variables ################### -for filenames in [filenames_generic, filenames_linux]: - for filename in filenames.keys(): - ################### define/reset loop specific variables ################### + # variable that keeps track of whether file is part of the linux tutorial + is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) - # variable that keeps track of whether file is part of the linux tutorial - is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) - - # make a copy of the original file in order to make sure the original does not get altered - if is_linux_tutorial: - copy_file = ".\\copies\\linux\\" + filename - else: - copy_file = ".\\copies\\" + filename - shutil.copyfile(filenames[filename], copy_file) - - # variable that keeps track of the directories that are used to write in at different levels - if is_linux_tutorial: - root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" - else: - 
root_dir_generic = ".\\parsed_mds\\generic\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" - - # variable for the main title (needed for reference links) - main_title = filename[:-3] - - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for i in range(5)] - - # variable to keep track whether we're dealing with OS-specific info or not - OS_specific = False - - # variable that keeps track of the latest non-zero level title and corresponding directory - last_title_level = 1 - last_title = None - last_directory = None - last_was_title = False - - # list to keep track of links in the text - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - - # dictionaries to keep track of current OS - active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} - - # variable that shows whether the first title has been reached yet - after_first_title = False - - # variable that is used to be sure that we are detecting titles and not comments from codeblocks - in_code_block = False - - ################### actually parse the md file ################### - - # create directories for the source markdown file - create_directory(root_dir_generic) - create_directory(".\\parsed_mds\\os_specific") - create_directory(root_dir_os_specific_linux) - create_directory(root_dir_os_specific_windows) - create_directory(root_dir_os_specific_macos) - create_directory(root_dir_generic + curr_dirs[0]) - create_directory(root_dir_os_specific_linux + curr_dirs[0]) - create_directory(root_dir_os_specific_windows + curr_dirs[0]) - create_directory(root_dir_os_specific_macos + curr_dirs[0]) - - # process the jinja macros - jinja_parser(filename, copy_file) - - # convert the files without proper markdown layout into markdown 
using pandoc - if "linux-tutorial" in filenames[filename] and filename in problem_files: - pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) - - # open the file and store line by line in the right file - with open(copy_file, 'r') as readfile: - - for line in readfile: - title_level, title, directory = check_for_title(line) - - detect_in_code_block(line) - - # line is a title with a maximum depth of 4 - if title_level > 0: - last_title_level = title_level - last_title = title - last_directory = directory - after_first_title = True - - # line is not a title - elif after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": + # make a copy of the original file in order to make sure the original does not get altered + if is_linux_tutorial: + copy_file = ".\\copies\\linux\\" + filename + else: + copy_file = ".\\copies\\" + filename + shutil.copyfile(filenames[filename], copy_file) + + # variable that keeps track of the directories that are used to write in at different levels + if is_linux_tutorial: + root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" + else: + root_dir_generic = ".\\parsed_mds\\generic\\" + root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" + root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" + root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" + + # variable for the main title (needed for reference links) + main_title = filename[:-3] + + # variable that keeps track of the 
directories that are used to write in at different levels + curr_dirs = [filename[:-3] for i in range(5)] + + # variable to keep track whether we're dealing with OS-specific info or not + OS_specific = False + + # variable that keeps track of the latest non-zero level title and corresponding directory + last_title_level = 1 + last_title = None + last_directory = None + last_was_title = False + + # list to keep track of links in the text + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] + + # dictionaries to keep track of current OS + active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + + # variable that shows whether the first title has been reached yet + after_first_title = False + + # variable that is used to be sure that we are detecting titles and not comments from codeblocks + in_code_block = False + + ################### actually parse the md file ################### + + # create directories for the source markdown file + create_directory(root_dir_generic) + create_directory(".\\parsed_mds\\os_specific") + create_directory(root_dir_os_specific_linux) + create_directory(root_dir_os_specific_windows) + create_directory(root_dir_os_specific_macos) + create_directory(root_dir_generic + curr_dirs[0]) + create_directory(root_dir_os_specific_linux + curr_dirs[0]) + create_directory(root_dir_os_specific_windows + curr_dirs[0]) + create_directory(root_dir_os_specific_macos + curr_dirs[0]) + + # process the jinja macros + jinja_parser(filename, copy_file) + + # convert the files without proper markdown layout into markdown using pandoc + if "linux-tutorial" in filenames[filename] and filename in problem_files: + pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) + + # open the file and store line by line in the right file + with open(copy_file, 'r') as readfile: + + for line in readfile: + title_level, title, directory = check_for_title(line) + + detect_in_code_block(line) + + # line is 
a title with a maximum depth of 4 + if title_level > 0: + last_title_level = title_level + last_title = title + last_directory = directory + after_first_title = True + + # line is not a title + elif after_first_title: + # check for if-statements and write the appropriate lines in the right files + next_action = check_if_statements(line) + while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": + if next_action[0] == "write_text_and_check_extra_message": + choose_and_write_to_file(next_action[2]) + next_action = check_if_statements(next_action[1]) + + if next_action[0] == "write_text": choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) - - if next_action[0] == "write_text": - choose_and_write_to_file(next_action[2]) - - # write end of file for the last file - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", - links_linux, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", - links_windows, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", - links_macos, is_linux_tutorial) - -remove_directory_tree(".\\copies") -remove_directory_tree(".\\if_mangled_files") + + # write end of file for the last file + write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + links_linux, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + links_windows, is_linux_tutorial) + write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + 
last_title + ".txt", "macOS", + links_macos, is_linux_tutorial) + + remove_directory_tree(".\\copies") + remove_directory_tree(".\\if_mangled_files") + + +main() # TODO: reconsider maximum depth to be detected as title (now at four) # TODO: adapt script to be used from command line From b8ae7066d089202d8554ae5f00e98a9281d7c25d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 14:16:27 +0200 Subject: [PATCH 020/145] make file paths non os-specific --- .../chatbot_parser.py | 106 +++++++++--------- 1 file changed, 56 insertions(+), 50 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 79951a5d0da8..f67d0f0d5295 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,7 +12,7 @@ ################### define global variables ################### # variable that keeps track of the source directories -source_directories = ["..\\..\\mkdocs\\docs\\HPC\\", "..\\..\\mkdocs\\docs\\HPC\\linux-tutorial"] +source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] # list of all the filenames filenames_generic = {} @@ -67,22 +67,21 @@ def check_for_title(curr_line): return 0, None, None else: if last_title is not None: - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial) + write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", links_linux, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + 
write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), "Windows", links_windows, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", links_macos, is_linux_tutorial) reset_link_lists() - curr_dirs[logic_output] = curr_dirs[logic_output - 1] + "\\" + make_valid_title( - curr_line[logic_output + 1:-1].replace(' ', '-')) + curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) - create_directory(root_dir_generic + curr_dirs[logic_output]) - create_directory(root_dir_os_specific_linux + curr_dirs[logic_output]) - create_directory(root_dir_os_specific_windows + curr_dirs[logic_output]) - create_directory(root_dir_os_specific_macos + curr_dirs[logic_output]) + create_directory(os.path.join(root_dir_generic, curr_dirs[logic_output])) + create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[logic_output])) + create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[logic_output])) + create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[logic_output])) update_lower_curr_dir(curr_dirs[logic_output], logic_output) return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] @@ -127,8 +126,12 @@ def replace_markdown_markers(curr_line, linklist): # function that let's jinja do its thing to format the files expect for the os-related if-statements def jinja_parser(filename, copy_location): + + # YAML file location + yml_file_path = os.path.join('..', '..', 'mkdocs', 'extra', 'gent.yml') + # Read the YAML file - with open('..\\..\\mkdocs\\extra\\gent.yml', 'r') as yml_file: + with open(yml_file_path, 'r') as yml_file: words_dict = yaml.safe_load(yml_file) # ugly fix for index.md error @@ -143,7 
+146,7 @@ def jinja_parser(filename, copy_location): mangle_ifs(copy_location, filename) # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath='.\\if_mangled_files'), FileSystemLoader(searchpath="..\\..\\mkdocs\\docs\\HPC")]) + template_loader = ChoiceLoader([FileSystemLoader(searchpath='if_mangled_files'), FileSystemLoader(searchpath=os.path.join("..", "..", "mkdocs", "docs", "HPC"))]) templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) @@ -214,7 +217,7 @@ def mangle_os_ifs(line): def mangle_ifs(directory, file): - with open(".\\if_mangled_files\\" + file, 'w') as write_file: + with open(os.path.join("if_mangled_files", file), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: new_line = mangle_os_ifs(line) @@ -305,13 +308,13 @@ def choose_and_write_to_file(curr_line): # check that the line is part of the website for gent if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ active_OS_if_states["macos"] == "inactive": - write_text_to_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", curr_line) + write_text_to_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), curr_line) if active_OS_if_states["linux"] == "active": - write_text_to_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", curr_line) + write_text_to_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), curr_line) if active_OS_if_states["windows"] == "active": - write_text_to_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", curr_line) + write_text_to_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), curr_line) if active_OS_if_states["macos"] == "active": - write_text_to_file(root_dir_os_specific_macos + last_directory + "\\" + 
last_title + ".txt", curr_line) + write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line) # function that adds a reference link at the end of every txt file @@ -356,25 +359,26 @@ def make_valid_title(s): return valid_filename + def main(): global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - remove_directory_tree(".\\parsed_mds") - remove_directory_tree(".\\copies") - remove_directory_tree(".\\if_mangled_files") + remove_directory_tree("parsed_mds") + remove_directory_tree("copies") + remove_directory_tree("if_mangled_files") # make the necessary directories - if not os.path.exists(".\\copies"): - os.mkdir(".\\copies") + if not os.path.exists("copies"): + os.mkdir("copies") - if not os.path.exists(".\\copies\\linux"): - os.mkdir(".\\copies\\linux") + if not os.path.exists(os.path.join("copies", "linux")): + os.mkdir(os.path.join("copies", "linux")) - if not os.path.exists(".\\parsed_mds"): - os.mkdir(".\\parsed_mds") + if not os.path.exists("parsed_mds"): + os.mkdir("parsed_mds") - if not os.path.exists(".\\if_mangled_files"): - os.mkdir(".\\if_mangled_files") + if not os.path.exists("if_mangled_files"): + os.mkdir("if_mangled_files") for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): @@ -385,22 +389,22 @@ def main(): # make a copy of the original file in order to make sure the original does not get altered if is_linux_tutorial: - copy_file = ".\\copies\\linux\\" + filename + copy_file = os.path.join("copies", "linux", filename) else: - copy_file = ".\\copies\\" + filename + copy_file = os.path.join("copies", filename) 
shutil.copyfile(filenames[filename], copy_file) # variable that keeps track of the directories that are used to write in at different levels if is_linux_tutorial: - root_dir_generic = ".\\parsed_mds\\generic\\linux_tutorial\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\linux_tutorial\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\linux_tutorial\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\linux_tutorial\\" + root_dir_generic = os.path.join("parsed_mds", "generic", "linux_tutorial") + root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux", "linux_tutorial") + root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows", "linux_tutorial") + root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos", "linux_tutorial") else: - root_dir_generic = ".\\parsed_mds\\generic\\" - root_dir_os_specific_linux = ".\\parsed_mds\\os_specific\\linux\\" - root_dir_os_specific_windows = ".\\parsed_mds\\os_specific\\windows\\" - root_dir_os_specific_macos = ".\\parsed_mds\\os_specific\\macos\\" + root_dir_generic = os.path.join("parsed_mds", "generic") + root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux") + root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows") + root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos") # variable for the main title (needed for reference links) main_title = filename[:-3] @@ -436,14 +440,14 @@ def main(): # create directories for the source markdown file create_directory(root_dir_generic) - create_directory(".\\parsed_mds\\os_specific") + create_directory(os.path.join("parsed_mds", "os_specific")) create_directory(root_dir_os_specific_linux) create_directory(root_dir_os_specific_windows) create_directory(root_dir_os_specific_macos) - create_directory(root_dir_generic + curr_dirs[0]) - create_directory(root_dir_os_specific_linux + 
curr_dirs[0]) - create_directory(root_dir_os_specific_windows + curr_dirs[0]) - create_directory(root_dir_os_specific_macos + curr_dirs[0]) + create_directory(os.path.join(root_dir_generic, curr_dirs[0])) + create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[0])) + create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[0])) + create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[0])) # process the jinja macros jinja_parser(filename, copy_file) @@ -480,16 +484,18 @@ def main(): choose_and_write_to_file(next_action[2]) # write end of file for the last file - write_end_of_file(root_dir_generic + last_directory + "\\" + last_title + ".txt", "", links_generic, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_linux + last_directory + "\\" + last_title + ".txt", "Linux", + write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, + is_linux_tutorial) + write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", links_linux, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_windows + last_directory + "\\" + last_title + ".txt", "Windows", + write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), + "Windows", links_windows, is_linux_tutorial) - write_end_of_file(root_dir_os_specific_macos + last_directory + "\\" + last_title + ".txt", "macOS", + write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", links_macos, is_linux_tutorial) - remove_directory_tree(".\\copies") - remove_directory_tree(".\\if_mangled_files") + remove_directory_tree("copies") + remove_directory_tree("if_mangled_files") main() From b7514973facd2edeb274161b67ae4eee53140229 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 14:54:41 +0200 Subject: [PATCH 021/145] use docstrings to document the functions --- .../chatbot_parser.py | 143 
+++++++++++++++--- 1 file changed, 125 insertions(+), 18 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index f67d0f0d5295..f5596fd5b1d6 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -34,14 +34,24 @@ ################### define functions ################### -# function that removes the previous file structure before starting the process of making a new one def remove_directory_tree(old_directory): + """ + function that removes a full directory tree + + :param old_directory: the directory to be removed + :return: + """ if os.path.exists(old_directory): shutil.rmtree(old_directory) -# function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title) def check_for_title_logic(curr_line): + """ + function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title) + + :param curr_line: the line to be checked for a title + :return: depth of the title + """ global curr_dirs match = re.match(r'^#+ ', curr_line) if match and len(match.group(0)) <= 5: @@ -50,8 +60,12 @@ def check_for_title_logic(curr_line): return 0 -# function that resets the contents of the link_lists def reset_link_lists(): + """ + function that resets the contents of the link_lists + + :return: + """ global links_generic, links_linux, links_windows, links_macos links_generic = [] links_linux = [] @@ -59,8 +73,15 @@ def reset_link_lists(): links_macos = [] -# function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables def check_for_title(curr_line): + """ + function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables + + :param curr_line: the line to be checked for 
a title + :return: the depth of the title + :return: the title found in the line if any + :return: the new directory in which the next file will be written + """ global curr_dirs, last_title, in_code_block logic_output = check_for_title_logic(curr_line) if logic_output == 0 or in_code_block: @@ -87,28 +108,51 @@ def check_for_title(curr_line): return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] -# function used to detect codeblocks and make sure the comments don't get detected as titles def detect_in_code_block(curr_line): + """ + function used to detect codeblocks and make sure the comments don't get detected as titles + + :param curr_line: the line in which the start or end of a codeblock needs to be detected + :return: + """ global in_code_block if '```' in curr_line or (('
' in curr_line) ^ ('
' in curr_line)): in_code_block = not in_code_block -# function that creates directories if needed def create_directory(new_directory): + """ + function that creates new directories + + :param new_directory: directory to be created + :return: + """ if not os.path.exists(new_directory): os.mkdir(new_directory) -# function that updates the curr_dir variables when needed def update_lower_curr_dir(curr_directory, level): + """ + function that updates the curr_dir variables when needed + + :param curr_directory: the current directory to which the lower level current directories need to be updated + :param level: the depth of the current directory + :return: + """ global curr_dirs for i in range(level + 1, 4): curr_dirs[i] = curr_directory -# function that replaces certain markdown structures with the equivalent used on the website def replace_markdown_markers(curr_line, linklist): + """ + function that replaces certain markdown structures with the equivalent used on the website + + :param curr_line: the current line on which markdown structures need to be replaced + :param linklist: the list used to store links that need to be printed at the end of the file + :return curr_line: the adapted current line + :return linklist: the updated linklist + """ # replace links with a reference matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: @@ -124,9 +168,14 @@ def replace_markdown_markers(curr_line, linklist): return curr_line, linklist -# function that let's jinja do its thing to format the files expect for the os-related if-statements def jinja_parser(filename, copy_location): + """ + function that let's jinja do its thing to format the files except for the os-related if-statements + :param filename: the name of the file that needs to be formatted using jinja + :param copy_location: the location of the file that needs to be formatted using jinja + :return: + """ # YAML file location yml_file_path = os.path.join('..', '..', 'mkdocs', 'extra', 'gent.yml') @@ 
-157,6 +206,12 @@ def jinja_parser(filename, copy_location): def mangle_os_ifs(line): + """ + function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. + + :param line: the current line to check for os-related if-statements + :return line: the modified line with mangled os-related if-statements + """ global is_os match = re.search(r'\{%(.*?)%}(.*)', line) @@ -217,6 +272,13 @@ def mangle_os_ifs(line): def mangle_ifs(directory, file): + """ + function that writes the if-mangled version of a file to a location where the jinja parser will use it + + :param directory: the directory of the file to be if mangled + :param file: the filename of the file to be mangled + :return: + """ with open(os.path.join("if_mangled_files", file), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: @@ -224,8 +286,19 @@ def mangle_ifs(directory, file): write_file.write(new_line) -# function that checks for if-statements def check_if_statements(curr_line): + """ + function that checks for if-statements + + :param curr_line: the line to be checked for if-statements to build the directory structure + :return: the next action to be done with the line: + "done": An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed. + "check_extra_message": An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. + "write_text": No if-statement has been found, write the current line to a file (can also be part of the current line) + "write_text_and_check_extra_message": An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line. 
+ :return: the extra message to be checked, if any + :return: the text to be written to the file, if any + """ # check whether the first part of the line contains information wrt if-statements match = re.search(r'^\{-if-%([^%]*)%-if-}(.*)', curr_line) @@ -288,8 +361,14 @@ def check_if_statements(curr_line): return "write_text", None, curr_line -# function that writes a line to a file def write_text_to_file(file_name, curr_line): + """ + function that writes a line to a file + + :param file_name: target file to write the line to + :param curr_line: line to be written to the file + :return: + """ global links_generic, links_linux, links_windows, links_macos with open(file_name, "a") as write_file: if "generic" in file_name: @@ -303,8 +382,13 @@ def write_text_to_file(file_name, curr_line): write_file.write(curr_line) -# function that decides what file to write text to def choose_and_write_to_file(curr_line): + """ + function that decides what file to write text to + + :param curr_line: line to be written to a file + :return: + """ # check that the line is part of the website for gent if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ active_OS_if_states["macos"] == "inactive": @@ -317,14 +401,28 @@ def choose_and_write_to_file(curr_line): write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line) -# function that adds a reference link at the end of every txt file def add_reference_link(file_location, reference_link): + """ + function that adds a reference link at the end of every txt file + + :param file_location: the file that needs a reference link + :param reference_link: the reference link that needs to be written + :return: + """ with open(file_location, 'a') as write_file: write_file.write("\nreference: " + reference_link + "\n") -# function that adds the links that should be at the end of a file def write_end_of_file(file_location, OS, linklist, 
is_linux_tutorial_): + """ + function that adds the links that should be at the end of a file + + :param file_location: the location of the file + :param OS: the OS of the file + :param linklist: the links that should be at the end of the file + :param is_linux_tutorial_: boolean indicating whether the file is part of the linux tutorial + :return: + """ if len(OS) > 0: OS = OS + "/" @@ -343,16 +441,21 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) -# function that makes sure all titles can be used as valid filenames -def make_valid_title(s): +def make_valid_title(title): + """ + function that makes sure all titles can be used as valid filenames + + :param title: the string that will be used as title and filename + :return valid_filename: the adapted title that can be used as filename + """ # Define a regex pattern for invalid characters on both Windows and Linux invalid_chars = r'[<>:"/\\|?*\0()]' # get rid of extra information between {} brackets - s = re.sub(r'\{.*?}', '', s) + s = re.sub(r'\{.*?}', '', title) # Remove invalid characters - valid_filename = re.sub(invalid_chars, '', s) + valid_filename = re.sub(invalid_chars, '', title) # Strip leading/trailing whitespace valid_filename = valid_filename.strip().strip('-') @@ -361,6 +464,10 @@ def make_valid_title(s): def main(): + """ + main function + :return: + """ global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason remove_directory_tree("parsed_mds") From 
0f8eb5dfa6e7cc8adee238fd7467dbc9247e0012 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 15:15:56 +0200 Subject: [PATCH 022/145] rewrite the if-mangler to make it more readable --- .../chatbot_parser.py | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index f5596fd5b1d6..4f1865b94112 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -225,35 +225,27 @@ def mangle_os_ifs(line): if_match = re.search(r'if ', match.group(1)) if_os_match = re.search(r'if OS == ', match.group(1)) endif_match = re.search(r'endif', match.group(1)) + pos_first_mangle = constr_match.start() + start_index + added_length + 1 + pos_second_mangle = constr_match.end() + start_index + added_length - 1 + # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: - if is_os == 2: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 - is_os = 0 - if is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] + if is_os == 2 or is_os == 3: + line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] added_length += 8 - is_os = 2 + if is_os == 2: + is_os = 0 + elif is_os == 3: + is_os = 2 elif is_os 
== 1: is_os = 2 elif if_match: if if_os_match: + line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] + added_length += 8 if is_os == 2: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 is_os = 3 else: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] - added_length += 8 is_os = 2 else: if is_os == 2: @@ -262,10 +254,9 @@ def mangle_os_ifs(line): is_os = 0 else: if is_os == 2 or is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[ - constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[ - constr_match.end() + start_index + added_length - 1:] + line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[constr_match.end() + start_index + added_length - 1:] added_length += 8 + start_index += constr_match.end() match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) return line From 9938e921674d5e46a3917feef8a780f22b427440 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 16:09:23 +0200 Subject: [PATCH 023/145] got rid of most global variables --- .../chatbot_parser.py | 162 +++++++----------- 1 file changed, 63 insertions(+), 99 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 
4f1865b94112..e7a88e4de7cf 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -29,9 +29,6 @@ # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc problem_files = ["getting_started.md", "navigating.md"] -# global variable to keep track of latest if-statement scope -is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} - ################### define functions ################### def remove_directory_tree(old_directory): @@ -45,57 +42,48 @@ def remove_directory_tree(old_directory): shutil.rmtree(old_directory) -def check_for_title_logic(curr_line): - """ - function that checks whether the current line has a title of level 4 at maximum (returns the level of the title or 0 if the line is not a title) - - :param curr_line: the line to be checked for a title - :return: depth of the title - """ - global curr_dirs - match = re.match(r'^#+ ', curr_line) - if match and len(match.group(0)) <= 5: - return len(match.group(0)) - 1 - else: - return 0 - - -def reset_link_lists(): - """ - function that resets the contents of the link_lists - - :return: - """ - global links_generic, links_linux, links_windows, links_macos - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - - -def check_for_title(curr_line): +def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial_, in_code_block_): """ function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables :param curr_line: the line to be checked for a title + :param main_title: the main title of the file, needed in the case where a file is finished + :param last_directory: the most recently encountered directory + :param last_title: the most recently encountered title + :param 
curr_dirs: the most recent directories at each title level + :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial + :param in_code_block_: boolean to indicate whether the current line is part of a codeblock :return: the depth of the title :return: the title found in the line if any :return: the new directory in which the next file will be written """ - global curr_dirs, last_title, in_code_block - logic_output = check_for_title_logic(curr_line) - if logic_output == 0 or in_code_block: - return 0, None, None + global links_generic, links_linux, links_windows, links_macos + + # detect titles + match = re.match(r'^#+ ', curr_line) + if match and len(match.group(0)) <= 5: + logic_output = len(match.group(0)) - 1 + else: + logic_output = 0 + + # make necessary changes if a title has been detected + if logic_output == 0 or in_code_block_: + return 0, None, None, curr_dirs else: if last_title is not None: - write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial) + write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial_, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", - links_linux, is_linux_tutorial) + links_linux, is_linux_tutorial_, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), "Windows", - links_windows, is_linux_tutorial) + links_windows, is_linux_tutorial_, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", - links_macos, is_linux_tutorial) - reset_link_lists() + links_macos, is_linux_tutorial_, main_title, last_title) + + # reset the link lists + links_generic = [] + links_linux = [] + links_windows = [] + links_macos = [] curr_dirs[logic_output] = 
os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) @@ -104,20 +92,11 @@ def check_for_title(curr_line): create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[logic_output])) create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[logic_output])) - update_lower_curr_dir(curr_dirs[logic_output], logic_output) - return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output] - - -def detect_in_code_block(curr_line): - """ - function used to detect codeblocks and make sure the comments don't get detected as titles + # update the lower order current directories + for i in range(logic_output + 1, 4): + curr_dirs[i] = curr_dirs[logic_output] - :param curr_line: the line in which the start or end of a codeblock needs to be detected - :return: - """ - global in_code_block - if '```' in curr_line or (('
' in curr_line) ^ ('
' in curr_line)): - in_code_block = not in_code_block + return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs def create_directory(new_directory): @@ -131,19 +110,6 @@ def create_directory(new_directory): os.mkdir(new_directory) -def update_lower_curr_dir(curr_directory, level): - """ - function that updates the curr_dir variables when needed - - :param curr_directory: the current directory to which the lower level current directories need to be updated - :param level: the depth of the current directory - :return: - """ - global curr_dirs - for i in range(level + 1, 4): - curr_dirs[i] = curr_directory - - def replace_markdown_markers(curr_line, linklist): """ function that replaces certain markdown structures with the equivalent used on the website @@ -205,14 +171,14 @@ def jinja_parser(filename, copy_location): output_file.write(rendered_content) -def mangle_os_ifs(line): +def mangle_os_ifs(line, is_os): """ function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. :param line: the current line to check for os-related if-statements + :param is_os: boolean keep track of the current os-state of the if-statements. 
Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} :return line: the modified line with mangled os-related if-statements """ - global is_os match = re.search(r'\{%(.*?)%}(.*)', line) @@ -259,7 +225,7 @@ def mangle_os_ifs(line): start_index += constr_match.end() match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) - return line + return line, is_os def mangle_ifs(directory, file): @@ -270,18 +236,22 @@ def mangle_ifs(directory, file): :param file: the filename of the file to be mangled :return: """ + # variable to keep track of latest if-statement scope + is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} + with open(os.path.join("if_mangled_files", file), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: - new_line = mangle_os_ifs(line) + new_line, is_os = mangle_os_ifs(line, is_os) write_file.write(new_line) -def check_if_statements(curr_line): +def check_if_statements(curr_line, active_OS_if_states): """ function that checks for if-statements :param curr_line: the line to be checked for if-statements to build the directory structure + :param active_OS_if_states: dictionary keeping track of the active OS states according to the if-statements :return: the next action to be done with the line: "done": An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed. "check_extra_message": An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. 
@@ -373,11 +343,14 @@ def write_text_to_file(file_name, curr_line): write_file.write(curr_line) -def choose_and_write_to_file(curr_line): +def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title): """ function that decides what file to write text to :param curr_line: line to be written to a file + :param active_OS_if_states: dictionary keeping track of which OSes are active according to the if-statements + :param last_directory: most recently made directory + :param last_title: the most recently encountered title :return: """ # check that the line is part of the website for gent @@ -392,19 +365,7 @@ def choose_and_write_to_file(curr_line): write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line) -def add_reference_link(file_location, reference_link): - """ - function that adds a reference link at the end of every txt file - - :param file_location: the file that needs a reference link - :param reference_link: the reference link that needs to be written - :return: - """ - with open(file_location, 'a') as write_file: - write_file.write("\nreference: " + reference_link + "\n") - - -def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): +def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_title, last_title): """ function that adds the links that should be at the end of a file @@ -412,6 +373,8 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): :param OS: the OS of the file :param linklist: the links that should be at the end of the file :param is_linux_tutorial_: boolean indicating whether the file is part of the linux tutorial + :param main_title: the main title of the file, to be used in the reference link + :param last_title: the most recently encountered title :return: """ if len(OS) > 0: @@ -429,7 +392,8 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_): linux_part = "" # finally add the 
reference link - add_reference_link(file_location, "docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) + with open(file_location, 'a') as write_file: + write_file.write("\nreference: docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-') + "\n") def make_valid_title(title): @@ -459,7 +423,7 @@ def main(): main function :return: """ - global main_title, active_OS_if_states, last_directory, root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, is_linux_tutorial, in_code_block, last_title, curr_dirs, links_generic, links_linux, links_windows, links_macos + global root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, links_generic, links_linux, links_windows, links_macos # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason remove_directory_tree("parsed_mds") remove_directory_tree("copies") @@ -558,9 +522,10 @@ def main(): with open(copy_file, 'r') as readfile: for line in readfile: - title_level, title, directory = check_for_title(line) + title_level, title, directory, curr_dirs = check_for_title(line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial, in_code_block) - detect_in_code_block(line) + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block # line is a title with a maximum depth of 4 if title_level > 0: @@ -572,25 +537,24 @@ def main(): # line is not a title elif after_first_title: # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line) + next_action = check_if_statements(line, active_OS_if_states) while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2]) - next_action = check_if_statements(next_action[1]) + choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title) + next_action = check_if_statements(next_action[1], active_OS_if_states) if next_action[0] == "write_text": - choose_and_write_to_file(next_action[2]) + choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title) # write end of file for the last file write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, - is_linux_tutorial) + is_linux_tutorial, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", - links_linux, is_linux_tutorial) + links_linux, is_linux_tutorial, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), - "Windows", - links_windows, is_linux_tutorial) + "Windows", links_windows, is_linux_tutorial, main_title, last_title) write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", - links_macos, is_linux_tutorial) + links_macos, is_linux_tutorial, main_title, last_title) remove_directory_tree("copies") remove_directory_tree("if_mangled_files") From 508b22c7b3a485f8fdb64059a45d2ee9dfdd4f04 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 16:30:48 +0200 Subject: 
[PATCH 024/145] fixed some issues with if statements --- .../chatbot_parser.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e7a88e4de7cf..bbab687bd398 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,7 +12,7 @@ ################### define global variables ################### # variable that keeps track of the source directories -source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] +source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC")]#, os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] # list of all the filenames filenames_generic = {} @@ -189,7 +189,7 @@ def mangle_os_ifs(line, is_os): constr_match = re.search(r'\{%.*?%}', match.string) if_match = re.search(r'if ', match.group(1)) - if_os_match = re.search(r'if OS == ', match.group(1)) + if_os_match = re.search(r'if OS ', match.group(1)) endif_match = re.search(r'endif', match.group(1)) pos_first_mangle = constr_match.start() + start_index + added_length + 1 pos_second_mangle = constr_match.end() + start_index + added_length - 1 @@ -228,18 +228,18 @@ def mangle_os_ifs(line, is_os): return line, is_os -def mangle_ifs(directory, file): +def mangle_ifs(directory, filename): """ function that writes the if-mangled version of a file to a location where the jinja parser will use it :param directory: the directory of the file to be if mangled - :param file: the filename of the file to be mangled + :param filename: the filename of the file to be mangled :return: """ # variable to keep track of latest if-statement scope is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} - with 
open(os.path.join("if_mangled_files", file), 'w') as write_file: + with open(os.path.join("if_mangled_files", filename), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: new_line, is_os = mangle_os_ifs(line, is_os) @@ -261,17 +261,17 @@ def check_if_statements(curr_line, active_OS_if_states): :return: the text to be written to the file, if any """ # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{-if-%([^%]*)%-if-}(.*)', curr_line) + match = re.search(r'^\{-if-%(.*?)%-if-}(.*)', curr_line) # check whether the line contains information wrt if-statements that is not in its first part - match_large = re.search(r'^(.*)(\{-if-%[^%]*%-if-})(.*)', curr_line) + match_large = re.search(r'^(.*)(\{-if-%.*?%-if-})(.*)', curr_line) if match: content = match.group(1) # new if-statement wrt OS if re.search(r'if OS == ', content): - OS = content[9:-1] + OS = content.split()[-1] # set new active OS active_OS_if_states[OS] = "active" @@ -281,6 +281,17 @@ def check_if_statements(curr_line, active_OS_if_states): if other_OS != OS and active_OS_if_states[other_OS] == "active": active_OS_if_states[other_OS] = "inactive" + elif re.search(r'if OS != ', content): + OS = content.split()[-1] + + # set new active OS + active_OS_if_states[OS] = "inactive" + + # set other inactive ones on active + for other_OS in active_OS_if_states.keys(): + if other_OS != OS and active_OS_if_states[other_OS] == "inactive": + active_OS_if_states[other_OS] = "active" + # endif statement wrt OS elif re.search(r'endif', content): if str(1) in active_OS_if_states.values(): @@ -556,8 +567,8 @@ def main(): write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", links_macos, is_linux_tutorial, main_title, last_title) - remove_directory_tree("copies") - remove_directory_tree("if_mangled_files") + # remove_directory_tree("copies") + # remove_directory_tree("if_mangled_files") 
main() From a25ce2dc8bf7ffb9f732b6eb7e796ad54cd724b5 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 16:36:04 +0200 Subject: [PATCH 025/145] fixed some issues with if statements --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index bbab687bd398..fb7d8a8b176b 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,7 +12,7 @@ ################### define global variables ################### # variable that keeps track of the source directories -source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC")]#, os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] +source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] # list of all the filenames filenames_generic = {} From 80d0535a74564ecd18f626d5c18568cc17c7d7fc Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 13 Aug 2024 17:03:07 +0200 Subject: [PATCH 026/145] got rid of all global variables --- .../chatbot_parser.py | 125 +++++++++--------- 1 file changed, 65 insertions(+), 60 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index fb7d8a8b176b..eb30cb7fb6a7 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -5,30 +5,6 @@ import yaml from jinja2 import FileSystemLoader, Environment, ChoiceLoader -# variables for analytics -succeeded = 0 -failed = 0 - -################### define global variables ################### - -# variable that keeps track of the source directories -source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), os.path.join("..", "..", "mkdocs", "docs", "HPC", 
"linux-tutorial")] - -# list of all the filenames -filenames_generic = {} -filenames_linux = {} -for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if "linux-tutorial" in source_directory: - filenames_linux[file] = os.path.join(source_directory, file) - else: - filenames_generic[file] = os.path.join(source_directory, file) - -# some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc -problem_files = ["getting_started.md", "navigating.md"] - ################### define functions ################### def remove_directory_tree(old_directory): @@ -42,7 +18,7 @@ def remove_directory_tree(old_directory): shutil.rmtree(old_directory) -def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial_, in_code_block_): +def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): """ function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables @@ -51,13 +27,15 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs :param last_directory: the most recently encountered directory :param last_title: the most recently encountered title :param curr_dirs: the most recent directories at each title level + :param root_dirs: a list containing the root directories + param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial :param in_code_block_: boolean to indicate whether the current line is part of a codeblock :return: the depth of the title :return: the title found in the line if any :return: 
the new directory in which the next file will be written + :return link_lists: updated link_lists """ - global links_generic, links_linux, links_windows, links_macos # detect titles match = re.match(r'^#+ ', curr_line) @@ -68,35 +46,35 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs # make necessary changes if a title has been detected if logic_output == 0 or in_code_block_: - return 0, None, None, curr_dirs + return 0, None, None, curr_dirs, link_lists else: if last_title is not None: - write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", - links_linux, is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), "Windows", - links_windows, is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", - links_macos, is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), "", link_lists[0], is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), "Linux", + link_lists[1], is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), "Windows", + link_lists[2], is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), "macOS", + link_lists[3], is_linux_tutorial_, main_title, last_title) # reset the link lists - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] + link_lists[0] = [] + link_lists[1] = [] + link_lists[2] = [] + link_lists[3] = [] 
curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) - create_directory(os.path.join(root_dir_generic, curr_dirs[logic_output])) - create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[logic_output])) - create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[logic_output])) - create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[logic_output])) + create_directory(os.path.join(root_dirs[0], curr_dirs[logic_output])) + create_directory(os.path.join(root_dirs[1], curr_dirs[logic_output])) + create_directory(os.path.join(root_dirs[2], curr_dirs[logic_output])) + create_directory(os.path.join(root_dirs[3], curr_dirs[logic_output])) # update the lower order current directories for i in range(logic_output + 1, 4): curr_dirs[i] = curr_dirs[logic_output] - return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs + return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists def create_directory(new_directory): @@ -333,28 +311,30 @@ def check_if_statements(curr_line, active_OS_if_states): return "write_text", None, curr_line -def write_text_to_file(file_name, curr_line): +def write_text_to_file(file_name, curr_line, link_lists): """ function that writes a line to a file :param file_name: target file to write the line to :param curr_line: line to be written to the file - :return: + :param link_lists: list containing all the links that will be printed at the end of files + :return link_lists: updated link_lists """ - global links_generic, links_linux, links_windows, links_macos with open(file_name, "a") as write_file: if "generic" in file_name: - curr_line, links_generic = replace_markdown_markers(curr_line, links_generic) + curr_line, links_generic = replace_markdown_markers(curr_line, link_lists[0]) elif "linux" 
in file_name: - curr_line, links_linux = replace_markdown_markers(curr_line, links_linux) + curr_line, links_linux = replace_markdown_markers(curr_line, link_lists[1]) elif "windows" in file_name: - curr_line, links_windows = replace_markdown_markers(curr_line, links_windows) + curr_line, links_windows = replace_markdown_markers(curr_line, link_lists[2]) else: - curr_line, links_macos = replace_markdown_markers(curr_line, links_macos) + curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3]) write_file.write(curr_line) + return link_lists -def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title): + +def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title, root_dirs, link_lists): """ function that decides what file to write text to @@ -362,18 +342,22 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las :param active_OS_if_states: dictionary keeping track of which OSes are active according to the if-statements :param last_directory: most recently made directory :param last_title: the most recently encountered title - :return: + :param root_dirs: a list with all root directories + :param link_lists: list of links that need to be written at the end of the files + :return link_lists: an updated link_lists """ # check that the line is part of the website for gent if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ active_OS_if_states["macos"] == "inactive": - write_text_to_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), curr_line) + link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists) if active_OS_if_states["linux"] == "active": - write_text_to_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), curr_line) + link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, 
last_title + ".txt"), curr_line, link_lists) if active_OS_if_states["windows"] == "active": - write_text_to_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), curr_line) + link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), curr_line, link_lists) if active_OS_if_states["macos"] == "active": - write_text_to_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), curr_line) + link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), curr_line, link_lists) + + return link_lists def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_title, last_title): @@ -434,7 +418,6 @@ def main(): main function :return: """ - global root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, links_generic, links_linux, links_windows, links_macos # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason remove_directory_tree("parsed_mds") remove_directory_tree("copies") @@ -453,6 +436,27 @@ def main(): if not os.path.exists("if_mangled_files"): os.mkdir("if_mangled_files") + ################### define loop-invariant variables ################### + + # variable that keeps track of the source directories + source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), + os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] + + # list of all the filenames + filenames_generic = {} + filenames_linux = {} + for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + if "linux-tutorial" in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) + + # some 
files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc (temporary, should be taken out when the original files have been converted to proper markdown) + problem_files = ["getting_started.md", "navigating.md"] + for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): ################### define/reset loop specific variables ################### @@ -499,6 +503,7 @@ def main(): links_linux = [] links_windows = [] links_macos = [] + link_lists = [links_generic, links_linux, links_windows, links_macos] # dictionaries to keep track of current OS active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} @@ -533,7 +538,7 @@ def main(): with open(copy_file, 'r') as readfile: for line in readfile: - title_level, title, directory, curr_dirs = check_for_title(line, main_title, last_directory, last_title, curr_dirs, is_linux_tutorial, in_code_block) + title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block @@ -551,11 +556,11 @@ def main(): next_action = check_if_statements(line, active_OS_if_states) while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": if next_action[0] == "write_text_and_check_extra_message": - choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title) + link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists) next_action = check_if_statements(next_action[1], active_OS_if_states) if next_action[0] == "write_text": - choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title) + link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists) # write end of file for the last file write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, From 9163a759c3d06cc7eb10185b10f177baa86f2294 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 09:45:27 +0200 Subject: [PATCH 027/145] small changes to make file more readable --- .../chatbot_parser.py | 51 ++++++++----------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index eb30cb7fb6a7..cb3ed26fda87 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -48,29 +48,23 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs if logic_output == 0 or in_code_block_: return 0, None, None, curr_dirs, link_lists else: + + # if a new title is detected, write the end of the 
previous file if last_title is not None: - write_end_of_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), "", link_lists[0], is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), "Linux", - link_lists[1], is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), "Windows", - link_lists[2], is_linux_tutorial_, main_title, last_title) - write_end_of_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), "macOS", - link_lists[3], is_linux_tutorial_, main_title, last_title) - - # reset the link lists - link_lists[0] = [] - link_lists[1] = [] - link_lists[2] = [] - link_lists[3] = [] + for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): + write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title) + + # reset the link lists for each OS + for i in range(4): + link_lists[i] = [] + # make a new directory corresponding with the new title curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) - create_directory(os.path.join(root_dirs[0], curr_dirs[logic_output])) - create_directory(os.path.join(root_dirs[1], curr_dirs[logic_output])) - create_directory(os.path.join(root_dirs[2], curr_dirs[logic_output])) - create_directory(os.path.join(root_dirs[3], curr_dirs[logic_output])) + for i in range(4): + create_directory(os.path.join(root_dirs[i], curr_dirs[logic_output])) - # update the lower order current directories + # update the higher order current directories for i in range(logic_output + 1, 4): curr_dirs[i] = curr_dirs[logic_output] @@ -152,6 +146,7 @@ def jinja_parser(filename, copy_location): def mangle_os_ifs(line, is_os): """ function that mangles the os-related if-statements. 
This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. + We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. :param line: the current line to check for os-related if-statements :param is_os: boolean keep track of the current os-state of the if-statements. Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} @@ -198,7 +193,7 @@ def mangle_os_ifs(line, is_os): is_os = 0 else: if is_os == 2 or is_os == 3: - line = line[:constr_match.start() + start_index + added_length + 1] + "-if-" + line[constr_match.start() + start_index + added_length + 1:constr_match.end() + start_index + added_length - 1] + "-if-" + line[constr_match.end() + start_index + added_length - 1:] + line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] added_length += 8 start_index += constr_match.end() @@ -247,7 +242,7 @@ def check_if_statements(curr_line, active_OS_if_states): if match: content = match.group(1) - # new if-statement wrt OS + # new if-statement wrt OS with '==' if re.search(r'if OS == ', content): OS = content.split()[-1] @@ -259,6 +254,7 @@ def check_if_statements(curr_line, active_OS_if_states): if other_OS != OS and active_OS_if_states[other_OS] == "active": active_OS_if_states[other_OS] = "inactive" + # new if-statement wrt OS with '!=' elif re.search(r'if OS != ', content): OS = content.split()[-1] @@ -347,8 +343,7 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las :return link_lists: an updated link_lists """ # check that the line is part of the website for gent - if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and \ - active_OS_if_states["macos"] == "inactive": + if active_OS_if_states["linux"] == 
"inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists) if active_OS_if_states["linux"] == "active": link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), curr_line, link_lists) @@ -457,6 +452,7 @@ def main(): # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc (temporary, should be taken out when the original files have been converted to proper markdown) problem_files = ["getting_started.md", "navigating.md"] + # for loops over all files for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): ################### define/reset loop specific variables ################### @@ -540,6 +536,7 @@ def main(): for line in readfile: title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) + # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block @@ -563,14 +560,8 @@ def main(): link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists) # write end of file for the last file - write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), "", links_generic, - is_linux_tutorial, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_linux, last_directory, last_title + ".txt"), "Linux", - links_linux, is_linux_tutorial, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_windows, last_directory, last_title + ".txt"), - "Windows", links_windows, is_linux_tutorial, main_title, last_title) - write_end_of_file(os.path.join(root_dir_os_specific_macos, last_directory, last_title + ".txt"), "macOS", - links_macos, is_linux_tutorial, main_title, last_title) + for OS in ["", "Linux", "Windows", "macOS"]: + write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), OS, links_generic, is_linux_tutorial, main_title, last_title) # remove_directory_tree("copies") # remove_directory_tree("if_mangled_files") From 1dcffc1bac4ee341556ef29c3557bb21686eaf2d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 11:45:16 +0200 Subject: [PATCH 028/145] codeblocks, tips, warnings and info reformatted --- .../chatbot_parser.py | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index cb3ed26fda87..d8d8000bdcc5 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -98,10 +98,37 @@ def replace_markdown_markers(curr_line, linklist): curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + 
str(len(linklist) + 1) + "]") linklist.append(match[1]) - # TODO: code-blocks - # TODO: tips - # TODO: warnings - # etc + # codeblock (with ``` -> always stands on a separate line, so line can be dropped) + if '```' in curr_line: + curr_line = "" + + # structures within <> + match = re.findall(r'<(.*?)>', curr_line) + if match: + for i, content in enumerate(match): + exception_words = ['SEQUENCE', 'vsc40000', 'Session', 'OUTPUT_DIR', 'jobname', 'jobid', 'hostname', 'Enjoy the day!', 'stdout', 'stderr', 'coursecode', 'year', 'nickname', '01', 'number of ', 'user', 'home', 'software', 'module'] + if '#include' in curr_line: + pass + elif '.' in content: + curr_line = re.sub(f'<{content}>', f"{content}", curr_line) + elif '***' in content: + curr_line = re.sub(r'<\*\*\*', "", re.sub(r'\*\*\*\\>', "", curr_line)) + elif '-' in content and ' ' not in content: + curr_line = re.sub(f'<{content}>', f"{content}", curr_line) + # sometimes normal words are between <> brackets and should be excluded (ugly fix) + elif any(substring in content for substring in exception_words): + pass + # special cases that messed up the formatting (ugly fix) + elif ' files', "", curr_line) + elif '<>' in curr_line: + pass + else: + curr_line = re.sub(r'<.*?>', "", curr_line) + + # structures with !!! (info, tips, warnings) + if '!!!' in curr_line: + curr_line = re.sub(r'!!!', "", curr_line) return curr_line, linklist @@ -327,6 +354,9 @@ def write_text_to_file(file_name, curr_line, link_lists): curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3]) write_file.write(curr_line) + # if re.search(r'<.*?>', curr_line): + # print(curr_line) + return link_lists @@ -567,6 +597,7 @@ def main(): # remove_directory_tree("if_mangled_files") +print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() # TODO: reconsider maximum depth to be detected as title (now at four) # TODO: adapt script to be used from command line From 4d7fbdb193e14fc8d93dd914748409aff0d2170c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 11:48:59 +0200 Subject: [PATCH 029/145] small optimisations --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index d8d8000bdcc5..91198ea777e3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -28,7 +28,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs :param last_title: the most recently encountered title :param curr_dirs: the most recent directories at each title level :param root_dirs: a list containing the root directories - param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file + :param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial :param in_code_block_: boolean to indicate whether the current line is part of a codeblock :return: the depth of the title @@ -427,7 +427,7 @@ def make_valid_title(title): invalid_chars = r'[<>:"/\\|?*\0()]' # get rid of extra information between {} brackets - s = re.sub(r'\{.*?}', '', title) + title = re.sub(r'\{.*?}', '', title) # Remove invalid characters valid_filename = re.sub(invalid_chars, '', title) @@ -513,16 +513,11 @@ def main(): main_title = filename[:-3] # variable that keeps track of the directories that are used to 
write in at different levels - curr_dirs = [filename[:-3] for i in range(5)] - - # variable to keep track whether we're dealing with OS-specific info or not - OS_specific = False + curr_dirs = [filename[:-3] for _ in range(5)] # variable that keeps track of the latest non-zero level title and corresponding directory - last_title_level = 1 last_title = None last_directory = None - last_was_title = False # list to keep track of links in the text links_generic = [] @@ -572,7 +567,6 @@ def main(): # line is a title with a maximum depth of 4 if title_level > 0: - last_title_level = title_level last_title = title last_directory = directory after_first_title = True From 671f7f3b5e57a2643e87a65a8b449e068176261d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 11:50:18 +0200 Subject: [PATCH 030/145] small optimisations --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 91198ea777e3..70ba8b17ee5d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -354,9 +354,6 @@ def write_text_to_file(file_name, curr_line, link_lists): curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3]) write_file.write(curr_line) - # if re.search(r'<.*?>', curr_line): - # print(curr_line) - return link_lists @@ -587,8 +584,8 @@ def main(): for OS in ["", "Linux", "Windows", "macOS"]: write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), OS, links_generic, is_linux_tutorial, main_title, last_title) - # remove_directory_tree("copies") - # remove_directory_tree("if_mangled_files") + remove_directory_tree("copies") + remove_directory_tree("if_mangled_files") print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") From e5c39bd2dd5f7e708b802f193656c20dfaa41253 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 12:04:39 +0200 Subject: [PATCH 031/145] initial commit --- scripts/HPC_chatbot_preprocessor/README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/README.md diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md new file mode 100644 index 000000000000..32ec81c2fa5a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -0,0 +1,16 @@ +# Chatbot parser + +`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory for a chatbot to be trained on. + +## Generated file structure + +This directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: + +- `generic` contains the parts of the markdown sources that were non-OS-specific +- `os_specific` contains the parts of the markdown sources that were OS-specific + +Withing `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. + +These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. + +Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on . 
\ No newline at end of file From c6492fc14120e9391507a8363bcd9a82976766e8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 13:03:46 +0200 Subject: [PATCH 032/145] added requirements --- scripts/HPC_chatbot_preprocessor/requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/requirements.txt diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt new file mode 100644 index 000000000000..19ed8a2a29de --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -0,0 +1,6 @@ +os +re +shutil +pypandoc +yaml +jinja2 \ No newline at end of file From aff8198d90ed64b044e837fd672c0019b88520d8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 13:04:08 +0200 Subject: [PATCH 033/145] added requirements and usage info --- scripts/HPC_chatbot_preprocessor/README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 32ec81c2fa5a..e1e12046dd5a 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -13,4 +13,17 @@ Withing `os_specific` a further distinction is made for each of the three possib These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. -Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on . 
\ No newline at end of file +Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on . + +## Requirements + +- The required Python packages are listed in `requirements.txt` +- [Pandoc](https://pandoc.org/installing.html) must be installed and must be added to the system PATH + +## Usage + +The script can be ran in a shell environment with the following command: + +```shell +python chatbot_parser.py +``` \ No newline at end of file From a981002d1cd8eab50a69d860838084b768f538e8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 14 Aug 2024 13:04:27 +0200 Subject: [PATCH 034/145] minor changes to the print statements --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 70ba8b17ee5d..6cb74a5c9bee 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -590,5 +590,4 @@ def main(): print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() -# TODO: reconsider maximum depth to be detected as title (now at four) -# TODO: adapt script to be used from command line +print("Parsing finished successfully") From 1f3b3432fdba5390befbfd2109fa1b698c5b0728 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 10:53:10 +0200 Subject: [PATCH 035/145] reworked function to take care of html structures --- .../chatbot_parser.py | 56 +++++++++++++------ 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 6cb74a5c9bee..d5e950973ec3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -4,6 +4,7 @@ import pypandoc import yaml from jinja2 import FileSystemLoader, Environment, ChoiceLoader +from itertools import chain ################### define functions ################### @@ -91,6 +92,13 @@ def replace_markdown_markers(curr_line, linklist): :return curr_line: the adapted current line :return linklist: the updated linklist """ + + # TODO: filter out images before links + # replace images with an empty line + if re.match(r'!\[image]\(.*?\)', curr_line) or re.match(r'!\[]\(img/.*?.png\)', curr_line): + print(curr_line) + curr_line = "" + # replace links with a reference matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: @@ -106,25 +114,36 @@ def replace_markdown_markers(curr_line, linklist): match = re.findall(r'<(.*?)>', curr_line) if match: for i, content in enumerate(match): - exception_words = ['SEQUENCE', 'vsc40000', 'Session', 'OUTPUT_DIR', 'jobname', 'jobid', 'hostname', 'Enjoy the day!', 'stdout', 'stderr', 'coursecode', 'year', 'nickname', '01', 'number of ', 'user', 'home', 'software', 'module'] - if '#include' in curr_line: - pass - elif '.' 
in content: - curr_line = re.sub(f'<{content}>', f"{content}", curr_line) - elif '***' in content: - curr_line = re.sub(r'<\*\*\*', "", re.sub(r'\*\*\*\\>', "", curr_line)) - elif '-' in content and ' ' not in content: - curr_line = re.sub(f'<{content}>', f"{content}", curr_line) - # sometimes normal words are between <> brackets and should be excluded (ugly fix) - elif any(substring in content for substring in exception_words): - pass - # special cases that messed up the formatting (ugly fix) + syntax_words = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] # make sure these are always lowercase + syntax_words_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in syntax_words])) + syntax_words_style = [element + " style=.*" for element in syntax_words] + + # add references for every link of format + if re.search(r'a href=.*', content): + link = content[8:-1] + curr_line = re.sub(f'<{content}>', "[" + str(len(linklist) + 1) + "]", curr_line) + linklist.append(link) + + # drop the syntax words + elif content.lower() in syntax_words_variations: + curr_line = re.sub(f'<{content}>', "", curr_line) + + # drop the version of the syntax_words followed by " style=" + elif any(re.match(pattern, content) for pattern in syntax_words_style): + curr_line = re.sub(r'<.*?>', "", curr_line) + + # drop markdown comments + elif re.fullmatch(r'!--.*?--', content): + curr_line = re.sub(r'<.*?>', "", curr_line) + + # special case (ugly fix) elif ' files', "", curr_line) - elif '<>' in curr_line: - pass + + # keep the rest else: - curr_line = re.sub(r'<.*?>', "", curr_line) + # print("<" + content + ">") + pass # structures with !!! (info, tips, warnings) if '!!!' 
in curr_line: @@ -505,6 +524,7 @@ def main(): root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux") root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows") root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos") + root_dirs = [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos] # variable for the main title (needed for reference links) main_title = filename[:-3] @@ -581,8 +601,8 @@ def main(): link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists) # write end of file for the last file - for OS in ["", "Linux", "Windows", "macOS"]: - write_end_of_file(os.path.join(root_dir_generic, last_directory, last_title + ".txt"), OS, links_generic, is_linux_tutorial, main_title, last_title) + for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): + write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) remove_directory_tree("copies") remove_directory_tree("if_mangled_files") From 48cad9779f0ed2a492027330b5af531cf0631079 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 11:26:43 +0200 Subject: [PATCH 036/145] filter out images --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index d5e950973ec3..2408557fd496 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -93,10 +93,8 @@ def replace_markdown_markers(curr_line, linklist): :return linklist: the updated linklist """ - # TODO: filter out images before links # replace images with 
an empty line - if re.match(r'!\[image]\(.*?\)', curr_line) or re.match(r'!\[]\(img/.*?.png\)', curr_line): - print(curr_line) + if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[]\(img/.*?.png\)', curr_line): curr_line = "" # replace links with a reference @@ -495,9 +493,6 @@ def main(): else: filenames_generic[file] = os.path.join(source_directory, file) - # some files are not written in proper markdown but rather in reST, they will be converted later down the line using pandoc (temporary, should be taken out when the original files have been converted to proper markdown) - problem_files = ["getting_started.md", "navigating.md"] - # for loops over all files for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): @@ -568,10 +563,6 @@ def main(): # process the jinja macros jinja_parser(filename, copy_file) - # convert the files without proper markdown layout into markdown using pandoc - if "linux-tutorial" in filenames[filename] and filename in problem_files: - pypandoc.convert_file(copy_file, 'markdown', outputfile=copy_file) - # open the file and store line by line in the right file with open(copy_file, 'r') as readfile: From df58f233e125078552318647815054bdfdff0bcb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 13:28:28 +0200 Subject: [PATCH 037/145] get rid of backquotes, asterisks, pluses and underscores used for formatting --- .../chatbot_parser.py | 53 ++++++++++++++----- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 2408557fd496..72d8c251c556 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,7 +1,6 @@ import os import re import shutil -import pypandoc import yaml from jinja2 import FileSystemLoader, Environment, ChoiceLoader from itertools import chain @@ -83,12 +82,13 @@ def 
create_directory(new_directory): os.mkdir(new_directory) -def replace_markdown_markers(curr_line, linklist): +def replace_markdown_markers(curr_line, linklist, in_code_block): """ function that replaces certain markdown structures with the equivalent used on the website :param curr_line: the current line on which markdown structures need to be replaced :param linklist: the list used to store links that need to be printed at the end of the file + :param in_code_block: boolean indicating whether the current line is part of a code block :return curr_line: the adapted current line :return linklist: the updated linklist """ @@ -147,6 +147,29 @@ def replace_markdown_markers(curr_line, linklist): if '!!!' in curr_line: curr_line = re.sub(r'!!!', "", curr_line) + # get rid of other markdown indicators (`, *, +, _) + if not in_code_block: + + backquotes = re.findall(r'`(.*?)`', curr_line) + if backquotes: + for i, content in enumerate(backquotes): + curr_line = curr_line.replace(f"`{content}`", content) + + asterisks = re.findall(r'(? 
Date: Fri, 16 Aug 2024 14:22:01 +0200 Subject: [PATCH 038/145] dump to json files instead of txt files --- .../chatbot_parser.py | 78 ++++++++++++------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 72d8c251c556..b6061ef9d90a 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,9 +1,10 @@ +import json import os import re import shutil import yaml -from jinja2 import FileSystemLoader, Environment, ChoiceLoader from itertools import chain +from jinja2 import FileSystemLoader, Environment, ChoiceLoader ################### define functions ################### @@ -52,7 +53,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs # if a new title is detected, write the end of the previous file if last_title is not None: for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): - write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title) # reset the link lists for each OS for i in range(4): @@ -384,16 +385,30 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block): :param in_code_block: boolean indicating whether the current line is in a codeblock :return link_lists: updated link_lists """ - with open(file_name, "a") as write_file: + + if os.path.exists(file_name) or curr_line.strip(): + if os.path.exists(file_name): + with open(file_name, "r") as read_file: + data = json.load(read_file) + else: + data = {} + if "generic" in file_name: - curr_line, links_generic = replace_markdown_markers(curr_line, link_lists[0], in_code_block) + curr_line, link_lists[0] = replace_markdown_markers(curr_line, 
link_lists[0], in_code_block) elif "linux" in file_name: - curr_line, links_linux = replace_markdown_markers(curr_line, link_lists[1], in_code_block) + curr_line, link_lists[1] = replace_markdown_markers(curr_line, link_lists[1], in_code_block) elif "windows" in file_name: - curr_line, links_windows = replace_markdown_markers(curr_line, link_lists[2], in_code_block) + curr_line, link_lists[2] = replace_markdown_markers(curr_line, link_lists[2], in_code_block) + else: + curr_line, link_lists[3] = replace_markdown_markers(curr_line, link_lists[3], in_code_block) + + if 'content' in data: + data['content'] += curr_line else: - curr_line, links_macos = replace_markdown_markers(curr_line, link_lists[3], in_code_block) - write_file.write(curr_line) + data['content'] = curr_line + + with open(file_name, "w") as write_file: + json.dump(data, write_file, indent=4) return link_lists @@ -413,13 +428,13 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las """ # check that the line is part of the website for gent if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": - link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block) + link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) if active_OS_if_states["linux"] == "active": - link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block) + link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) if active_OS_if_states["windows"] == "active": - link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block) + link_lists = 
write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) if active_OS_if_states["macos"] == "active": - link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".txt"), curr_line, link_lists, in_code_block) + link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) return link_lists @@ -436,23 +451,30 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_titl :param last_title: the most recently encountered title :return: """ - if len(OS) > 0: - OS = OS + "/" - # add the links from within the document - with open(file_location, 'a') as write_file: - write_file.write("\n\n") + if os.path.exists(file_location): + + if len(OS) > 0: + OS = OS + "/" + + with open(file_location, "r") as read_file: + data = json.load(read_file) + + # add the links from within the document + data['links'] = {} for i, link in enumerate(linklist): - write_file.write("[" + str(i + 1) + "]: " + str(link) + "\n") + data['links'][str(i + 1)] = str(link) - if is_linux_tutorial_: - linux_part = "linux-tutorial/" - else: - linux_part = "" + if is_linux_tutorial_: + linux_part = "linux-tutorial/" + else: + linux_part = "" + + # add the reference link + data['reference_link'] = ("docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) - # finally add the reference link - with open(file_location, 'a') as write_file: - write_file.write("\nreference: docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-') + "\n") + with open(file_location, 'w') as write_file: + json.dump(data, write_file, indent=4) def make_valid_title(title): @@ -618,10 +640,10 @@ def main(): # write end of file for the last file for i, OS in 
enumerate(["", "Linux", "Windows", "macOS"]): - write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".txt"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) + write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) - remove_directory_tree("copies") - remove_directory_tree("if_mangled_files") + # remove_directory_tree("copies") + # remove_directory_tree("if_mangled_files") print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") From 2c333fea2e36229a6db8fd7d85ce906ae0479c8c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 15:44:07 +0200 Subject: [PATCH 039/145] cleaned up parser with macros --- .../chatbot_parser.py | 284 ++++++++++-------- 1 file changed, 158 insertions(+), 126 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b6061ef9d90a..b36f5c3c471e 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -6,19 +6,55 @@ from itertools import chain from jinja2 import FileSystemLoader, Environment, ChoiceLoader - -################### define functions ################### -def remove_directory_tree(old_directory): - """ - function that removes a full directory tree - - :param old_directory: the directory to be removed - :return: - """ - if os.path.exists(old_directory): - shutil.rmtree(old_directory) +#################### define macro's #################### +# directories +PARSED_MDS = "parsed_mds" +COPIES = "copies" +IF_MANGLED_FILES = "if_mangled_files" +LINUX_TUTORIAL = "linux-tutorial" +RETURN_DIR = ".." 
+MKDOCS_DIR = "mkdocs" +DOCS_DIR = "docs" +HPC_DIR = "HPC" +EXTRA_DIR = "extra" +GENERIC_DIR = "generic" +OS_SPECIFIC_DIR = "os_specific" + +# OSes +LINUX = "linux" +WINDOWS = "windows" +MACOS = "macos" + +# urls +REPO_URL = 'https://github.com/hpcugent/vsc_user_docs' +DOCS_URL = "docs.hpc.ugent.be" + +# OS-related if-states +ACTIVE = "active" +INACTIVE = "inactive" + +# if mangler states +NON_OS_IF = 0 +NON_OS_IF_IN_OS_IF = 1 +OS_IF = 2 +OS_IF_IN_OS_IF = 3 + +# if mangler macros +IF_MANGLED_PART = "-if-" + +# actions +DONE = "done" +WRITE_TEXT = "write_text" +CHECK_EXTRA_MESSAGE = "check_extra_message" +WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message" + +# JSON attributes +CONTENT = "content" +LINKS = "links" +REFERENCE_LINK = "reference_link" +################### define functions ################### def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): """ function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables @@ -63,7 +99,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) for i in range(4): - create_directory(os.path.join(root_dirs[i], curr_dirs[logic_output])) + os.makedirs(os.path.join(root_dirs[i], curr_dirs[logic_output]), exist_ok=True) # update the higher order current directories for i in range(logic_output + 1, 4): @@ -72,17 +108,6 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists -def create_directory(new_directory): - """ - function that creates new directories - - :param new_directory: directory to be created - :return: - """ - if not 
os.path.exists(new_directory): - os.mkdir(new_directory) - - def replace_markdown_markers(curr_line, linklist, in_code_block): """ function that replaces certain markdown structures with the equivalent used on the website @@ -183,16 +208,16 @@ def jinja_parser(filename, copy_location): :return: """ # YAML file location - yml_file_path = os.path.join('..', '..', 'mkdocs', 'extra', 'gent.yml') + yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml') # Read the YAML file with open(yml_file_path, 'r') as yml_file: words_dict = yaml.safe_load(yml_file) - # ugly fix for index.md error + # ugly fix for index.md error that occurs because of the macro "config.repo_url" in mkdocs/docs/HPC/index.md additional_context = { 'config': { - 'repo_url': 'https://github.com/hpcugent/vsc_user_docs' + 'repo_url': REPO_URL } } combined_context = {**words_dict, **additional_context} @@ -201,7 +226,7 @@ def jinja_parser(filename, copy_location): mangle_ifs(copy_location, filename) # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath='if_mangled_files'), FileSystemLoader(searchpath=os.path.join("..", "..", "mkdocs", "docs", "HPC"))]) + template_loader = ChoiceLoader([FileSystemLoader(searchpath=IF_MANGLED_FILES), FileSystemLoader(searchpath=os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR))]) templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) @@ -217,7 +242,11 @@ def mangle_os_ifs(line, is_os): We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. :param line: the current line to check for os-related if-statements - :param is_os: boolean keep track of the current os-state of the if-statements. 
Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} + :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF + NON_OS_IF: not in an os-if + NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if + OS_IF: in an os-if + OS_IF_IN_OS_IF: in an os-if nested in an os-if} :return line: the modified line with mangled os-related if-statements """ @@ -232,37 +261,46 @@ def mangle_os_ifs(line, is_os): if_match = re.search(r'if ', match.group(1)) if_os_match = re.search(r'if OS ', match.group(1)) endif_match = re.search(r'endif', match.group(1)) + + # mangle positions pos_first_mangle = constr_match.start() + start_index + added_length + 1 pos_second_mangle = constr_match.end() + start_index + added_length - 1 + # different parts of the original string + PART_BEFORE_MANGLING = line[:pos_first_mangle] + PART_BETWEEN_MANGLING = line[pos_first_mangle:pos_second_mangle] + PART_AFTER_MANGLING = line[pos_second_mangle:] + # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: - if is_os == 2 or is_os == 3: - line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] - added_length += 8 - if is_os == 2: - is_os = 0 - elif is_os == 3: - is_os = 2 - elif is_os == 1: - is_os = 2 + if is_os == OS_IF or is_os == OS_IF_IN_OS_IF: + line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + added_length += 2 * len(IF_MANGLED_PART) + if is_os == OS_IF: + is_os = NON_OS_IF + elif is_os == OS_IF_IN_OS_IF: + is_os = OS_IF + elif is_os == NON_OS_IF_IN_OS_IF: + is_os = OS_IF + elif if_match: if if_os_match: - line = line[:pos_first_mangle] + "-if-" + 
line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] - added_length += 8 - if is_os == 2: - is_os = 3 + line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + added_length += 2 * len(IF_MANGLED_PART) + if is_os == OS_IF: + is_os = OS_IF_IN_OS_IF else: - is_os = 2 + is_os = OS_IF else: - if is_os == 2: - is_os = 1 + if is_os == OS_IF: + is_os = NON_OS_IF_IN_OS_IF else: - is_os = 0 + is_os = NON_OS_IF + else: - if is_os == 2 or is_os == 3: - line = line[:pos_first_mangle] + "-if-" + line[pos_first_mangle:pos_second_mangle] + "-if-" + line[pos_second_mangle:] - added_length += 8 + if is_os == OS_IF or is_os == OS_IF_IN_OS_IF: + line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + added_length += 2 * len(IF_MANGLED_PART) start_index += constr_match.end() match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) @@ -278,9 +316,9 @@ def mangle_ifs(directory, filename): :return: """ # variable to keep track of latest if-statement scope - is_os = 0 # Can be 0, 1, 2 or 3 {0: not in an os-if; 1: in a non-os-if nested in an os-if; 2: in an os-if; 3: in an os-if nested in an os-if} + is_os = NON_OS_IF - with open(os.path.join("if_mangled_files", filename), 'w') as write_file: + with open(os.path.join(IF_MANGLED_FILES, filename), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: new_line, is_os = mangle_os_ifs(line, is_os) @@ -294,18 +332,18 @@ def check_if_statements(curr_line, active_OS_if_states): :param curr_line: the line to be checked for if-statements to build the directory structure :param active_OS_if_states: dictionary keeping track of the active OS states according to the if-statements :return: the next action to be done with the line: - "done": An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line 
can be processed. - "check_extra_message": An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. - "write_text": No if-statement has been found, write the current line to a file (can also be part of the current line) - "write_text_and_check_extra_message": An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line. + DONE: An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed. + CHECK_EXTRA_MESSAGE: An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. + WRITE_TEXT: No if-statement has been found, write the current line to a file (can also be part of the current line) + WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line. 
:return: the extra message to be checked, if any :return: the text to be written to the file, if any """ # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{-if-%(.*?)%-if-}(.*)', curr_line) + match = re.search(r'^\{' + IF_MANGLED_PART + '%(.*?)%' + IF_MANGLED_PART + '}(.*)', curr_line) # check whether the line contains information wrt if-statements that is not in its first part - match_large = re.search(r'^(.*)(\{-if-%.*?%-if-})(.*)', curr_line) + match_large = re.search(r'^(.*)(\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '})(.*)', curr_line) if match: content = match.group(1) @@ -315,33 +353,33 @@ def check_if_statements(curr_line, active_OS_if_states): OS = content.split()[-1] # set new active OS - active_OS_if_states[OS] = "active" + active_OS_if_states[OS] = ACTIVE # set other active ones on inactive for other_OS in active_OS_if_states.keys(): - if other_OS != OS and active_OS_if_states[other_OS] == "active": - active_OS_if_states[other_OS] = "inactive" + if other_OS != OS and active_OS_if_states[other_OS] == ACTIVE: + active_OS_if_states[other_OS] = INACTIVE # new if-statement wrt OS with '!=' elif re.search(r'if OS != ', content): OS = content.split()[-1] # set new active OS - active_OS_if_states[OS] = "inactive" + active_OS_if_states[OS] = INACTIVE # set other inactive ones on active for other_OS in active_OS_if_states.keys(): - if other_OS != OS and active_OS_if_states[other_OS] == "inactive": - active_OS_if_states[other_OS] = "active" + if other_OS != OS and active_OS_if_states[other_OS] == INACTIVE: + active_OS_if_states[other_OS] = ACTIVE # endif statement wrt OS elif re.search(r'endif', content): if str(1) in active_OS_if_states.values(): active_OS_if_states[ - list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = "active" + list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = ACTIVE else: for key in 
active_OS_if_states.keys(): - active_OS_if_states[key] = "inactive" + active_OS_if_states[key] = INACTIVE # else statement wrt OS elif re.search(r'else', content): @@ -353,26 +391,26 @@ def check_if_statements(curr_line, active_OS_if_states): # set the previously active one on inactive until the next endif key_list = list(active_OS_if_states.keys()) - position = list(active_OS_if_states.values()).index("active") + position = list(active_OS_if_states.values()).index(ACTIVE) active_OS_if_states[key_list[position]] = str(i) # set inactive ones on active - while "inactive" in active_OS_if_states.values(): - position = list(active_OS_if_states.values()).index("inactive") - active_OS_if_states[key_list[position]] = "active" + while INACTIVE in active_OS_if_states.values(): + position = list(active_OS_if_states.values()).index(INACTIVE) + active_OS_if_states[key_list[position]] = ACTIVE if len(match.group(2)) != 0: extra_message = match.group(2).lstrip() - return "check_extra_message", extra_message, None + return CHECK_EXTRA_MESSAGE, extra_message, None else: - return "done", None, None + return DONE, None, None elif match_large: - return "write_text_and_check_extra_message", match_large.group(2), match_large.group(1) + return WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE, match_large.group(2), match_large.group(1) else: - return "write_text", None, curr_line + return WRITE_TEXT, None, curr_line def write_text_to_file(file_name, curr_line, link_lists, in_code_block): @@ -393,19 +431,19 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block): else: data = {} - if "generic" in file_name: + if GENERIC_DIR in file_name: curr_line, link_lists[0] = replace_markdown_markers(curr_line, link_lists[0], in_code_block) - elif "linux" in file_name: + elif LINUX in file_name: curr_line, link_lists[1] = replace_markdown_markers(curr_line, link_lists[1], in_code_block) - elif "windows" in file_name: + elif WINDOWS in file_name: curr_line, link_lists[2] = 
replace_markdown_markers(curr_line, link_lists[2], in_code_block) else: curr_line, link_lists[3] = replace_markdown_markers(curr_line, link_lists[3], in_code_block) - if 'content' in data: - data['content'] += curr_line + if CONTENT in data: + data[CONTENT] += curr_line else: - data['content'] = curr_line + data[CONTENT] = curr_line with open(file_name, "w") as write_file: json.dump(data, write_file, indent=4) @@ -427,13 +465,13 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las :return link_lists: an updated link_lists """ # check that the line is part of the website for gent - if active_OS_if_states["linux"] == "inactive" and active_OS_if_states["windows"] == "inactive" and active_OS_if_states["macos"] == "inactive": + if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states["linux"] == "active": + if active_OS_if_states[LINUX] == ACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states["windows"] == "active": + if active_OS_if_states[WINDOWS] == ACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states["macos"] == "active": + if active_OS_if_states[MACOS] == ACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) return link_lists @@ -461,17 +499,17 @@ def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_titl data = json.load(read_file) # add the links from within the document - data['links'] = {} + data[LINKS] = {} for i, link in enumerate(linklist): - 
data['links'][str(i + 1)] = str(link) + data[LINKS][str(i + 1)] = str(link) if is_linux_tutorial_: - linux_part = "linux-tutorial/" + linux_part = LINUX_TUTORIAL + "/" else: linux_part = "" # add the reference link - data['reference_link'] = ("docs.hpc.ugent.be/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) + data[REFERENCE_LINK] = (DOCS_URL + "/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == '-').strip('-')) with open(file_location, 'w') as write_file: json.dump(data, write_file, indent=4) @@ -505,28 +543,28 @@ def main(): :return: """ # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - remove_directory_tree("parsed_mds") - remove_directory_tree("copies") - remove_directory_tree("if_mangled_files") + shutil.rmtree(PARSED_MDS) + shutil.rmtree(COPIES) + shutil.rmtree(IF_MANGLED_FILES) # make the necessary directories - if not os.path.exists("copies"): - os.mkdir("copies") + if not os.path.exists(COPIES): + os.mkdir(COPIES) - if not os.path.exists(os.path.join("copies", "linux")): - os.mkdir(os.path.join("copies", "linux")) + if not os.path.exists(os.path.join(COPIES, LINUX_TUTORIAL)): + os.mkdir(os.path.join(COPIES, LINUX_TUTORIAL)) - if not os.path.exists("parsed_mds"): - os.mkdir("parsed_mds") + if not os.path.exists(PARSED_MDS): + os.mkdir(PARSED_MDS) - if not os.path.exists("if_mangled_files"): - os.mkdir("if_mangled_files") + if not os.path.exists(IF_MANGLED_FILES): + os.mkdir(IF_MANGLED_FILES) ################### define loop-invariant variables ################### # variable that keeps track of the source directories - source_directories = [os.path.join("..", "..", "mkdocs", "docs", "HPC"), - os.path.join("..", "..", "mkdocs", "docs", "HPC", "linux-tutorial")] + source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), 
+ os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] # list of all the filenames filenames_generic = {} @@ -535,7 +573,7 @@ def main(): all_items = os.listdir(source_directory) files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] for file in files: - if "linux-tutorial" in source_directory: + if LINUX_TUTORIAL in source_directory: filenames_linux[file] = os.path.join(source_directory, file) else: filenames_generic[file] = os.path.join(source_directory, file) @@ -546,26 +584,26 @@ def main(): ################### define/reset loop specific variables ################### # variable that keeps track of whether file is part of the linux tutorial - is_linux_tutorial = bool("linux-tutorial" in filenames[filename]) + is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename]) # make a copy of the original file in order to make sure the original does not get altered if is_linux_tutorial: - copy_file = os.path.join("copies", "linux", filename) + copy_file = os.path.join(COPIES, LINUX_TUTORIAL, filename) else: - copy_file = os.path.join("copies", filename) + copy_file = os.path.join(COPIES, filename) shutil.copyfile(filenames[filename], copy_file) # variable that keeps track of the directories that are used to write in at different levels if is_linux_tutorial: - root_dir_generic = os.path.join("parsed_mds", "generic", "linux_tutorial") - root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux", "linux_tutorial") - root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows", "linux_tutorial") - root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos", "linux_tutorial") + root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR, LINUX_TUTORIAL) + root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX, LINUX_TUTORIAL) + root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS, 
LINUX_TUTORIAL) + root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS, LINUX_TUTORIAL) else: - root_dir_generic = os.path.join("parsed_mds", "generic") - root_dir_os_specific_linux = os.path.join("parsed_mds", "os_specific", "linux") - root_dir_os_specific_windows = os.path.join("parsed_mds", "os_specific", "windows") - root_dir_os_specific_macos = os.path.join("parsed_mds", "os_specific", "macos") + root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR) + root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX) + root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) + root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS) root_dirs = [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos] # variable for the main title (needed for reference links) @@ -586,7 +624,7 @@ def main(): link_lists = [links_generic, links_linux, links_windows, links_macos] # dictionaries to keep track of current OS - active_OS_if_states = {"linux": "inactive", "windows": "inactive", "macos": "inactive"} + active_OS_if_states = {LINUX: INACTIVE, WINDOWS: INACTIVE, MACOS: INACTIVE} # variable that shows whether the first title has been reached yet after_first_title = False @@ -597,15 +635,8 @@ def main(): ################### actually parse the md file ################### # create directories for the source markdown file - create_directory(root_dir_generic) - create_directory(os.path.join("parsed_mds", "os_specific")) - create_directory(root_dir_os_specific_linux) - create_directory(root_dir_os_specific_windows) - create_directory(root_dir_os_specific_macos) - create_directory(os.path.join(root_dir_generic, curr_dirs[0])) - create_directory(os.path.join(root_dir_os_specific_linux, curr_dirs[0])) - create_directory(os.path.join(root_dir_os_specific_windows, curr_dirs[0])) - create_directory(os.path.join(root_dir_os_specific_macos, curr_dirs[0])) + 
for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: + os.makedirs(directory, exist_ok=True) # process the jinja macros jinja_parser(filename, copy_file) @@ -630,22 +661,23 @@ def main(): elif after_first_title: # check for if-statements and write the appropriate lines in the right files next_action = check_if_statements(line, active_OS_if_states) - while next_action[0] == "write_text_and_check_extra_message" or next_action[0] == "check_extra_message": - if next_action[0] == "write_text_and_check_extra_message": + while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE: + if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) next_action = check_if_statements(next_action[1], active_OS_if_states) - if next_action[0] == "write_text": + if next_action[0] == WRITE_TEXT: link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) # write end of file for the last file for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) - # remove_directory_tree("copies") - # remove_directory_tree("if_mangled_files") + # remove_directory_tree(COPIES) + # 
remove_directory_tree(IF_MANGLED_FILES) +################### run the script ################### print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() print("Parsing finished successfully") From ce5235250b0e99e4a60fd03ab150b838c5e4d82e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 15:47:55 +0200 Subject: [PATCH 040/145] cleaned up parser with macros --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b36f5c3c471e..b6833632267f 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -273,7 +273,7 @@ def mangle_os_ifs(line, is_os): # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: - if is_os == OS_IF or is_os == OS_IF_IN_OS_IF: + if is_os in (OS_IF, OS_IF_IN_OS_IF): line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: @@ -298,7 +298,7 @@ def mangle_os_ifs(line, is_os): is_os = NON_OS_IF else: - if is_os == OS_IF or is_os == OS_IF_IN_OS_IF: + if is_os in (OS_IF, OS_IF_IN_OS_IF): line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING added_length += 2 * len(IF_MANGLED_PART) From 5db34afdb1a8f8dc4439daaefbd97c9204caa3d9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 16 Aug 2024 15:52:29 +0200 Subject: [PATCH 041/145] cleaned up parser with macros --- 
scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b6833632267f..a2abc77b798a 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -246,7 +246,7 @@ def mangle_os_ifs(line, is_os): NON_OS_IF: not in an os-if NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if OS_IF: in an os-if - OS_IF_IN_OS_IF: in an os-if nested in an os-if} + OS_IF_IN_OS_IF: in an os-if nested in an os-if :return line: the modified line with mangled os-related if-statements """ From 4226d28ddd50e91acb6988317d42de0f881eaea4 Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Mon, 19 Aug 2024 11:17:11 +0200 Subject: [PATCH 042/145] Update README.md --- scripts/HPC_chatbot_preprocessor/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index e1e12046dd5a..5e895d4d62c0 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -4,12 +4,12 @@ ## Generated file structure -This directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: +The generated directory structure is written as a subdirectory of `parsed_mds`. In `parsed_mds`, two subdirectories can be found: - `generic` contains the parts of the markdown sources that were non-OS-specific - `os_specific` contains the parts of the markdown sources that were OS-specific -Withing `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. +Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. 
These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. @@ -26,4 +26,4 @@ The script can be ran in a shell environment with the following command: ```shell python chatbot_parser.py -``` \ No newline at end of file +``` From d730a262f667fc00ce637d4ee7e607f201072c2f Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Mon, 19 Aug 2024 12:37:51 +0200 Subject: [PATCH 043/145] Update README.md --- scripts/HPC_chatbot_preprocessor/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 5e895d4d62c0..6e7d0edc71a3 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -1,6 +1,6 @@ # Chatbot parser -`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory for a chatbot to be trained on. +`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. 
## Generated file structure From f3182e35b769550f9483a4e690b300c8775e494b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 13:44:46 +0200 Subject: [PATCH 044/145] added section about restrictions on input files --- scripts/HPC_chatbot_preprocessor/README.md | 92 ++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 6e7d0edc71a3..23c1d87cc44c 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -27,3 +27,95 @@ The script can be ran in a shell environment with the following command: ```shell python chatbot_parser.py ``` + +## Restrictions on source-files + +Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input. + + +### Nested if structures + +The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. So respectively a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an undetermined amount of non-os-related if-structures, but no non-os-related if structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. 
+ +#### Examples of valid and invalid if-structures + +##### Allowed + +###### non-os-related in os-related + +This is an example of one of the basic allowed if-structures + +``` +if OS == windows: + if site == Gent: + ... + endif +endif +``` + +###### os-related in os-related in non-os-related + +This is an example of a basic allowed if-structure nested in a non-os-specific if. + +``` +if site == Gent: + if OS == windows: + ... + else: + if OS == Linux: + ... + endif + endif +endif +``` + +##### Not allowed + +###### non-os-related in os-related in os-related + +This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures. + +``` +if OS != windows: + if OS == Linux: + if site == Gent: + ... + endif + endif +endif +``` + +This will result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. + +###### os-related in non-os-related in os-related + +This is an example of one of the basic allowed if-structures nested in an os-specific if-structure. + +``` +if OS != windows: + if site == Gent: + if OS == Linux: + ... + endif + endif +endif +``` + +This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. + +### Allowed html syntax + +The script contains a list of html syntax keywords it filters out. If more html syntax keywords are used in the future, it suffices to add them to this list to adapt the script to filter them out. The current list is: +``` +["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] +``` +The script is also adapted to take into consideration structures like and retain the link. + +### Markdown comments + +Any comments within the markdown files (for example TODO's) should follow the following syntax: + +``` + +``` + and should be limited to one line. 
From 675bec5c75d7b0cda95d61867d6a587e7ba13a19 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 13:53:32 +0200 Subject: [PATCH 045/145] adapted section about restrictions on input files --- scripts/HPC_chatbot_preprocessor/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 23c1d87cc44c..6899aacf2b0f 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -43,7 +43,7 @@ The script uses the if-structures in the source-files to split the documentation ###### non-os-related in os-related -This is an example of one of the basic allowed if-structures +This is an example of one of the basic allowed if-structures (`NON_OS_IF_IN_OS_IF`) ``` if OS == windows: @@ -55,7 +55,7 @@ endif ###### os-related in os-related in non-os-related -This is an example of a basic allowed if-structure nested in a non-os-specific if. +This is an example of the basic allowed if-structure `OS_IF_IN_OS_IF` nested in a non-os-specific if. ``` if site == Gent: From f1e58ef776a24eb2bb39bed1de1eb0611a0f60eb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 13:54:12 +0200 Subject: [PATCH 046/145] adapted section about restrictions on input files --- scripts/HPC_chatbot_preprocessor/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 6899aacf2b0f..c18a4ebea64b 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -73,7 +73,7 @@ endif ###### non-os-related in os-related in os-related -This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures. +This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures (`OS_IF_IN_OS_IF`). 
``` if OS != windows: @@ -89,7 +89,7 @@ This will result in the parser "forgetting" it opened an os-specific if-statemen ###### os-related in non-os-related in os-related -This is an example of one of the basic allowed if-structures nested in an os-specific if-structure. +This is an example of the basic allowed if-structure `OS_IF` (indirectly) nested in an os-specific if-structure. ``` if OS != windows: @@ -109,7 +109,7 @@ The script contains a list of html syntax keywords it filters out. If more html ``` ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] ``` -The script is also adapted to take into consideration structures like and retain the link. +The script is also adapted to take into consideration structures like `` and retain the link. ### Markdown comments From a16850925bdcfad0f3017578082b727244d8b63b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 14:37:48 +0200 Subject: [PATCH 047/145] change variables to be lowercase --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a2abc77b798a..600c2c08c1e3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -267,14 +267,14 @@ def mangle_os_ifs(line, is_os): pos_second_mangle = constr_match.end() + start_index + added_length - 1 # different parts of the original string - PART_BEFORE_MANGLING = line[:pos_first_mangle] - PART_BETWEEN_MANGLING = line[pos_first_mangle:pos_second_mangle] - PART_AFTER_MANGLING = line[pos_second_mangle:] + part_before_mangling = line[:pos_first_mangle] + part_between_mangling = line[pos_first_mangle:pos_second_mangle] + part_after_mangling = line[pos_second_mangle:] # this logic isn't flawless, there are number of nested if-constructions that are technically possible that 
would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): - line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: is_os = NON_OS_IF @@ -285,7 +285,7 @@ def mangle_os_ifs(line, is_os): elif if_match: if if_os_match: - line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: is_os = OS_IF_IN_OS_IF @@ -299,7 +299,7 @@ def mangle_os_ifs(line, is_os): else: if is_os in (OS_IF, OS_IF_IN_OS_IF): - line = PART_BEFORE_MANGLING + IF_MANGLED_PART + PART_BETWEEN_MANGLING + IF_MANGLED_PART + PART_AFTER_MANGLING + line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) start_index += constr_match.end() From 09b86c9fba292b76ddb0c8ecf523e5c73d87c30c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 14:49:08 +0200 Subject: [PATCH 048/145] take out some copy pasting --- .../chatbot_parser.py | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 600c2c08c1e3..7b25c5a9d06b 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -431,14 +431,10 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block): else: data = {} - if GENERIC_DIR in file_name: - curr_line, link_lists[0] = replace_markdown_markers(curr_line, 
link_lists[0], in_code_block) - elif LINUX in file_name: - curr_line, link_lists[1] = replace_markdown_markers(curr_line, link_lists[1], in_code_block) - elif WINDOWS in file_name: - curr_line, link_lists[2] = replace_markdown_markers(curr_line, link_lists[2], in_code_block) - else: - curr_line, link_lists[3] = replace_markdown_markers(curr_line, link_lists[3], in_code_block) + os_list = [GENERIC_DIR, LINUX, WINDOWS, MACOS] + for i, os_ in enumerate(os_list): + if os_ in file_name: + curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block) if CONTENT in data: data[CONTENT] += curr_line @@ -467,12 +463,12 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las # check that the line is part of the website for gent if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states[LINUX] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states[WINDOWS] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[2], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - if active_OS_if_states[MACOS] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[3], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) + else: + os_list = [LINUX, WINDOWS, MACOS] + for i, os_ in enumerate(os_list): + if active_OS_if_states[os_] == ACTIVE: + link_lists = write_text_to_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), + curr_line, link_lists, in_code_block) return link_lists From f95b99e203163e5bf0514a4ae4c4af16d1dd50df Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 15:11:04 +0200 
Subject: [PATCH 049/145] added warning about long filepaths --- scripts/HPC_chatbot_preprocessor/README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index c18a4ebea64b..0b715d856500 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -32,7 +32,6 @@ python chatbot_parser.py Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input. - ### Nested if structures The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. So respectively a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an undetermined amount of non-os-related if-structures, but no non-os-related if structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. @@ -119,3 +118,7 @@ Any comments within the markdown files (for example TODO's) should follow the fo ``` and should be limited to one line. + +### Long filenames + +Due to the nature of this script, it can generate directories with very long names. Depending on the operating system, this can cause problems with filepaths being to long resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long. 
From 06bb7b9ea18f7cbae70190e1e939eb5952ee09b1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 15:12:21 +0200 Subject: [PATCH 050/145] fixing typos --- scripts/HPC_chatbot_preprocessor/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 0b715d856500..55996e0bef53 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -119,6 +119,6 @@ Any comments within the markdown files (for example TODO's) should follow the fo ``` and should be limited to one line. -### Long filenames +### Long filepaths -Due to the nature of this script, it can generate directories with very long names. Depending on the operating system, this can cause problems with filepaths being to long resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long. +Due to the nature of this script, it can generate large directories with very long names. Depending on the operating system, this can cause problems with filepaths being too long, resulting in files that cannot be opened. A possible fix for this is to make sure the filepath to the script is not too long.
From 2f3e5b303a8875fe315592f792addba78f4d0e82 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 19 Aug 2024 17:02:30 +0200 Subject: [PATCH 051/145] take out copy pasting --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 7b25c5a9d06b..c2fe409b4207 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -464,10 +464,9 @@ def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, las if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE: link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) else: - os_list = [LINUX, WINDOWS, MACOS] - for i, os_ in enumerate(os_list): + for i, os_ in enumerate([LINUX, WINDOWS, MACOS]): if active_OS_if_states[os_] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), + link_lists = write_text_to_file(os.path.join(root_dirs[i + 1], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) return link_lists From 0c4dbe8e02639de7787af8109df7781053101d2a Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 20 Aug 2024 14:39:12 +0200 Subject: [PATCH 052/145] first draft version of the restructured script to accommodate for the new file format --- .../chatbot_parser.py | 302 +++++++++++++++--- .../HPC_chatbot_preprocessor/requirements.txt | 3 +- 2 files changed, 255 insertions(+), 50 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c2fe409b4207..72aa40292f92 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ 
b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -4,9 +4,15 @@ import shutil import yaml from itertools import chain -from jinja2 import FileSystemLoader, Environment, ChoiceLoader +from pathlib import Path +from jinja2 import FileSystemLoader, Environment, ChoiceLoader, Template #################### define macro's #################### +# customizable macros +MIN_PARAGRAPH_LENGTH = 128 +MAX_TITLE_DEPTH = 4 +INCLUDE_LINKS_IN_PLAINTEXT = True + # directories PARSED_MDS = "parsed_mds" COPIES = "copies" @@ -24,10 +30,11 @@ LINUX = "linux" WINDOWS = "windows" MACOS = "macos" +GENERIC = "generic" # urls REPO_URL = 'https://github.com/hpcugent/vsc_user_docs' -DOCS_URL = "docs.hpc.ugent.be" +DOCS_URL = "https://docs.hpc.ugent.be" # OS-related if-states ACTIVE = "active" @@ -76,7 +83,7 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs # detect titles match = re.match(r'^#+ ', curr_line) - if match and len(match.group(0)) <= 5: + if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1: logic_output = len(match.group(0)) - 1 else: logic_output = 0 @@ -102,19 +109,37 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs os.makedirs(os.path.join(root_dirs[i], curr_dirs[logic_output]), exist_ok=True) # update the higher order current directories - for i in range(logic_output + 1, 4): + for i in range(logic_output + 1, MAX_TITLE_DEPTH + 1): curr_dirs[i] = curr_dirs[logic_output] return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists -def replace_markdown_markers(curr_line, linklist, in_code_block): +def check_for_title_simple(line, in_code_block, curr_dirs): + + # detect titles + match = re.match(r'^#+ ', line) + if match and len(match.group(0)) <= 5 and not in_code_block: + title_length = len(match.group(0)) - 1 + curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 
1:-1].replace(' ', '-'))) + + # update the higher order current directories + for i in range(title_length + 1, MAX_TITLE_DEPTH + 1): + curr_dirs[i] = curr_dirs[title_length] + + return title_length + else: + return 0 + + +def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): """ function that replaces certain markdown structures with the equivalent used on the website :param curr_line: the current line on which markdown structures need to be replaced :param linklist: the list used to store links that need to be printed at the end of the file :param in_code_block: boolean indicating whether the current line is part of a code block + :param main_title: the main title of the file that is being processed :return curr_line: the adapted current line :return linklist: the updated linklist """ @@ -128,7 +153,13 @@ def replace_markdown_markers(curr_line, linklist, in_code_block): if matches: for match in matches: curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") - linklist.append(match[1]) + if ".md" not in match[1]: + if "#" not in match[1]: + linklist.append(match[1]) + else: + linklist.append(DOCS_URL + main_title + "/" + match[1]) + else: + linklist.append(DOCS_URL + match[1].replace(".md", "/").replace("index", "").rstrip("/")) # codeblock (with ``` -> always stands on a separate line, so line can be dropped) if '```' in curr_line: @@ -166,7 +197,6 @@ def replace_markdown_markers(curr_line, linklist, in_code_block): # keep the rest else: - # print("<" + content + ">") pass # structures with !!! 
(info, tips, warnings) @@ -199,6 +229,91 @@ def replace_markdown_markers(curr_line, linklist, in_code_block): return curr_line, linklist +def split_text(file, main_title): + + # start of assuming we haven't encountered a title + after_first_title = False + + # start of assuming we are not in a code_block + in_code_block = False + + # define initial dictionaries + paragraphs_text = {} + paragraphs_metadata = {} + + # list to keep track of links in the text + link_list = [] + + # list to keep track of the order of the subtitles + subtitle_order = [] + + # variable to keep track of the title level + title_level = 0 + + # list to keep track of most recent directories on each title level + if LINUX_TUTORIAL not in file: + curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] + else: + curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)] + + with open(file, 'r') as readfile: + + for line in readfile: + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] + + title_level = check_for_title_simple(line, in_code_block, curr_dirs) + + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block + + # line is a title with a maximum depth of 4 + if title_level > 0: + if after_first_title: + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + title = make_valid_title(line[title_level + 1:-1]) + + # create an entry for the file in the paragraphs text dictionary + paragraphs_text[title] = "" + + after_first_title = True + subtitle_order.append(title) + + # reset link_list + link_list = [] + + # line is not a title + elif after_first_title: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + if title in paragraphs_text.keys() and line != "\n": + paragraphs_text[title] += line + elif line != "\n": + paragraphs_text[title] = line + + # write metadata for the last file + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level]) + + return paragraphs_text, paragraphs_metadata, subtitle_order + + +def write_metadata(main_title, subtitle, links, title_level, directory): + + paragraph_metadata = {'main_title': main_title, 'subtitle': subtitle, 'title_depth': title_level, 'directory': directory} + + if len(links) > 0: + paragraph_metadata['links'] = {} + for i, link in enumerate(links): + paragraph_metadata['links'][str(i)] = link + + paragraph_metadata['parent_title'] = Path(directory).parent.name + + return paragraph_metadata + + def jinja_parser(filename, copy_location): """ function that let's jinja do its thing to format the files except for the os-related if-statements @@ -434,7 +549,7 @@ def write_text_to_file(file_name, curr_line, link_lists, in_code_block): os_list = [GENERIC_DIR, LINUX, WINDOWS, MACOS] for i, os_ in enumerate(os_list): if os_ in file_name: - curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block) + curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block, "placeholder") if 
CONTENT in data: data[CONTENT] += curr_line @@ -532,6 +647,66 @@ def make_valid_title(title): return valid_filename +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): + + # make the directory needed for the files that will be written + filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"]) + os.makedirs(filepath) + + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) + + +def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): + for i, OS in enumerate([LINUX, WINDOWS, MACOS]): + + # Unmangle if's to use jinja parser + paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title]) + + # Use jinja to render a different version of the text for each OS + template = Template(paragraphs_text[title]) + text = template.render(OS=OS) + + # define the filepath + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) + os.makedirs(filepath) + + # write the files + write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS) + + +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): + # write text file + with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + ".txt"), 'w') as writefile: + writefile.write(text) + + # write metadata + metadata = paragraphs_metadata[title] + + if title_order_number != 0: + metadata["previous_title"] = title_order[title_order_number - 1] + else: + metadata["previous_title"] = None + + if title_order_number != len(title_order) - 1: + metadata["next_title"] = title_order[title_order_number + 1] + else: + metadata["next_title"] = None + + metadata["OS"] = OS + + if bool(LINUX_TUTORIAL in paragraphs_metadata[title]["directory"]): + linux_part = LINUX_TUTORIAL + "/" + else: + linux_part = "" + if OS == 
GENERIC: + os_part = "" + else: + os_part = OS + "/" + metadata["reference_link"] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title]["main_title"] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') + + with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + "_metadata.json"), 'w') as writefile: + json.dump(metadata, writefile, indent=4) + def main(): """ main function @@ -557,21 +732,27 @@ def main(): ################### define loop-invariant variables ################### - # variable that keeps track of the source directories - source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - - # list of all the filenames + # # variable that keeps track of the source directories + # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), + # os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + # + # # list of all the filenames + # filenames_generic = {} + # filenames_linux = {} + # for source_directory in source_directories: + # all_items = os.listdir(source_directory) + # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + # for file in files: + # if LINUX_TUTORIAL in source_directory: + # filenames_linux[file] = os.path.join(source_directory, file) + # else: + # filenames_generic[file] = os.path.join(source_directory, file) + + # Temporary variables to test with just one singular file filenames_generic = {} filenames_linux = {} - for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if LINUX_TUTORIAL in source_directory: - filenames_linux[file] = os.path.join(source_directory, file) - else: - 
filenames_generic[file] = os.path.join(source_directory, file) + filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md" + filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files for filenames in [filenames_generic, filenames_linux]: @@ -621,6 +802,10 @@ def main(): # dictionaries to keep track of current OS active_OS_if_states = {LINUX: INACTIVE, WINDOWS: INACTIVE, MACOS: INACTIVE} + # dictionaries to save the paragraphs and metadata before it is written to files + paragraphs_text = {} + paragraphs_metadata = {} + # variable that shows whether the first title has been reached yet after_first_title = False @@ -636,37 +821,56 @@ def main(): # process the jinja macros jinja_parser(filename, copy_file) - # open the file and store line by line in the right file - with open(copy_file, 'r') as readfile: - - for line in readfile: - title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) + # split the text in paragraphs + paragraphs_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title) - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block + # for every section, either make the whole section generic, or create an os-specific file for each OS + for i, subtitle in enumerate(subtitle_order): - # line is a title with a maximum depth of 4 - if title_level > 0: - last_title = title - last_directory = directory - after_first_title = True + # generic + if IF_MANGLED_PART not in paragraphs_text[subtitle]: + write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) - # line is not a title - elif after_first_title: - # check for if-statements and write the appropriate lines in the right files - next_action = check_if_statements(line, active_OS_if_states) - while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE: - if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: - link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) - next_action = check_if_statements(next_action[1], active_OS_if_states) - - if next_action[0] == WRITE_TEXT: - link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) - - # write end of file for the last file - for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): - write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) + # os-specific + else: + write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) + + + # # open the file and store line by line in the right file + # with open(copy_file, 'r') as readfile: + # + # for line in readfile: + # title_level, title, directory, curr_dirs, 
link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) + # + # # detect codeblocks to make sure titles aren't detected in them + # if '```' in line or (('
' in line) ^ ('
' in line)): + # in_code_block = not in_code_block + # + # # line is a title with a maximum depth of 4 + # if title_level > 0: + # last_title = title + # last_directory = directory + # after_first_title = True + # + # # line is not a title + # elif after_first_title: + # # check for if-statements and write the appropriate lines in the right files + # next_action = check_if_statements(line, active_OS_if_states) + # while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE: + # if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: + # link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) + # next_action = check_if_statements(next_action[1], active_OS_if_states) + # + # if next_action[0] == WRITE_TEXT: + # link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) + # + # # write end of file for the last file + # for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): + # write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) + + print(paragraphs_text) + print(paragraphs_metadata) + print(subtitle_order) # remove_directory_tree(COPIES) # remove_directory_tree(IF_MANGLED_FILES) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 19ed8a2a29de..907f08fda77f 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -3,4 +3,5 @@ re shutil pypandoc yaml -jinja2 \ No newline at end of file +jinja2 +pathlib \ No newline at end of file From 
38c45723441d13cde3c799f4aa76bff9f4093bfe Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 20 Aug 2024 14:42:28 +0200 Subject: [PATCH 053/145] added support to filter out collapsable admonitions --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 72aa40292f92..4c75df17af00 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -203,6 +203,10 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): if '!!!' in curr_line: curr_line = re.sub(r'!!!', "", curr_line) + # structures with ??? (collapsable admonitions) + if '???' in curr_line: + curr_line = re.sub(r'\?\?\?', "", curr_line) + # get rid of other markdown indicators (`, *, +, _) if not in_code_block: From 5cbd6533333b0226d812fec08c62b3001ba53ade Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 20 Aug 2024 16:49:51 +0200 Subject: [PATCH 054/145] attempt at fix for problems with jinja include, not working yet --- .../chatbot_parser.py | 51 ++++++++++++++++--- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 4c75df17af00..c1bd1a99b608 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -254,6 +254,9 @@ def split_text(file, main_title): # variable to keep track of the title level title_level = 0 + # variable to allow for if statements to "continue" over multiple paragraphs + open_ifs = "" + # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] @@ -278,11 +281,12 @@ def split_text(file, main_title): # line is a title with a maximum depth of 4 if title_level > 0: 
if after_first_title: + paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) title = make_valid_title(line[title_level + 1:-1]) # create an entry for the file in the paragraphs text dictionary - paragraphs_text[title] = "" + paragraphs_text[title] = open_ifs after_first_title = True subtitle_order.append(title) @@ -318,6 +322,38 @@ def write_metadata(main_title, subtitle, links, title_level, directory): return paragraph_metadata +def close_ifs(text): + patterns = { + 'if': r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', + 'endif': r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', + 'else': r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})' + } + if_count = len(re.findall(patterns['if'], text.replace("\n", ""))) + endif_count = len(re.findall(patterns['endif'], text.replace("\n", ""))) + if IF_MANGLED_PART not in text or if_count == endif_count: + return text, "" + else: + + # Find all matches for each pattern + matches = [] + for key, pattern in patterns.items(): + for match in re.finditer(pattern, text): + matches.append(match) + + # sort the matches according to their start index + matches.sort(key=lambda x: x.start()) + + # extract the strings from the matches + open_ifs = [] + for match in matches: + open_ifs.append(match.group(0)) + + # Concatenate all matches into a single string + open_ifs = ''.join(open_ifs) + + return text + r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}', open_ifs + + def jinja_parser(filename, copy_location): """ function that let's jinja do its thing to format the files except for the os-related if-statements @@ -380,6 +416,7 @@ def mangle_os_ifs(line, is_os): if_match = re.search(r'if ', match.group(1)) if_os_match = re.search(r'if OS ', match.group(1)) endif_match = re.search(r'endif', match.group(1)) + 
else_match = re.search(r'else', match.group(1)) # mangle positions pos_first_mangle = constr_match.start() + start_index + added_length + 1 @@ -416,7 +453,7 @@ def mangle_os_ifs(line, is_os): else: is_os = NON_OS_IF - else: + elif else_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) @@ -655,7 +692,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, # make the directory needed for the files that will be written filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"]) - os.makedirs(filepath) + os.makedirs(filepath, exist_ok=True) write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) @@ -672,7 +709,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # define the filepath filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) - os.makedirs(filepath) + os.makedirs(filepath, exist_ok=True) # write the files write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS) @@ -761,6 +798,7 @@ def main(): # for loops over all files for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): + # print(filename) ################### define/reset loop specific variables ################### # variable that keeps track of whether file is part of the linux tutorial @@ -830,6 +868,7 @@ def main(): # for every section, either make the whole section generic, or create an os-specific file for each OS for i, subtitle in enumerate(subtitle_order): + # print(subtitle) # generic if IF_MANGLED_PART not in paragraphs_text[subtitle]: @@ -872,10 +911,6 @@ def main(): # for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): # write_end_of_file(os.path.join(root_dirs[i], 
last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) - print(paragraphs_text) - print(paragraphs_metadata) - print(subtitle_order) - # remove_directory_tree(COPIES) # remove_directory_tree(IF_MANGLED_FILES) From 0e6f8b27f19c2256880960c9ee48b680045c5419 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 10:02:41 +0200 Subject: [PATCH 055/145] fixed an issue with jinja templates --- .../chatbot_parser.py | 60 ++++++++++++------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c1bd1a99b608..e72dc0643deb 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -5,7 +5,7 @@ import yaml from itertools import chain from pathlib import Path -from jinja2 import FileSystemLoader, Environment, ChoiceLoader, Template +from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template #################### define macro's #################### # customizable macros @@ -381,7 +381,7 @@ def jinja_parser(filename, copy_location): mangle_ifs(copy_location, filename) # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath=IF_MANGLED_FILES), FileSystemLoader(searchpath=os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR))]) + template_loader = ChoiceLoader([FileSystemLoader(searchpath=[IF_MANGLED_FILES, os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR)]), FunctionLoader(load_macros)]) templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) @@ -391,6 +391,24 @@ def jinja_parser(filename, copy_location): output_file.write(rendered_content) +def load_macros(name): + """ + function used by the jinja FunctionLoader to retrieve templates from the macros folder 
since the normal FileSystemLoader can't locate them properly + + :param name: name of the package + :return: + """ + + macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, "macros") + + if "../macros/" in name: + package_name = name.split("../macros/")[1] + file_location = os.path.join(macros_location, package_name) + + with open(file_location, 'r') as readfile: + return readfile.read() + + def mangle_os_ifs(line, is_os): """ function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. @@ -773,27 +791,27 @@ def main(): ################### define loop-invariant variables ################### - # # variable that keeps track of the source directories - # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - # os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - # - # # list of all the filenames - # filenames_generic = {} - # filenames_linux = {} - # for source_directory in source_directories: - # all_items = os.listdir(source_directory) - # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - # for file in files: - # if LINUX_TUTORIAL in source_directory: - # filenames_linux[file] = os.path.join(source_directory, file) - # else: - # filenames_generic[file] = os.path.join(source_directory, file) - - # Temporary variables to test with just one singular file + # variable that keeps track of the source directories + source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), + os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + + # list of all the filenames filenames_generic = {} filenames_linux = {} - filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md" - 
filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" + for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + if LINUX_TUTORIAL in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) + + # # Temporary variables to test with just one singular file + # filenames_generic = {} + # filenames_linux = {} + # filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md" + # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files for filenames in [filenames_generic, filenames_linux]: From cd778370a6cab55700d3e66745a049d9a644b3f9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 10:31:10 +0200 Subject: [PATCH 056/145] added docstrings to new functions --- .../chatbot_parser.py | 79 +++++++++++++++++-- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e72dc0643deb..938da0628c8d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -62,7 +62,7 @@ ################### define functions ################### -def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): +def check_for_title_xl(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): """ function that uses the check_for_title_logic function to create the 
appropriate directories and update the necessary variables @@ -115,8 +115,15 @@ def check_for_title(curr_line, main_title, last_directory, last_title, curr_dirs return logic_output, make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists -def check_for_title_simple(line, in_code_block, curr_dirs): +def check_for_title(line, in_code_block, curr_dirs): + """ + function that checks for titles in the current line. Used by split_text to split the text among the subtitles + :param line: the current line to be checked for a title + :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles + :param curr_dirs: the current working directories for each level of subtitle, to be updated when a new title is found + :return title_length: The amount of hashtags in front of the title on the current line + """ # detect titles match = re.match(r'^#+ ', line) if match and len(match.group(0)) <= 5 and not in_code_block: @@ -234,6 +241,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): def split_text(file, main_title): + """ + Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata + :param file: the filepath of the file to be split + :param main_title: the main title of the file + :return paragraphs_text: dictionary containing the split sections of text + :return paragraphs_metadata: dictionary containing the metadata of each split section of text + :return subtitle_order: list containing all encountered subtitles in order of appearance + """ # start of assuming we haven't encountered a title after_first_title = False @@ -272,7 +287,7 @@ def split_text(file, main_title): last_title_level = title_level last_dir = curr_dirs[last_title_level] - title_level = check_for_title_simple(line, in_code_block, curr_dirs) + title_level = check_for_title(line, 
in_code_block, curr_dirs) # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): @@ -309,6 +324,16 @@ def split_text(file, main_title): def write_metadata(main_title, subtitle, links, title_level, directory): + """ + Function that writes metadata about a text section to a dictionary + + :param main_title: The main title of the file containing the section + :param subtitle: the title of the section + :param links: a list of links contained within the section + :param title_level: the depth of the title of the section + :param directory: the directory where the section will eventually be written (can either be generic or os-specific) + :return paragraph_metadata: dictionary containing the metadata about the section + """ paragraph_metadata = {'main_title': main_title, 'subtitle': subtitle, 'title_depth': title_level, 'directory': directory} @@ -323,6 +348,17 @@ def write_metadata(main_title, subtitle, links, title_level, directory): def close_ifs(text): + """ + Function to check whether all if-statements in a section are closed properly. If that is not the case, the function + closes all if-statements at the end of the section and returns a prefix for the next section containing all if-statements + of the section it is processing. This needs to be done because the start of the next section would also be contained within the + last unclosed if-statement of its previous section. 
+ + :param text: the text of the section it checks + :return text: the adapted text where all if-statements are closed + :return prefix: the prefix for the next section + """ + patterns = { 'if': r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', 'endif': r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', @@ -707,6 +743,16 @@ def make_valid_title(title): def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): + """ + Function that writes text and metadata of a generic (non-os-specific) file + + :param title: title of section + :param paragraphs_text: dictionary containing all paragraphs of text + :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text + :param title_order: list containing all subtitles in order + :param title_order_number: order number of the title of the section that is being written + :return: + """ # make the directory needed for the files that will be written filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"]) @@ -716,6 +762,16 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): + """ + Function that writes text and metadata of os-specific files + + :param title: title of section + :param paragraphs_text: dictionary containing all paragraphs of text + :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text + :param title_order: list containing all subtitles in order + :param title_order_number: order number of the title of the section that is being written + :return: + """ for i, OS in enumerate([LINUX, WINDOWS, MACOS]): # Unmangle if's to use jinja parser @@ -734,6 +790,19 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or def write_files(title, text, 
paragraphs_metadata, title_order, title_order_number, filepath, OS): + """ + Function to write files to a certain filepath + + :param title: title of the section to be written + :param text: section of text to be written + :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text + :param title_order: list containing all subtitles in order + :param title_order_number: order number of the title of the section that is being written + :param filepath: filepath to write files to + :param OS: OS to be included in the metadata + :return: + """ + # write text file with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + ".txt"), 'w') as writefile: writefile.write(text) @@ -816,7 +885,6 @@ def main(): # for loops over all files for filenames in [filenames_generic, filenames_linux]: for filename in filenames.keys(): - # print(filename) ################### define/reset loop specific variables ################### # variable that keeps track of whether file is part of the linux tutorial @@ -896,12 +964,11 @@ def main(): else: write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) - # # open the file and store line by line in the right file # with open(copy_file, 'r') as readfile: # # for line in readfile: - # title_level, title, directory, curr_dirs, link_lists = check_for_title(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) + # title_level, title, directory, curr_dirs, link_lists = check_for_title_xl(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) # # # detect codeblocks to make sure titles aren't detected in them # if '```' in line or (('
' in line) ^ ('
' in line)): From 98eb695790b30cfbde32c0b837a318ed11c88d59 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 11:24:35 +0200 Subject: [PATCH 057/145] only add necessary if-statements in front of non-if-complete sections --- .../chatbot_parser.py | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 938da0628c8d..e15fce4f049c 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -384,10 +384,28 @@ def close_ifs(text): for match in matches: open_ifs.append(match.group(0)) + # only include the non-closed if-statements + changed = True + while changed: + changed = False + last_if = -1 + last_else = -1 + for i, if_part in enumerate(open_ifs): + if re.search(patterns['if'], if_part): + last_if = i + elif re.search(patterns['else'], if_part): + last_else = i + elif re.search(patterns['endif'], if_part): + changed = True + del open_ifs[i] + if last_else > last_if: + del open_ifs[last_else] + del open_ifs[last_if] + # Concatenate all matches into a single string open_ifs = ''.join(open_ifs) - return text + r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}', open_ifs + return text + (r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}')*(if_count - endif_count), open_ifs def jinja_parser(filename, copy_location): @@ -451,7 +469,7 @@ def mangle_os_ifs(line, is_os): We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. :param line: the current line to check for os-related if-statements - :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF + :param is_os: variable keep track of the current os-state of the if-statements. 
Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF NON_OS_IF: not in an os-if NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if OS_IF: in an os-if @@ -492,7 +510,7 @@ def mangle_os_ifs(line, is_os): is_os = OS_IF elif is_os == NON_OS_IF_IN_OS_IF: is_os = OS_IF - + elif if_match: if if_os_match: line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling @@ -506,7 +524,7 @@ def mangle_os_ifs(line, is_os): is_os = NON_OS_IF_IN_OS_IF else: is_os = NON_OS_IF - + elif else_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling @@ -879,7 +897,7 @@ def main(): # # Temporary variables to test with just one singular file # filenames_generic = {} # filenames_linux = {} - # filenames_generic["getting_started.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/getting_started.md" + # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files From 27457e371bdb494c06ac73a6cf4263a69d389631 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 12:12:45 +0200 Subject: [PATCH 058/145] fixed some more jinja problems --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e15fce4f049c..6bc9df169e3b 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -401,6 +401,7 @@ def close_ifs(text): if last_else > last_if: del open_ifs[last_else] del open_ifs[last_if] + break # Concatenate all matches into a single string open_ifs = ''.join(open_ifs) @@ -795,10 
+796,16 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # Unmangle if's to use jinja parser paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title]) + # slightly alter if-statements to be able to use predefined macros + paragraphs_text[title] = re.sub(OS, '"' + OS + '"', paragraphs_text[title]) + # Use jinja to render a different version of the text for each OS template = Template(paragraphs_text[title]) text = template.render(OS=OS) + # readjust text to correct overcorrections + text = re.sub('"' + OS + '"', OS, text) + # define the filepath filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) os.makedirs(filepath, exist_ok=True) From bb722876b2734c5a9deba84128fae6713e499652 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 12:33:18 +0200 Subject: [PATCH 059/145] implemented extra test to make sure generic files dont accidentally get flagged as os-specific --- .../chatbot_parser.py | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 6bc9df169e3b..a7d2fd5b5e2e 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -791,7 +791,8 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or :param title_order_number: order number of the title of the section that is being written :return: """ - for i, OS in enumerate([LINUX, WINDOWS, MACOS]): + text = {} + for OS in [LINUX, WINDOWS, MACOS]: # Unmangle if's to use jinja parser paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title]) @@ -801,17 +802,24 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # Use jinja to render a different version of the text for each OS template = Template(paragraphs_text[title]) - text = 
template.render(OS=OS) + text[OS] = template.render(OS=OS) # readjust text to correct overcorrections - text = re.sub('"' + OS + '"', OS, text) - - # define the filepath - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) - os.makedirs(filepath, exist_ok=True) - - # write the files - write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS) + text[OS] = re.sub('"' + OS + '"', OS, text[OS]) + + # check that not all versions are the same + unique_texts = set(text.values()) + if len(unique_texts) > 1: + for OS in [LINUX, WINDOWS, MACOS]: + # define the filepath + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) + os.makedirs(filepath, exist_ok=True) + + # write the files + write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS) + else: + paragraphs_text[title] = text[OS] + write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number) def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): From 67cb19e874e64d46564a8a5d34abc64c2a65e2a5 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 13:22:56 +0200 Subject: [PATCH 060/145] make sure empty os-specific files are not saved --- .../chatbot_parser.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a7d2fd5b5e2e..2c78ad90df3e 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -809,17 +809,22 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # check that not all versions are the same unique_texts = set(text.values()) - if len(unique_texts) > 1: - for OS in [LINUX, WINDOWS, MACOS]: - # define the filepath - filepath = 
os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) - os.makedirs(filepath, exist_ok=True) - - # write the files - write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS) - else: + if len(unique_texts) == 1: paragraphs_text[title] = text[OS] write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number) + else: + for OS in [LINUX, WINDOWS, MACOS]: + # check that file actually has some content + if len(text[OS]) > 0: + # define the filepath + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) + os.makedirs(filepath, exist_ok=True) + + # write the files + write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS) + else: + # don't write empty files + pass def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): From cf9834a25aa1ab3e690cefa76705888605afae2b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 13:27:05 +0200 Subject: [PATCH 061/145] clean up unused code --- .../chatbot_parser.py | 294 ------------------ 1 file changed, 294 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 2c78ad90df3e..51b4efa00b20 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -62,58 +62,6 @@ ################### define functions ################### -def check_for_title_xl(curr_line, main_title, last_directory, last_title, curr_dirs, root_dirs, link_lists, is_linux_tutorial_, in_code_block_): - """ - function that uses the check_for_title_logic function to create the appropriate directories and update the necessary variables - - :param curr_line: the line to be checked for a title - :param main_title: the main title of the file, needed in the case where a file is finished - :param 
last_directory: the most recently encountered directory - :param last_title: the most recently encountered title - :param curr_dirs: the most recent directories at each title level - :param root_dirs: a list containing the root directories - :param link_lists: a list containing all four link_lists with the links that will be printed at the bottom of a file - :param is_linux_tutorial_: boolean to indicate whether the current file is part of the linux tutorial - :param in_code_block_: boolean to indicate whether the current line is part of a codeblock - :return: the depth of the title - :return: the title found in the line if any - :return: the new directory in which the next file will be written - :return link_lists: updated link_lists - """ - - # detect titles - match = re.match(r'^#+ ', curr_line) - if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1: - logic_output = len(match.group(0)) - 1 - else: - logic_output = 0 - - # make necessary changes if a title has been detected - if logic_output == 0 or in_code_block_: - return 0, None, None, curr_dirs, link_lists - else: - - # if a new title is detected, write the end of the previous file - if last_title is not None: - for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): - write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial_, main_title, last_title) - - # reset the link lists for each OS - for i in range(4): - link_lists[i] = [] - - # make a new directory corresponding with the new title - curr_dirs[logic_output] = os.path.join(curr_dirs[logic_output - 1], make_valid_title(curr_line[logic_output + 1:-1].replace(' ', '-'))) - - for i in range(4): - os.makedirs(os.path.join(root_dirs[i], curr_dirs[logic_output]), exist_ok=True) - - # update the higher order current directories - for i in range(logic_output + 1, MAX_TITLE_DEPTH + 1): - curr_dirs[i] = curr_dirs[logic_output] - - return logic_output, make_valid_title(curr_line[logic_output + 
1:-1].replace(' ', '-')), curr_dirs[logic_output], curr_dirs, link_lists - def check_for_title(line, in_code_block, curr_dirs): """ @@ -554,191 +502,6 @@ def mangle_ifs(directory, filename): write_file.write(new_line) -def check_if_statements(curr_line, active_OS_if_states): - """ - function that checks for if-statements - - :param curr_line: the line to be checked for if-statements to build the directory structure - :param active_OS_if_states: dictionary keeping track of the active OS states according to the if-statements - :return: the next action to be done with the line: - DONE: An if-statement has been found at the start of the line, the active os list has been updated, processing of the current line is finished and a following line can be processed. - CHECK_EXTRA_MESSAGE: An if-statement has been found at the start of the line, the active os list has been updated, more text has been detected after the if-statement that also needs to be checked. - WRITE_TEXT: No if-statement has been found, write the current line to a file (can also be part of the current line) - WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: An if statement has been found not at the start of the line. Firstly, write the text up until the if-statement to a file, then check the rest of the line. 
- :return: the extra message to be checked, if any - :return: the text to be written to the file, if any - """ - # check whether the first part of the line contains information wrt if-statements - match = re.search(r'^\{' + IF_MANGLED_PART + '%(.*?)%' + IF_MANGLED_PART + '}(.*)', curr_line) - - # check whether the line contains information wrt if-statements that is not in its first part - match_large = re.search(r'^(.*)(\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '})(.*)', curr_line) - - if match: - content = match.group(1) - - # new if-statement wrt OS with '==' - if re.search(r'if OS == ', content): - OS = content.split()[-1] - - # set new active OS - active_OS_if_states[OS] = ACTIVE - - # set other active ones on inactive - for other_OS in active_OS_if_states.keys(): - if other_OS != OS and active_OS_if_states[other_OS] == ACTIVE: - active_OS_if_states[other_OS] = INACTIVE - - # new if-statement wrt OS with '!=' - elif re.search(r'if OS != ', content): - OS = content.split()[-1] - - # set new active OS - active_OS_if_states[OS] = INACTIVE - - # set other inactive ones on active - for other_OS in active_OS_if_states.keys(): - if other_OS != OS and active_OS_if_states[other_OS] == INACTIVE: - active_OS_if_states[other_OS] = ACTIVE - - # endif statement wrt OS - elif re.search(r'endif', content): - if str(1) in active_OS_if_states.values(): - active_OS_if_states[ - list(active_OS_if_states.keys())[list(active_OS_if_states.values()).index(str(1))]] = ACTIVE - else: - for key in active_OS_if_states.keys(): - active_OS_if_states[key] = INACTIVE - - # else statement wrt OS - elif re.search(r'else', content): - - i = 0 - for i in range(3): - if str(i) not in active_OS_if_states.values(): - break - - # set the previously active one on inactive until the next endif - key_list = list(active_OS_if_states.keys()) - position = list(active_OS_if_states.values()).index(ACTIVE) - active_OS_if_states[key_list[position]] = str(i) - - # set inactive ones on active - while 
INACTIVE in active_OS_if_states.values(): - position = list(active_OS_if_states.values()).index(INACTIVE) - active_OS_if_states[key_list[position]] = ACTIVE - - if len(match.group(2)) != 0: - extra_message = match.group(2).lstrip() - return CHECK_EXTRA_MESSAGE, extra_message, None - - else: - return DONE, None, None - - elif match_large: - return WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE, match_large.group(2), match_large.group(1) - - else: - return WRITE_TEXT, None, curr_line - - -def write_text_to_file(file_name, curr_line, link_lists, in_code_block): - """ - function that writes a line to a file - - :param file_name: target file to write the line to - :param curr_line: line to be written to the file - :param link_lists: list containing all the links that will be printed at the end of files - :param in_code_block: boolean indicating whether the current line is in a codeblock - :return link_lists: updated link_lists - """ - - if os.path.exists(file_name) or curr_line.strip(): - if os.path.exists(file_name): - with open(file_name, "r") as read_file: - data = json.load(read_file) - else: - data = {} - - os_list = [GENERIC_DIR, LINUX, WINDOWS, MACOS] - for i, os_ in enumerate(os_list): - if os_ in file_name: - curr_line, link_lists[i] = replace_markdown_markers(curr_line, link_lists[i], in_code_block, "placeholder") - - if CONTENT in data: - data[CONTENT] += curr_line - else: - data[CONTENT] = curr_line - - with open(file_name, "w") as write_file: - json.dump(data, write_file, indent=4) - - return link_lists - - -def choose_and_write_to_file(curr_line, active_OS_if_states, last_directory, last_title, root_dirs, link_lists, in_code_block): - """ - function that decides what file to write text to - - :param curr_line: line to be written to a file - :param active_OS_if_states: dictionary keeping track of which OSes are active according to the if-statements - :param last_directory: most recently made directory - :param last_title: the most recently encountered title - :param 
root_dirs: a list with all root directories - :param link_lists: list of links that need to be written at the end of the files - :param in_code_block: boolean indicating whether the current line is in a code block - :return link_lists: an updated link_lists - """ - # check that the line is part of the website for gent - if active_OS_if_states[LINUX] == INACTIVE and active_OS_if_states[WINDOWS] == INACTIVE and active_OS_if_states[MACOS] == INACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[0], last_directory, last_title + ".json"), curr_line, link_lists, in_code_block) - else: - for i, os_ in enumerate([LINUX, WINDOWS, MACOS]): - if active_OS_if_states[os_] == ACTIVE: - link_lists = write_text_to_file(os.path.join(root_dirs[i + 1], last_directory, last_title + ".json"), - curr_line, link_lists, in_code_block) - - return link_lists - - -def write_end_of_file(file_location, OS, linklist, is_linux_tutorial_, main_title, last_title): - """ - function that adds the links that should be at the end of a file - - :param file_location: the location of the file - :param OS: the OS of the file - :param linklist: the links that should be at the end of the file - :param is_linux_tutorial_: boolean indicating whether the file is part of the linux tutorial - :param main_title: the main title of the file, to be used in the reference link - :param last_title: the most recently encountered title - :return: - """ - - if os.path.exists(file_location): - - if len(OS) > 0: - OS = OS + "/" - - with open(file_location, "r") as read_file: - data = json.load(read_file) - - # add the links from within the document - data[LINKS] = {} - for i, link in enumerate(linklist): - data[LINKS][str(i + 1)] = str(link) - - if is_linux_tutorial_: - linux_part = LINUX_TUTORIAL + "/" - else: - linux_part = "" - - # add the reference link - data[REFERENCE_LINK] = (DOCS_URL + "/" + OS + linux_part + main_title + "/#" + ''.join(char.lower() for char in last_title if char.isalnum() or char == 
'-').strip('-')) - - with open(file_location, 'w') as write_file: - json.dump(data, write_file, indent=4) - - def make_valid_title(title): """ function that makes sure all titles can be used as valid filenames @@ -946,7 +709,6 @@ def main(): root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX) root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS) - root_dirs = [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos] # variable for the main title (needed for reference links) main_title = filename[:-3] @@ -954,30 +716,6 @@ def main(): # variable that keeps track of the directories that are used to write in at different levels curr_dirs = [filename[:-3] for _ in range(5)] - # variable that keeps track of the latest non-zero level title and corresponding directory - last_title = None - last_directory = None - - # list to keep track of links in the text - links_generic = [] - links_linux = [] - links_windows = [] - links_macos = [] - link_lists = [links_generic, links_linux, links_windows, links_macos] - - # dictionaries to keep track of current OS - active_OS_if_states = {LINUX: INACTIVE, WINDOWS: INACTIVE, MACOS: INACTIVE} - - # dictionaries to save the paragraphs and metadata before it is written to files - paragraphs_text = {} - paragraphs_metadata = {} - - # variable that shows whether the first title has been reached yet - after_first_title = False - - # variable that is used to be sure that we are detecting titles and not comments from codeblocks - in_code_block = False - ################### actually parse the md file ################### # create directories for the source markdown file @@ -1002,38 +740,6 @@ def main(): else: write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) - # # open the file and store line by line in the right file - # with 
open(copy_file, 'r') as readfile: - # - # for line in readfile: - # title_level, title, directory, curr_dirs, link_lists = check_for_title_xl(line, main_title, last_directory, last_title, curr_dirs, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, is_linux_tutorial, in_code_block) - # - # # detect codeblocks to make sure titles aren't detected in them - # if '```' in line or (('
' in line) ^ ('
' in line)): - # in_code_block = not in_code_block - # - # # line is a title with a maximum depth of 4 - # if title_level > 0: - # last_title = title - # last_directory = directory - # after_first_title = True - # - # # line is not a title - # elif after_first_title: - # # check for if-statements and write the appropriate lines in the right files - # next_action = check_if_statements(line, active_OS_if_states) - # while next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE or next_action[0] == CHECK_EXTRA_MESSAGE: - # if next_action[0] == WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE: - # link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) - # next_action = check_if_statements(next_action[1], active_OS_if_states) - # - # if next_action[0] == WRITE_TEXT: - # link_lists = choose_and_write_to_file(next_action[2], active_OS_if_states, last_directory, last_title, [root_dir_generic, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos], link_lists, in_code_block) - # - # # write end of file for the last file - # for i, OS in enumerate(["", "Linux", "Windows", "macOS"]): - # write_end_of_file(os.path.join(root_dirs[i], last_directory, last_title + ".json"), OS, link_lists[i], is_linux_tutorial, main_title, last_title) - # remove_directory_tree(COPIES) # remove_directory_tree(IF_MANGLED_FILES) From da32459088fd4bcb0b665df5ab3b24464a585925 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 13:43:04 +0200 Subject: [PATCH 062/145] introduce more macros --- .../chatbot_parser.py | 70 +++++++++++-------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 51b4efa00b20..91165d974296 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ 
b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -25,6 +25,7 @@ EXTRA_DIR = "extra" GENERIC_DIR = "generic" OS_SPECIFIC_DIR = "os_specific" +MACROS = "macros" # OSes LINUX = "linux" @@ -55,11 +56,23 @@ CHECK_EXTRA_MESSAGE = "check_extra_message" WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message" -# JSON attributes -CONTENT = "content" +# Metadata attributes +MAIN_TITLE = "main_title" +SUBTITLE = "subtitle" +TITLE_DEPTH = "title_depth" +DIRECTORY = "directory" LINKS = "links" +PARENT_TITLE = "parent_title" +PREVIOUS_TITLE = "previous_title" +NEXT_TITLE = "next_title" +METADATA_OS = "OS" REFERENCE_LINK = "reference_link" +# if-structure components +IF = "if" +ELSE = "else" +ENDIF = "endif" + ################### define functions ################### @@ -283,14 +296,14 @@ def write_metadata(main_title, subtitle, links, title_level, directory): :return paragraph_metadata: dictionary containing the metadata about the section """ - paragraph_metadata = {'main_title': main_title, 'subtitle': subtitle, 'title_depth': title_level, 'directory': directory} + paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, TITLE_DEPTH: title_level, DIRECTORY: directory} if len(links) > 0: - paragraph_metadata['links'] = {} + paragraph_metadata[LINKS] = {} for i, link in enumerate(links): - paragraph_metadata['links'][str(i)] = link + paragraph_metadata[LINKS][str(i)] = link - paragraph_metadata['parent_title'] = Path(directory).parent.name + paragraph_metadata[PARENT_TITLE] = Path(directory).parent.name return paragraph_metadata @@ -308,12 +321,12 @@ def close_ifs(text): """ patterns = { - 'if': r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', - 'endif': r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', - 'else': r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})' + IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + 
'})', + ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', + ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})' } - if_count = len(re.findall(patterns['if'], text.replace("\n", ""))) - endif_count = len(re.findall(patterns['endif'], text.replace("\n", ""))) + if_count = len(re.findall(patterns[IF], text.replace("\n", ""))) + endif_count = len(re.findall(patterns[ENDIF], text.replace("\n", ""))) if IF_MANGLED_PART not in text or if_count == endif_count: return text, "" else: @@ -339,11 +352,11 @@ def close_ifs(text): last_if = -1 last_else = -1 for i, if_part in enumerate(open_ifs): - if re.search(patterns['if'], if_part): + if re.search(patterns[IF], if_part): last_if = i - elif re.search(patterns['else'], if_part): + elif re.search(patterns[ELSE], if_part): last_else = i - elif re.search(patterns['endif'], if_part): + elif re.search(patterns[ENDIF], if_part): changed = True del open_ifs[i] if last_else > last_if: @@ -402,10 +415,10 @@ def load_macros(name): :return: """ - macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, "macros") + macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, MACROS) - if "../macros/" in name: - package_name = name.split("../macros/")[1] + if "../" + MACROS + "/" in name: + package_name = name.split("../" + MACROS + "/")[1] file_location = os.path.join(macros_location, package_name) with open(file_location, 'r') as readfile: @@ -537,7 +550,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, """ # make the directory needed for the files that will be written - filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title]["directory"]) + filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) 
@@ -580,7 +593,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or # check that file actually has some content if len(text[OS]) > 0: # define the filepath - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title]["directory"]) + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) # write the files @@ -605,25 +618,25 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe """ # write text file - with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + ".txt"), 'w') as writefile: + with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + ".txt"), 'w') as writefile: writefile.write(text) # write metadata metadata = paragraphs_metadata[title] if title_order_number != 0: - metadata["previous_title"] = title_order[title_order_number - 1] + metadata[PREVIOUS_TITLE] = title_order[title_order_number - 1] else: - metadata["previous_title"] = None + metadata[PREVIOUS_TITLE] = None if title_order_number != len(title_order) - 1: - metadata["next_title"] = title_order[title_order_number + 1] + metadata[NEXT_TITLE] = title_order[title_order_number + 1] else: - metadata["next_title"] = None + metadata[NEXT_TITLE] = None - metadata["OS"] = OS + metadata[METADATA_OS] = OS - if bool(LINUX_TUTORIAL in paragraphs_metadata[title]["directory"]): + if bool(LINUX_TUTORIAL in paragraphs_metadata[title][DIRECTORY]): linux_part = LINUX_TUTORIAL + "/" else: linux_part = "" @@ -631,11 +644,12 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = "" else: os_part = OS + "/" - metadata["reference_link"] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title]["main_title"] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') + metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + 
paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') - with open(os.path.join(filepath, paragraphs_metadata[title]["subtitle"] + "_metadata.json"), 'w') as writefile: + with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + "_metadata.json"), 'w') as writefile: json.dump(metadata, writefile, indent=4) + def main(): """ main function From 093200b232c1c6ed5c10530ec6a09717b2aaf263 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 13:48:54 +0200 Subject: [PATCH 063/145] reintroduce logic to remove unnecessary directories --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 91165d974296..8e3141c4b52a 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -656,9 +656,9 @@ def main(): :return: """ # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - shutil.rmtree(PARSED_MDS) - shutil.rmtree(COPIES) - shutil.rmtree(IF_MANGLED_FILES) + shutil.rmtree(PARSED_MDS, ignore_errors=True) + shutil.rmtree(COPIES, ignore_errors=True) + shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) # make the necessary directories if not os.path.exists(COPIES): @@ -754,8 +754,8 @@ def main(): else: write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) - # remove_directory_tree(COPIES) - # remove_directory_tree(IF_MANGLED_FILES) + shutil.rmtree(COPIES, ignore_errors=True) + shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) ################### run the script ################### From 5d0ffe951e515ee3fb890b82a2431332e92b3d4a Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 14:34:45 +0200 Subject: [PATCH 064/145] added functionality 
to include links or leave them out --- .../chatbot_parser.py | 75 ++++++++++++++----- .../HPC_chatbot_preprocessor/requirements.txt | 1 + 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 8e3141c4b52a..33ddefbdbbfb 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,3 +1,4 @@ +import copy import json import os import re @@ -73,6 +74,10 @@ ELSE = "else" ENDIF = "endif" +# link indicators +LINK_BEFORE = r'§link§link§' +LINK_AFTER = r'§link§link§' + ################### define functions ################### @@ -120,14 +125,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: for match in matches: - curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + "[" + str(len(linklist) + 1) + "]") + curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_BEFORE + str(len(linklist)) + LINK_AFTER) if ".md" not in match[1]: if "#" not in match[1]: linklist.append(match[1]) else: - linklist.append(DOCS_URL + main_title + "/" + match[1]) + linklist.append(DOCS_URL + "/" + main_title + "/" + match[1]) else: - linklist.append(DOCS_URL + match[1].replace(".md", "/").replace("index", "").rstrip("/")) + linklist.append(DOCS_URL + "/" + match[1].replace(".md", "/").replace("index", "").rstrip("/")) # codeblock (with ``` -> always stands on a separate line, so line can be dropped) if '```' in curr_line: @@ -617,13 +622,17 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :return: """ + metadata = copy.deepcopy(paragraphs_metadata[title]) + # write text file with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + ".txt"), 'w') as writefile: - writefile.write(text) + if LINKS in paragraphs_metadata[title].keys(): + 
adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS]) + writefile.write(adapted_text) + else: + writefile.write(text) # write metadata - metadata = paragraphs_metadata[title] - if title_order_number != 0: metadata[PREVIOUS_TITLE] = title_order[title_order_number - 1] else: @@ -650,6 +659,32 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe json.dump(metadata, writefile, indent=4) +def insert_links(text, links): + """ + Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT + + :param text: The plaintext that needs to be adapted + :param links: The links that might need to be inserted + :return text: The adapted plaintext + :return links: The links that were actually present in the text + """ + + present_links = [] + new_links = {} + for link_number in re.finditer(LINK_BEFORE + r'([0-9]*?)' + LINK_AFTER, text): + present_links.append(link_number.group(1)) + if INCLUDE_LINKS_IN_PLAINTEXT: + text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, " " + links[link_number.group(1)] + " ", text) + else: + text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, "", text) + + for link_number in links.keys(): + if link_number in present_links: + new_links[len(new_links.keys())] = links[link_number] + + return text, new_links + + def main(): """ main function @@ -679,22 +714,22 @@ def main(): source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - # list of all the filenames - filenames_generic = {} - filenames_linux = {} - for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if LINUX_TUTORIAL in source_directory: - filenames_linux[file] = 
os.path.join(source_directory, file) - else: - filenames_generic[file] = os.path.join(source_directory, file) - - # # Temporary variables to test with just one singular file + # # list of all the filenames # filenames_generic = {} # filenames_linux = {} - # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" + # for source_directory in source_directories: + # all_items = os.listdir(source_directory) + # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + # for file in files: + # if LINUX_TUTORIAL in source_directory: + # filenames_linux[file] = os.path.join(source_directory, file) + # else: + # filenames_generic[file] = os.path.join(source_directory, file) + + # Temporary variables to test with just one singular file + filenames_generic = {} + filenames_linux = {} + filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 907f08fda77f..3b118535f3b2 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -1,3 +1,4 @@ +copy os re shutil From a3e34a97d0fec915d199b4be0b0a9a62f4b4be4f Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 14:37:16 +0200 Subject: [PATCH 065/145] added functionality to include links or leave them out --- scripts/HPC_chatbot_preprocessor/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 3b118535f3b2..907f08fda77f 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ 
b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -1,4 +1,3 @@ -copy os re shutil From 7c6154b47023062d4b7b6ff5932b60ccb63d56c3 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 15:36:39 +0200 Subject: [PATCH 066/145] adapt filenames to allow for splitting on something other than subtitles --- .../chatbot_parser.py | 60 +++++++++++-------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 33ddefbdbbfb..5c31199d731f 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,7 +12,7 @@ # customizable macros MIN_PARAGRAPH_LENGTH = 128 MAX_TITLE_DEPTH = 4 -INCLUDE_LINKS_IN_PLAINTEXT = True +INCLUDE_LINKS_IN_PLAINTEXT = False # directories PARSED_MDS = "parsed_mds" @@ -59,7 +59,7 @@ # Metadata attributes MAIN_TITLE = "main_title" -SUBTITLE = "subtitle" +SUBTITLE = "subtitle (incorrect in some cases, working on a fix)" TITLE_DEPTH = "title_depth" DIRECTORY = "directory" LINKS = "links" @@ -542,7 +542,7 @@ def make_valid_title(title): return valid_filename -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): """ Function that writes text and metadata of a generic (non-os-specific) file @@ -551,6 +551,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written + :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ @@ -558,10 +559,10 @@ def write_generic_file(title, 
paragraphs_text, paragraphs_metadata, title_order, filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers) -def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): +def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): """ Function that writes text and metadata of os-specific files @@ -570,6 +571,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written + :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ text = {} @@ -592,7 +594,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or unique_texts = set(text.values()) if len(unique_texts) == 1: paragraphs_text[title] = text[OS] - write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number) + write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers) else: for OS in [LINUX, WINDOWS, MACOS]: # check that file actually has some content @@ -602,13 +604,13 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or os.makedirs(filepath, exist_ok=True) # write the files - write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS) + 
write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers=paragraph_numbers) else: # don't write empty files pass -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers): """ Function to write files to a certain filepath @@ -619,13 +621,14 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :param title_order_number: order number of the title of the section that is being written :param filepath: filepath to write files to :param OS: OS to be included in the metadata + :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ metadata = copy.deepcopy(paragraphs_metadata[title]) # write text file - with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + ".txt"), 'w') as writefile: + with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + ".txt"), 'w') as writefile: if LINKS in paragraphs_metadata[title].keys(): adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS]) writefile.write(adapted_text) @@ -655,9 +658,11 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = OS + "/" metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') - with open(os.path.join(filepath, paragraphs_metadata[title][SUBTITLE] + "_metadata.json"), 'w') as writefile: + with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + "_metadata.json"), 'w') as writefile: json.dump(metadata, writefile, indent=4) + paragraph_numbers[OS] += 1 
+ def insert_links(text, links): """ @@ -714,22 +719,22 @@ def main(): source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - # # list of all the filenames - # filenames_generic = {} - # filenames_linux = {} - # for source_directory in source_directories: - # all_items = os.listdir(source_directory) - # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - # for file in files: - # if LINUX_TUTORIAL in source_directory: - # filenames_linux[file] = os.path.join(source_directory, file) - # else: - # filenames_generic[file] = os.path.join(source_directory, file) - - # Temporary variables to test with just one singular file + # list of all the filenames filenames_generic = {} filenames_linux = {} - filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" + for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + if LINUX_TUTORIAL in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) + + # # Temporary variables to test with just one singular file + # filenames_generic = {} + # filenames_linux = {} + # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files @@ -765,6 +770,9 @@ def main(): # variable that keeps track of the directories that are used to write in at different levels curr_dirs = [filename[:-3] for _ in range(5)] + # dictionary that keeps track of the paragraph 
numbers + paragraph_numbers = {GENERIC: 1, LINUX: 1, WINDOWS: 1, MACOS: 1} + ################### actually parse the md file ################### # create directories for the source markdown file @@ -783,11 +791,11 @@ def main(): # generic if IF_MANGLED_PART not in paragraphs_text[subtitle]: - write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) + write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) # os-specific else: - write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i) + write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) shutil.rmtree(COPIES, ignore_errors=True) shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) From 8d5b50dc727e284917eb1540d91f692f56ff8a4a Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 16:19:57 +0200 Subject: [PATCH 067/145] making some changes to prepare to add paragraph level splitting tomorrow --- .../chatbot_parser.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 5c31199d731f..742522e6e70d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -13,6 +13,7 @@ MIN_PARAGRAPH_LENGTH = 128 MAX_TITLE_DEPTH = 4 INCLUDE_LINKS_IN_PLAINTEXT = False +DEEP_DIRECTORIES = True # directories PARSED_MDS = "parsed_mds" @@ -64,8 +65,8 @@ DIRECTORY = "directory" LINKS = "links" PARENT_TITLE = "parent_title" -PREVIOUS_TITLE = "previous_title" -NEXT_TITLE = "next_title" +PREVIOUS_SUBTITLE = "previous_title" +NEXT_SUBTITLE = "next_title" METADATA_OS = "OS" REFERENCE_LINK = "reference_link" @@ -75,8 +76,7 @@ ENDIF = "endif" # link indicators -LINK_BEFORE = r'§link§link§' -LINK_AFTER = r'§link§link§' +LINK_MARKER = r'§link§link§' ################### define 
functions ################### @@ -94,11 +94,12 @@ def check_for_title(line, in_code_block, curr_dirs): match = re.match(r'^#+ ', line) if match and len(match.group(0)) <= 5 and not in_code_block: title_length = len(match.group(0)) - 1 - curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-'))) + if DEEP_DIRECTORIES: + curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-'))) - # update the higher order current directories - for i in range(title_length + 1, MAX_TITLE_DEPTH + 1): - curr_dirs[i] = curr_dirs[title_length] + # update the higher order current directories + for i in range(title_length + 1, MAX_TITLE_DEPTH + 1): + curr_dirs[i] = curr_dirs[title_length] return title_length else: @@ -125,7 +126,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) if matches: for match in matches: - curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_BEFORE + str(len(linklist)) + LINK_AFTER) + curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER) if ".md" not in match[1]: if "#" not in match[1]: linklist.append(match[1]) @@ -676,12 +677,12 @@ def insert_links(text, links): present_links = [] new_links = {} - for link_number in re.finditer(LINK_BEFORE + r'([0-9]*?)' + LINK_AFTER, text): + for link_number in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text): present_links.append(link_number.group(1)) if INCLUDE_LINKS_IN_PLAINTEXT: - text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, " " + links[link_number.group(1)] + " ", text) + text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, " " + links[link_number.group(1)] + " ", text) else: - text = re.sub(LINK_BEFORE + link_number.group(1) + LINK_AFTER, "", text) + text = 
re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, "", text) for link_number in links.keys(): if link_number in present_links: From 0c10376f1f3d5ea56f3ddc32fa580ff436413a73 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 16:20:41 +0200 Subject: [PATCH 068/145] making some changes to prepare to add paragraph level splitting tomorrow --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 742522e6e70d..1c13edc93e31 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -13,6 +13,8 @@ MIN_PARAGRAPH_LENGTH = 128 MAX_TITLE_DEPTH = 4 INCLUDE_LINKS_IN_PLAINTEXT = False +SPLIT_ON_TITLES = True +SPLIT_ON_PARAGRAPHS = False DEEP_DIRECTORIES = True # directories From f8ee8607545a5638de94787bb00046226e19cce0 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 16:30:55 +0200 Subject: [PATCH 069/145] making some changes to prepare to add paragraph level splitting tomorrow --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 1c13edc93e31..561e112d28e3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -640,14 +640,14 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe # write metadata if title_order_number != 0: - metadata[PREVIOUS_TITLE] = title_order[title_order_number - 1] + metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1] else: - metadata[PREVIOUS_TITLE] = None + metadata[PREVIOUS_SUBTITLE] = None if title_order_number != len(title_order) - 1: - metadata[NEXT_TITLE] = title_order[title_order_number + 1] + 
metadata[NEXT_SUBTITLE] = title_order[title_order_number + 1] else: - metadata[NEXT_TITLE] = None + metadata[NEXT_SUBTITLE] = None metadata[METADATA_OS] = OS From 6533733a4d462db37544251af7a9d33697ad63bb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 17:04:10 +0200 Subject: [PATCH 070/145] adapted the parsing script to allow for testing in a semi-efficient way --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 561e112d28e3..43cb93c5c086 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -531,7 +531,7 @@ def make_valid_title(title): :return valid_filename: the adapted title that can be used as filename """ # Define a regex pattern for invalid characters on both Windows and Linux - invalid_chars = r'[<>:"/\\|?*\0()]' + invalid_chars = r'[<>:"/\\|?*\0]' # get rid of extra information between {} brackets title = re.sub(r'\{.*?}', '', title) @@ -805,6 +805,7 @@ def main(): ################### run the script ################### -print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") -main() -print("Parsing finished successfully") +if __name__ == '__main__': + print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") + main() + print("Parsing finished successfully") From 2e7a00f1b724e77249caef30c62e8aa6c6c9f628 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 17:04:35 +0200 Subject: [PATCH 071/145] added test for make_valid_title --- .../tests/test_make_valid_title.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py new file mode 100644 index 000000000000..f3c423ed9c37 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py @@ -0,0 +1,20 @@ +import pytest +import shutil +from chatbot_parser import make_valid_title + + +@pytest.mark.parametrize("input_string,expected", [ + ("", ""), + ("A-good-filename-with-dashes", "A-good-filename-with-dashes"), + (" A very good filename beginning and ending in a space ", "A very good filename beginning and ending in a space"), + ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"), + ("A filename containing bad characters <>:\"/\\|?*\0", "A filename containing bad characters"), + ("A filename ending with {some jinja garbage}", "A filename ending with") +]) +def test_make_valid_title(input_string, expected): + assert make_valid_title(input_string) == expected + + +shutil.rmtree("parsed_mds", ignore_errors=True) +shutil.rmtree("copies", ignore_errors=True) +shutil.rmtree("if_mangled_files", ignore_errors=True) From f5e0579fb6a83f1a8e643fc5b1b77309080bf0e3 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 21 Aug 2024 17:07:49 +0200 Subject: [PATCH 072/145] removed useless lines from testscript --- .../HPC_chatbot_preprocessor/tests/test_make_valid_title.py | 5 
----- 1 file changed, 5 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py index f3c423ed9c37..aebecddd0f3f 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py @@ -13,8 +13,3 @@ ]) def test_make_valid_title(input_string, expected): assert make_valid_title(input_string) == expected - - -shutil.rmtree("parsed_mds", ignore_errors=True) -shutil.rmtree("copies", ignore_errors=True) -shutil.rmtree("if_mangled_files", ignore_errors=True) From 6757b4f5eba4a105a1b5b94c6a9c720c25e74f2a Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 11:08:12 +0200 Subject: [PATCH 073/145] First attempt at splitting in paragraphs (need for other fixes for title-based-split first --- .../chatbot_parser.py | 45 +++++++++++++++---- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 43cb93c5c086..a148e7b2bbd4 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -10,12 +10,12 @@ #################### define macro's #################### # customizable macros -MIN_PARAGRAPH_LENGTH = 128 +MIN_PARAGRAPH_LENGTH = 160 MAX_TITLE_DEPTH = 4 INCLUDE_LINKS_IN_PLAINTEXT = False SPLIT_ON_TITLES = True -SPLIT_ON_PARAGRAPHS = False -DEEP_DIRECTORIES = True +SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES +DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False # directories PARSED_MDS = "parsed_mds" @@ -219,8 +219,10 @@ def split_text(file, main_title): :return subtitle_order: list containing all encountered subtitles in order of appearance """ - # start of assuming we haven't encountered a title + # start of assuming we haven't encountered a title and the first paragraph hasn't 
appeared yet after_first_title = False + after_first_paragraph = False + paragraph_number = 1 # start of assuming we are not in a code_block in_code_block = False @@ -241,6 +243,12 @@ def split_text(file, main_title): # variable to allow for if statements to "continue" over multiple paragraphs open_ifs = "" + # initialise the first paragraph if SPLIT_ON_PARAGRAPH is True + if SPLIT_ON_PARAGRAPHS: + title = main_title + "_paragraph_" + str(paragraph_number) + paragraphs_text[title] = "" + subtitle_order.append(title) + # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] @@ -258,18 +266,18 @@ def split_text(file, main_title): title_level = check_for_title(line, in_code_block, curr_dirs) - # detect codeblocks to make sure titles aren't detected in them + # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block # line is a title with a maximum depth of 4 - if title_level > 0: + if title_level > 0 and SPLIT_ON_TITLES: if after_first_title: paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) title = make_valid_title(line[title_level + 1:-1]) - # create an entry for the file in the paragraphs text dictionary + # create an entry for the next file in the paragraphs text dictionary paragraphs_text[title] = open_ifs after_first_title = True @@ -278,8 +286,27 @@ def split_text(file, main_title): # reset link_list link_list = [] - # line is not a title - elif after_first_title: + elif title_level > 0 and not SPLIT_ON_TITLES: + paragraphs_text[title] += line[title_level + 1:] + + elif SPLIT_ON_PARAGRAPHS and line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH: + # finish the previous file + paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + + # start a new file + paragraph_number += 1 + title = make_valid_title(main_title + "_paragraph_" + str(paragraph_number)) + subtitle_order.append(title) + + # create an entry for the next file in the paragraphs text dictionary + paragraphs_text[title] = open_ifs + + # reset link_list + link_list = [] + + # line is not a title or the ending of a sufficiently large paragraph + elif after_first_title or SPLIT_ON_PARAGRAPHS: line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) if title in paragraphs_text.keys() and line != "\n": paragraphs_text[title] += line From 6d9558d1ccf2dd9950586d50b167d74637120e26 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 11:21:42 +0200 Subject: [PATCH 074/145] make two functions for different ways 
of dividing the text --- .../chatbot_parser.py | 96 +++++++++++++++---- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a148e7b2bbd4..1f6b82e8a44c 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -219,10 +219,15 @@ def split_text(file, main_title): :return subtitle_order: list containing all encountered subtitles in order of appearance """ - # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet + if SPLIT_ON_TITLES: + return split_on_titles(file, main_title) + elif SPLIT_ON_PARAGRAPHS: + return split_on_paragraphs(file, main_title) + + +def split_on_titles(file, main_title): + # start of assuming we haven't encountered a title after_first_title = False - after_first_paragraph = False - paragraph_number = 1 # start of assuming we are not in a code_block in_code_block = False @@ -243,12 +248,6 @@ def split_text(file, main_title): # variable to allow for if statements to "continue" over multiple paragraphs open_ifs = "" - # initialise the first paragraph if SPLIT_ON_PARAGRAPH is True - if SPLIT_ON_PARAGRAPHS: - title = main_title + "_paragraph_" + str(paragraph_number) - paragraphs_text[title] = "" - subtitle_order.append(title) - # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] @@ -266,18 +265,19 @@ def split_text(file, main_title): title_level = check_for_title(line, in_code_block, curr_dirs) - # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them + # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block # line is a title with a maximum depth of 4 - if title_level > 0 and SPLIT_ON_TITLES: + if title_level > 0: if after_first_title: paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, + last_dir) title = make_valid_title(line[title_level + 1:-1]) - # create an entry for the next file in the paragraphs text dictionary + # create an entry for the file in the paragraphs text dictionary paragraphs_text[title] = open_ifs after_first_title = True @@ -286,10 +286,74 @@ def split_text(file, main_title): # reset link_list link_list = [] - elif title_level > 0 and not SPLIT_ON_TITLES: + # line is not a title + elif after_first_title: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + if title in paragraphs_text.keys() and line != "\n": + paragraphs_text[title] += line + elif line != "\n": + paragraphs_text[title] = line + + # write metadata for the last file + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level]) + + return paragraphs_text, paragraphs_metadata, subtitle_order + + +def split_on_paragraphs(file, main_title): + # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet + after_first_title = False + + # first paragraph number + paragraph_number = 1 + + # start of assuming we are not in a code_block + in_code_block = False + + # define initial dictionaries + paragraphs_text = {} + paragraphs_metadata = {} + + # list to keep track of links in the text + link_list = [] + + # list to keep track of the order of the subtitles + subtitle_order = [] + + # variable to keep track of the title level + title_level = 0 + + # initialise the first paragraph + title = main_title + 
"_paragraph_" + str(paragraph_number) + paragraphs_text[title] = "" + subtitle_order.append(title) + + # list to keep track of most recent directories on each title level + if LINUX_TUTORIAL not in file: + curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] + else: + curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)] + + with open(file, 'r') as readfile: + + for line in readfile: + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] + + title_level = check_for_title(line, in_code_block, curr_dirs) + + # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block + + # line is a title with a maximum depth of 4 + if title_level > 0: paragraphs_text[title] += line[title_level + 1:] - elif SPLIT_ON_PARAGRAPHS and line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH: + elif line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH: # finish the previous file paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) @@ -306,7 +370,7 @@ def split_text(file, main_title): link_list = [] # line is not a title or the ending of a sufficiently large paragraph - elif after_first_title or SPLIT_ON_PARAGRAPHS: + else: line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) if title in paragraphs_text.keys() and line != "\n": paragraphs_text[title] += line From 2c7025a8994fb0a2b0733be82185e706d1109fe9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 11:23:06 +0200 Subject: [PATCH 075/145] added docstrings to new functions --- .../chatbot_parser.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 1f6b82e8a44c..a9797026428e 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -212,6 +212,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): def split_text(file, main_title): """ Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata + :param file: the filepath of the file to be split :param main_title: the main title of the file :return paragraphs_text: dictionary containing the split 
sections of text @@ -226,6 +227,15 @@ def split_text(file, main_title): def split_on_titles(file, main_title): + """ + Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata + + :param file: the filepath of the file to be split + :param main_title: the main title of the file + :return paragraphs_text: dictionary containing the split sections of text + :return paragraphs_metadata: dictionary containing the metadata of each split section of text + :return subtitle_order: list containing all encountered subtitles in order of appearance + """ # start of assuming we haven't encountered a title after_first_title = False @@ -301,6 +311,15 @@ def split_on_titles(file, main_title): def split_on_paragraphs(file, main_title): + """ + Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata + + :param file: the filepath of the file to be split + :param main_title: the main title of the file + :return paragraphs_text: dictionary containing the split sections of text + :return paragraphs_metadata: dictionary containing the metadata of each split section of text + :return subtitle_order: list containing all encountered subtitles in order of appearance + """ # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet after_first_title = False From ae99bb96f830da927f4dcded46d12404af8d16c1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 12:10:18 +0200 Subject: [PATCH 076/145] update test for valid titles --- .../HPC_chatbot_preprocessor/tests/test_make_valid_title.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py index aebecddd0f3f..fc704c84b316 100644 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py @@ -6,10 +6,10 @@ @pytest.mark.parametrize("input_string,expected", [ ("", ""), ("A-good-filename-with-dashes", "A-good-filename-with-dashes"), - (" A very good filename beginning and ending in a space ", "A very good filename beginning and ending in a space"), + (" A very good filename beginning and ending in a space ", "A-very-good-filename-beginning-and-ending-in-a-space"), ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"), - ("A filename containing bad characters <>:\"/\\|?*\0", "A filename containing bad characters"), - ("A filename ending with {some jinja garbage}", "A filename ending with") + ("A filename containing bad characters <>:\"/\\|?*\0", "A-filename-containing-bad-characters"), + ("A filename ending with {some jinja garbage}", "A-filename-ending-with") ]) def test_make_valid_title(input_string, expected): assert make_valid_title(input_string) == expected From 084b4210a261e89c81d8e23d31d3e0d1adb7f00b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 14:17:24 +0200 Subject: [PATCH 077/145] fixed problem with splitting os-specific text (metadata not fixed yet) --- .../chatbot_parser.py | 169 ++++++++++++------ 1 file changed, 117 insertions(+), 52 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a9797026428e..5739f23fb31c 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -80,6 +80,13 @@ # link indicators LINK_MARKER = r'§link§link§' +# regex patterns +IF_MANGLED_PATTERNS = { + IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', + ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})', + ENDIF: r'({' + IF_MANGLED_PART 
+ r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})' + } + ################### define functions ################### @@ -243,9 +250,13 @@ def split_on_titles(file, main_title): in_code_block = False # define initial dictionaries - paragraphs_text = {} + paragraphs_os_free_text = {} + paragraphs_os_text = {} paragraphs_metadata = {} + # variable to keep track of the current paragraph + current_paragraph = "" + # list to keep track of links in the text link_list = [] @@ -258,6 +269,12 @@ def split_on_titles(file, main_title): # variable to allow for if statements to "continue" over multiple paragraphs open_ifs = "" + # variable to keep track of how many if-statements deep the current line is + in_if_statement = 0 + + # variable to indicate that previous section was one with if-statements + previous_contained_if = False + # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] @@ -268,46 +285,63 @@ def split_on_titles(file, main_title): for line in readfile: - # keep track of title level and directory to write to metadata upon discovering a new subtitle - if title_level > 0: - last_title_level = title_level - last_dir = curr_dirs[last_title_level] + # detect if-statements starting or ending on the current line + in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) - title_level = check_for_title(line, in_code_block, curr_dirs) + # only split up if current line is in a fully non-os-specific section + if in_if_statement == 0: - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block + title_level = check_for_title(line, in_code_block, curr_dirs) - # line is a title with a maximum depth of 4 - if title_level > 0: - if after_first_title: - paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, - last_dir) - title = make_valid_title(line[title_level + 1:-1]) + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block - # create an entry for the file in the paragraphs text dictionary - paragraphs_text[title] = open_ifs + # line is a title with a maximum depth of 4 + if title_level > 0: + if after_first_title: + if previous_contained_if: + paragraphs_os_text[title] = current_paragraph + else: + paragraphs_os_free_text[title] = current_paragraph + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + title = make_valid_title(line[title_level + 1:-1]) - after_first_title = True - subtitle_order.append(title) + # create an entry for the file in the paragraphs text dictionary + current_paragraph = open_ifs - # reset link_list - link_list = [] + after_first_title = True + subtitle_order.append(title) + + # reset link_list + link_list = [] + + previous_contained_if = False - # line is not a title - elif after_first_title: + # line is not a title + elif after_first_title: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + if line != "\n": + current_paragraph += line + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] + else: + previous_contained_if = True line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) - if title in paragraphs_text.keys() and line != "\n": - paragraphs_text[title] += line - elif line != "\n": - paragraphs_text[title] = line + if line != "\n": + current_paragraph += line - # write metadata for the last file - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level]) + # write dictionaries for the last file + if previous_contained_if: + paragraphs_os_text[title] = current_paragraph + else: + paragraphs_os_free_text[title] = current_paragraph + paragraphs_metadata[title] = write_metadata(main_title, 
title, link_list, last_title_level, curr_dirs[last_title_level]) - return paragraphs_text, paragraphs_metadata, subtitle_order + return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order def split_on_paragraphs(file, main_title): @@ -438,20 +472,15 @@ def close_ifs(text): :return prefix: the prefix for the next section """ - patterns = { - IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', - ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})', - ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})' - } - if_count = len(re.findall(patterns[IF], text.replace("\n", ""))) - endif_count = len(re.findall(patterns[ENDIF], text.replace("\n", ""))) + if_count = len(re.findall(IF_MANGLED_PATTERNS[IF], text.replace("\n", ""))) + endif_count = len(re.findall(IF_MANGLED_PATTERNS[ENDIF], text.replace("\n", ""))) if IF_MANGLED_PART not in text or if_count == endif_count: return text, "" else: # Find all matches for each pattern matches = [] - for key, pattern in patterns.items(): + for key, pattern in IF_MANGLED_PATTERNS.items(): for match in re.finditer(pattern, text): matches.append(match) @@ -470,11 +499,11 @@ def close_ifs(text): last_if = -1 last_else = -1 for i, if_part in enumerate(open_ifs): - if re.search(patterns[IF], if_part): + if re.search(IF_MANGLED_PATTERNS[IF], if_part): last_if = i - elif re.search(patterns[ELSE], if_part): + elif re.search(IF_MANGLED_PATTERNS[ELSE], if_part): last_else = i - elif re.search(patterns[ENDIF], if_part): + elif re.search(IF_MANGLED_PATTERNS[ENDIF], if_part): changed = True del open_ifs[i] if last_else > last_if: @@ -650,7 +679,7 @@ def make_valid_title(title): valid_filename = re.sub(invalid_chars, '', title) # Strip leading/trailing whitespace - valid_filename = valid_filename.strip().strip('-') + valid_filename = valid_filename.strip().strip('-').replace(' ', '-') return valid_filename @@ -700,7 
+729,7 @@ def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_or template = Template(paragraphs_text[title]) text[OS] = template.render(OS=OS) - # readjust text to correct overcorrections + # re-adjust text to correct overcorrections text[OS] = re.sub('"' + OS + '"', OS, text[OS]) # check that not all versions are the same @@ -740,8 +769,11 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe metadata = copy.deepcopy(paragraphs_metadata[title]) + file_title = paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + file_title = title + # write text file - with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + ".txt"), 'w') as writefile: + with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile: if LINKS in paragraphs_metadata[title].keys(): adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS]) writefile.write(adapted_text) @@ -771,7 +803,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = OS + "/" metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') - with open(os.path.join(filepath, paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) + "_metadata.json"), 'w') as writefile: + with open(os.path.join(filepath, file_title + "_metadata.json"), 'w') as writefile: json.dump(metadata, writefile, indent=4) paragraph_numbers[OS] += 1 @@ -803,6 +835,39 @@ def insert_links(text, links): return text, new_links +def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers): + # add first subtitle in front of section again + text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text + + # Unmangle if's 
to use jinja parser + text = re.sub(IF_MANGLED_PART, "", text) + + for OS in [LINUX, WINDOWS, MACOS]: + + # slightly alter if-statements to be able to use predefined macros + text = re.sub(OS, '"' + OS + '"', text) + + # Use jinja to render a different version of the text for each OS + template = Template(text) + jinja_text = template.render(OS=OS) + + # re-adjust text to correct overcorrections + jinja_text = re.sub('"' + OS + '"', OS, jinja_text) + + with open("jinja_file.txt", 'w') as writefile: + writefile.write(jinja_text) + + # split in right way + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE]) + + # write to files + for os_i, os_subtitle in enumerate(os_subtitle_order): + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) + + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:], os_i + i, filepath, OS, paragraph_numbers) + + def main(): """ main function @@ -828,7 +893,7 @@ def main(): ################### define loop-invariant variables ################### - # variable that keeps track of the source directories + # constant that keeps track of the source directories source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] @@ -848,6 +913,7 @@ def main(): # filenames_generic = {} # filenames_linux = {} # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" + # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" # filenames_linux["beyond_the_basics.md"] = 
"C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files @@ -896,19 +962,18 @@ def main(): jinja_parser(filename, copy_file) # split the text in paragraphs - paragraphs_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title) + paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title) # for every section, either make the whole section generic, or create an os-specific file for each OS for i, subtitle in enumerate(subtitle_order): - # print(subtitle) # generic - if IF_MANGLED_PART not in paragraphs_text[subtitle]: - write_generic_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) + if subtitle in paragraphs_os_free_text.keys(): + write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) # os-specific else: - write_os_specific_file(subtitle, paragraphs_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers) shutil.rmtree(COPIES, ignore_errors=True) shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) From cf7f5f0c8a56303c155aea51268abe6ddbfe2944 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 15:33:50 +0200 Subject: [PATCH 078/145] fix for metadata of os-specific sections --- .../chatbot_parser.py | 93 +++++++++++++------ 1 file changed, 65 insertions(+), 28 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 5739f23fb31c..10c617252440 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -62,7 +62,7 @@ # Metadata attributes MAIN_TITLE = "main_title" -SUBTITLE = "subtitle (incorrect in some cases, working on a fix)" 
+SUBTITLE = "subtitle" TITLE_DEPTH = "title_depth" DIRECTORY = "directory" LINKS = "links" @@ -300,11 +300,17 @@ def split_on_titles(file, main_title): # line is a title with a maximum depth of 4 if title_level > 0: if after_first_title: + + # write text of previous file if previous_contained_if: paragraphs_os_text[title] = current_paragraph else: paragraphs_os_free_text[title] = current_paragraph + + # write metadata of previous file paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + + # make a new title title = make_valid_title(line[title_level + 1:-1]) # create an entry for the file in the paragraphs text dictionary @@ -697,11 +703,15 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :return: """ - # make the directory needed for the files that will be written - filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) + if len(paragraphs_text[title]) > 0: + # make the directory needed for the files that will be written + filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, paragraph_numbers=paragraph_numbers) + else: + # don't write empty files + pass def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): @@ -835,7 +845,7 @@ def insert_links(text, links): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers): +def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers, all_metadata): # add first 
subtitle in front of section again text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text @@ -860,12 +870,39 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, i, parag # split in right way _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE]) + # prepare variables to fix metadata + total_subtitle_order = subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:] + copy_all_metadata = {**os_specific_metadata, **all_metadata} + # write to files for os_i, os_subtitle in enumerate(os_subtitle_order): - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) + # check that file actually has some content + if len(os_specific_text[os_subtitle]) > 0: + # add the links to the metadata + os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] + + # fix parent in the metadata + parent_i = 0 + parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 + parent = os_specific_metadata[os_subtitle][MAIN_TITLE] + while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): + if copy_all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: + parent = total_subtitle_order[parent_i] + parent_i += 1 + os_specific_metadata[os_subtitle][PARENT_TITLE] = parent + + # fix directory in the metadata + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(copy_all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + + # make a directory to save the files + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:], os_i + i, filepath, OS, paragraph_numbers) + # write to files + 
write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + i, filepath, OS, paragraph_numbers) + else: + # don't write empty files + pass def main(): @@ -893,27 +930,27 @@ def main(): ################### define loop-invariant variables ################### - # constant that keeps track of the source directories - source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - - # list of all the filenames - filenames_generic = {} - filenames_linux = {} - for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if LINUX_TUTORIAL in source_directory: - filenames_linux[file] = os.path.join(source_directory, file) - else: - filenames_generic[file] = os.path.join(source_directory, file) - - # # Temporary variables to test with just one singular file + # # constant that keeps track of the source directories + # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), + # os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + # + # # list of all the filenames # filenames_generic = {} # filenames_linux = {} + # for source_directory in source_directories: + # all_items = os.listdir(source_directory) + # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + # for file in files: + # if LINUX_TUTORIAL in source_directory: + # filenames_linux[file] = os.path.join(source_directory, file) + # else: + # filenames_generic[file] = os.path.join(source_directory, file) + + # Temporary variables to test with just one singular file + filenames_generic = {} + filenames_linux = {} # filenames_generic["account.md"] = 
"C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" - # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" + filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files @@ -973,7 +1010,7 @@ def main(): # os-specific else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers) + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers, paragraphs_metadata) shutil.rmtree(COPIES, ignore_errors=True) shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) From b7c10d3c2764ad91880c5c17aa60d14cd337bc51 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:03:52 +0200 Subject: [PATCH 079/145] clean up temporary version --- .../chatbot_parser.py | 115 +++++++----------- 1 file changed, 42 insertions(+), 73 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 10c617252440..5c1a4b3facd3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -714,54 +714,6 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, pass -def write_os_specific_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): - """ - Function that writes text and metadata of os-specific files - - :param title: title of section - :param paragraphs_text: dictionary containing all paragraphs of text - :param paragraphs_metadata: 
dictionary containing the metadata for all paragraphs of text - :param title_order: list containing all subtitles in order - :param title_order_number: order number of the title of the section that is being written - :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS - :return: - """ - text = {} - for OS in [LINUX, WINDOWS, MACOS]: - - # Unmangle if's to use jinja parser - paragraphs_text[title] = re.sub(IF_MANGLED_PART, "", paragraphs_text[title]) - - # slightly alter if-statements to be able to use predefined macros - paragraphs_text[title] = re.sub(OS, '"' + OS + '"', paragraphs_text[title]) - - # Use jinja to render a different version of the text for each OS - template = Template(paragraphs_text[title]) - text[OS] = template.render(OS=OS) - - # re-adjust text to correct overcorrections - text[OS] = re.sub('"' + OS + '"', OS, text[OS]) - - # check that not all versions are the same - unique_texts = set(text.values()) - if len(unique_texts) == 1: - paragraphs_text[title] = text[OS] - write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers) - else: - for OS in [LINUX, WINDOWS, MACOS]: - # check that file actually has some content - if len(text[OS]) > 0: - # define the filepath - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, paragraphs_metadata[title][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) - - # write the files - write_files(title, text[OS], paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers=paragraph_numbers) - else: - # don't write empty files - pass - - def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers): """ Function to write files to a certain filepath @@ -845,7 +797,18 @@ def insert_links(text, links): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, i, paragraph_numbers, 
all_metadata): +def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, paragraph_numbers, all_metadata): + """ + Function that splits os-specific sections into subtitles, parses them using jinja and writes them away + + :param text: full os specific section + :param metadata: metadata generated for the full os specific section + :param subtitle_order: order of the subtitles generated by the splitter + :param title_order_number: order number of the section + :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS + :param all_metadata: all metadata generated by the splitter + :return: + """ # add first subtitle in front of section again text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text @@ -871,35 +834,39 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, i, parag _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE]) # prepare variables to fix metadata - total_subtitle_order = subtitle_order[:i] + os_subtitle_order + subtitle_order[i+1:] - copy_all_metadata = {**os_specific_metadata, **all_metadata} + total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] + all_metadata.update(os_specific_metadata) # write to files for os_i, os_subtitle in enumerate(os_subtitle_order): # check that file actually has some content if len(os_specific_text[os_subtitle]) > 0: # add the links to the metadata - os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] + if LINKS in metadata.keys(): + os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] # fix parent in the metadata parent_i = 0 parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 parent = os_specific_metadata[os_subtitle][MAIN_TITLE] while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): - if 
copy_all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: + if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: parent = total_subtitle_order[parent_i] parent_i += 1 os_specific_metadata[os_subtitle][PARENT_TITLE] = parent # fix directory in the metadata - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(copy_all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) + else: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) # make a directory to save the files filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) os.makedirs(filepath, exist_ok=True) # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + i, filepath, OS, paragraph_numbers) + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers) else: # don't write empty files pass @@ -930,27 +897,27 @@ def main(): ################### define loop-invariant variables ################### - # # constant that keeps track of the source directories - # source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - # os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] - # - # # list of all the filenames - # filenames_generic = {} - # filenames_linux = {} - # for source_directory in source_directories: - # all_items = os.listdir(source_directory) - # files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - # for file in files: - # if LINUX_TUTORIAL in 
source_directory: - # filenames_linux[file] = os.path.join(source_directory, file) - # else: - # filenames_generic[file] = os.path.join(source_directory, file) - - # Temporary variables to test with just one singular file + # constant that keeps track of the source directories + source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), + os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + + # list of all the filenames filenames_generic = {} filenames_linux = {} + for source_directory in source_directories: + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] + for file in files: + if LINUX_TUTORIAL in source_directory: + filenames_linux[file] = os.path.join(source_directory, file) + else: + filenames_generic[file] = os.path.join(source_directory, file) + + # # Temporary variables to test with just one singular file + # filenames_generic = {} + # filenames_linux = {} # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" - filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" + # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" # for loops over all files @@ -1012,8 +979,10 @@ def main(): else: split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers, paragraphs_metadata) + # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) + 
os.remove("jinja_file.txt") ################### run the script ################### From 4a441f34ca1ad6f296817fface9c86fe76585250 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:23:57 +0200 Subject: [PATCH 080/145] added command line options for custom macros --- .../chatbot_parser.py | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 5c1a4b3facd3..e15a76318c47 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,3 +1,4 @@ +import argparse import copy import json import os @@ -9,13 +10,7 @@ from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template #################### define macro's #################### -# customizable macros -MIN_PARAGRAPH_LENGTH = 160 -MAX_TITLE_DEPTH = 4 -INCLUDE_LINKS_IN_PLAINTEXT = False -SPLIT_ON_TITLES = True -SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES -DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False +# customizable macros (default values are defined at the bottom of the script) # directories PARSED_MDS = "parsed_mds" @@ -987,6 +982,29 @@ def main(): ################### run the script ################### if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot") + + # adding command-line options + + parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") + parser.add_argument("-pl", "--paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") + parser.add_argument("-td", "--title_depth", 
type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") + + args = parser.parse_args() + + SPLIT_ON_TITLES = bool(args.split_on_titles) + MIN_PARAGRAPH_LENGTH = args.paragraph_length + MAX_TITLE_DEPTH = args.title_depth + INCLUDE_LINKS_IN_PLAINTEXT = args.links + SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES + DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False + + print(SPLIT_ON_TITLES) + print(MIN_PARAGRAPH_LENGTH) + print(MAX_TITLE_DEPTH) + print(INCLUDE_LINKS_IN_PLAINTEXT) + print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() print("Parsing finished successfully") From 662134fbf7b7bfd53a358f40d43c5a329fd5bab8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:25:34 +0200 Subject: [PATCH 081/145] small fix to macros --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e15a76318c47..0f7345e81498 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -96,7 +96,7 @@ def check_for_title(line, in_code_block, curr_dirs): """ # detect titles match = re.match(r'^#+ ', line) - if match and len(match.group(0)) <= 5 and not in_code_block: + if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1 and not in_code_block: title_length = len(match.group(0)) - 1 if DEEP_DIRECTORIES: curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 
1:-1].replace(' ', '-'))) From 05eab4ae23dc86f45b6eccbef36e31e8869a30c9 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:46:30 +0200 Subject: [PATCH 082/145] clean up test for valid title --- scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py index fc704c84b316..225c368477d9 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py @@ -1,5 +1,4 @@ import pytest -import shutil from chatbot_parser import make_valid_title From b85a8fba96a7a5bd02dccef7a7f3cae34420f9b1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 22 Aug 2024 16:55:33 +0200 Subject: [PATCH 083/145] add a test for write_metadata --- .../tests/test_write_metadata.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py new file mode 100644 index 000000000000..68f1772cb242 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py @@ -0,0 +1,15 @@ +import pytest +import os +from chatbot_parser import write_metadata + + +@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,output", [ + ("", "", [], 1, "", {"main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), + ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2, + os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), + {"main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2, + "directory": os.path.join("A_very_good_main_title", 
"An_awesome_parent_file", "An_extremely_good_subtitle"), + "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}}) +]) +def test_write_metadata(main_title, subtitle, links, title_level, directory, output): + assert write_metadata(main_title, subtitle, links, title_level, directory) == output From 39a3c99f68464b1614da8bbaaa68adac0aeea889 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 11:41:20 +0200 Subject: [PATCH 084/145] added functionality to split on paragraphs --- .../chatbot_parser.py | 353 +++++++++--------- 1 file changed, 180 insertions(+), 173 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 0f7345e81498..9b6fced36364 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -10,7 +10,13 @@ from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template #################### define macro's #################### -# customizable macros (default values are defined at the bottom of the script) +# customizable macros (customization made possible at the bottom of the script) +SPLIT_ON_TITLES = True +MIN_PARAGRAPH_LENGTH = 160 +MAX_TITLE_DEPTH = 4 +INCLUDE_LINKS_IN_PLAINTEXT = False +SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES +DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False # directories PARSED_MDS = "parsed_mds" @@ -31,6 +37,7 @@ WINDOWS = "windows" MACOS = "macos" GENERIC = "generic" +LINK_OS = {LINUX: "Linux", WINDOWS: "Windows", MACOS: "macOS"} # OS needs different capitalisation for use in links # urls REPO_URL = 'https://github.com/hpcugent/vsc_user_docs' @@ -82,6 +89,11 @@ ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})' } +# filenames (and parts of filenames) +TEMP_JINJA_FILE = "jinja_file.txt" +_PARAGRAPH_ = "_paragraph_" 
+METADATA_EXTENSION = "_metadata" + ################### define functions ################### @@ -211,12 +223,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): return curr_line, linklist -def split_text(file, main_title): +def split_text(file, main_title, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level + :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level :return paragraphs_text: dictionary containing the split sections of text :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance @@ -225,7 +239,7 @@ def split_text(file, main_title): if SPLIT_ON_TITLES: return split_on_titles(file, main_title) elif SPLIT_ON_PARAGRAPHS: - return split_on_paragraphs(file, main_title) + return split_on_paragraphs(file, main_title, current_paragraph_number, OS) def split_on_titles(file, main_title): @@ -258,12 +272,6 @@ def split_on_titles(file, main_title): # list to keep track of the order of the subtitles subtitle_order = [] - # variable to keep track of the title level - title_level = 0 - - # variable to allow for if statements to "continue" over multiple paragraphs - open_ifs = "" - # variable to keep track of how many if-statements deep the current line is in_if_statement = 0 @@ -309,7 +317,7 @@ def split_on_titles(file, main_title): title = make_valid_title(line[title_level + 1:-1]) # create an entry for the file in the paragraphs text dictionary - current_paragraph = open_ifs + 
current_paragraph = "" after_first_title = True subtitle_order.append(title) @@ -345,42 +353,46 @@ def split_on_titles(file, main_title): return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order -def split_on_paragraphs(file, main_title): +def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph + :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph :return paragraphs_text: dictionary containing the split sections of text :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance """ - # start of assuming we haven't encountered a title and the first paragraph hasn't appeared yet - after_first_title = False - - # first paragraph number - paragraph_number = 1 - # start of assuming we are not in a code_block in_code_block = False # define initial dictionaries - paragraphs_text = {} + paragraphs_os_free_text = {} + paragraphs_os_text = {} paragraphs_metadata = {} + # variable to keep track of the current paragraph + current_paragraph = "" + # list to keep track of links in the text link_list = [] # list to keep track of the order of the subtitles subtitle_order = [] - # variable to keep track of the title level - title_level = 0 + # variable to keep track of how many if-statements deep the current line is + in_if_statement = 0 + + # variable to indicate that previous section was one with if-statements + previous_contained_if = False + + # paragraph number to add to 
title + paragraph_number = 1 - # initialise the first paragraph - title = main_title + "_paragraph_" + str(paragraph_number) - paragraphs_text[title] = "" - subtitle_order.append(title) + # metadata title + metadata_title = main_title # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: @@ -392,49 +404,85 @@ def split_on_paragraphs(file, main_title): for line in readfile: - # keep track of title level and directory to write to metadata upon discovering a new subtitle - if title_level > 0: - last_title_level = title_level - last_dir = curr_dirs[last_title_level] + # detect if-statements starting or ending on the current line + in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len( + re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) + + # only split up if current line is in a fully non-os-specific section + if in_if_statement == 0: + + title_level = check_for_title(line, in_code_block, curr_dirs) + + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block - title_level = check_for_title(line, in_code_block, curr_dirs) + # check whether a new paragraph should be started + if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= MIN_PARAGRAPH_LENGTH and not in_code_block: - # detect codeblocks to make sure titles and beginnings of paragraphs aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block + # create a title for the previous paragraph + if current_paragraph_number == -1: + paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) + else: + paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) + paragraph_number += 1 - # line is a title with a maximum depth of 4 - if title_level > 0: - paragraphs_text[title] += line[title_level + 1:] + # write text of previous file + if previous_contained_if: + paragraphs_os_text[paragraph_title] = current_paragraph + else: + paragraphs_os_free_text[paragraph_title] = current_paragraph - elif line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", paragraphs_text[title])) >= MIN_PARAGRAPH_LENGTH: - # finish the previous file - paragraphs_text[title], open_ifs = close_ifs(paragraphs_text[title]) - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + # write metadata of previous file + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir) + subtitle_order.append(paragraph_title) - # start a new file - paragraph_number += 1 - title = make_valid_title(main_title + "_paragraph_" + str(paragraph_number)) - subtitle_order.append(title) + # reset the current paragraph + current_paragraph = "" - # create an entry for the next file in the paragraphs text dictionary - paragraphs_text[title] = open_ifs + # reset link_list + link_list = [] - # reset link_list - link_list = [] + previous_contained_if = False - # line is not a title or the ending of a sufficiently large paragraph + # line is a title with a maximum depth of 4 + elif title_level > 0: + + # make a new title + metadata_title = make_valid_title(line[title_level + 1:-1]) + + line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title) + current_paragraph += 
line + + # line is not a title or the beginning of a new paragraph + elif line != "\n" or previous_contained_if: + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + current_paragraph += line + + # keep track of title level and directory to write to metadata upon discovering a new subtitle + if title_level > 0: + last_title_level = title_level + last_dir = curr_dirs[last_title_level] else: + previous_contained_if = True line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) - if title in paragraphs_text.keys() and line != "\n": - paragraphs_text[title] += line - elif line != "\n": - paragraphs_text[title] = line + current_paragraph += line - # write metadata for the last file - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, title_level, curr_dirs[last_title_level]) + # create a title for the last paragraph + if current_paragraph_number == -1: + paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) + else: + paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) - return paragraphs_text, paragraphs_metadata, subtitle_order + # write dictionaries for the last file + if previous_contained_if: + paragraphs_os_text[paragraph_title] = current_paragraph + else: + paragraphs_os_free_text[paragraph_title] = current_paragraph + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level]) + subtitle_order.append(paragraph_title) + + return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order def write_metadata(main_title, subtitle, links, title_level, directory): @@ -461,63 +509,6 @@ def write_metadata(main_title, subtitle, links, title_level, directory): return paragraph_metadata -def close_ifs(text): - """ - Function to check whether all if-statements in a section are closed properly. 
If that is not the case, the function - closes all if-statements at the end of the section and returns a prefix for the next section containing all if-statements - of the section it is processing. This needs to be done because the start of the next section would also be contained within the - last unclosed if-statement of its previous section. - - :param text: the text of the section it checks - :return text: the adapted text where all if-statements are closed - :return prefix: the prefix for the next section - """ - - if_count = len(re.findall(IF_MANGLED_PATTERNS[IF], text.replace("\n", ""))) - endif_count = len(re.findall(IF_MANGLED_PATTERNS[ENDIF], text.replace("\n", ""))) - if IF_MANGLED_PART not in text or if_count == endif_count: - return text, "" - else: - - # Find all matches for each pattern - matches = [] - for key, pattern in IF_MANGLED_PATTERNS.items(): - for match in re.finditer(pattern, text): - matches.append(match) - - # sort the matches according to their start index - matches.sort(key=lambda x: x.start()) - - # extract the strings from the matches - open_ifs = [] - for match in matches: - open_ifs.append(match.group(0)) - - # only include the non-closed if-statements - changed = True - while changed: - changed = False - last_if = -1 - last_else = -1 - for i, if_part in enumerate(open_ifs): - if re.search(IF_MANGLED_PATTERNS[IF], if_part): - last_if = i - elif re.search(IF_MANGLED_PATTERNS[ELSE], if_part): - last_else = i - elif re.search(IF_MANGLED_PATTERNS[ENDIF], if_part): - changed = True - del open_ifs[i] - if last_else > last_if: - del open_ifs[last_else] - del open_ifs[last_if] - break - - # Concatenate all matches into a single string - open_ifs = ''.join(open_ifs) - - return text + (r'{' + IF_MANGLED_PART + '% endif %' + IF_MANGLED_PART + '}')*(if_count - endif_count), open_ifs - - def jinja_parser(filename, copy_location): """ function that let's jinja do its thing to format the files except for the os-related if-statements @@ -596,7 
+587,7 @@ def mangle_os_ifs(line, is_os): constr_match = re.search(r'\{%.*?%}', match.string) if_match = re.search(r'if ', match.group(1)) - if_os_match = re.search(r'if OS ', match.group(1)) + if_os_match = re.search(r'if OS', match.group(1)) endif_match = re.search(r'endif', match.group(1)) else_match = re.search(r'else', match.group(1)) @@ -726,7 +717,6 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe metadata = copy.deepcopy(paragraphs_metadata[title]) - file_title = paragraphs_metadata[title][MAIN_TITLE] + "_" + OS + "_paragraph_" + str(paragraph_numbers[OS]) file_title = title # write text file @@ -757,10 +747,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe if OS == GENERIC: os_part = "" else: - os_part = OS + "/" - metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in title if char.isalnum() or char == '-').strip('-') + os_part = LINK_OS[OS] + "/" + metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') - with open(os.path.join(filepath, file_title + "_metadata.json"), 'w') as writefile: + with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile: json.dump(metadata, writefile, indent=4) paragraph_numbers[OS] += 1 @@ -804,8 +794,6 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or :param all_metadata: all metadata generated by the splitter :return: """ - # add first subtitle in front of section again - text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + text # Unmangle if's to use jinja parser text = re.sub(IF_MANGLED_PART, "", text) @@ -819,52 +807,74 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or 
template = Template(text) jinja_text = template.render(OS=OS) - # re-adjust text to correct overcorrections - jinja_text = re.sub('"' + OS + '"', OS, jinja_text) - - with open("jinja_file.txt", 'w') as writefile: - writefile.write(jinja_text) - - # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text("jinja_file.txt", metadata[MAIN_TITLE]) - - # prepare variables to fix metadata - total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] - all_metadata.update(os_specific_metadata) - - # write to files - for os_i, os_subtitle in enumerate(os_subtitle_order): - # check that file actually has some content - if len(os_specific_text[os_subtitle]) > 0: - # add the links to the metadata - if LINKS in metadata.keys(): - os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] - - # fix parent in the metadata - parent_i = 0 - parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 - parent = os_specific_metadata[os_subtitle][MAIN_TITLE] - while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): - if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: - parent = total_subtitle_order[parent_i] - parent_i += 1 - os_specific_metadata[os_subtitle][PARENT_TITLE] = parent - - # fix directory in the metadata - if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) - else: - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + # add first subtitle in front of section again + if len(jinja_text) != 0: + jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text - # make a directory to save the files - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, 
os_specific_metadata[os_subtitle][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) + # re-adjust text to correct overcorrections + jinja_text = re.sub('"' + OS + '"', OS, jinja_text) + + if LINUX_TUTORIAL not in metadata[DIRECTORY]: + with open(TEMP_JINJA_FILE, 'w') as writefile: + writefile.write(jinja_text) + + # split in right way + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) - # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers) else: - # don't write empty files - pass + os.makedirs(LINUX_TUTORIAL, exist_ok=True) + with open(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), 'w') as writefile: + writefile.write(jinja_text) + + # split in right way + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + + # prepare variables to fix metadata + total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] + all_metadata.update(os_specific_metadata) + + # write to files + for os_i, os_subtitle in enumerate(os_subtitle_order): + # check that file actually has some content + if len(os_specific_text[os_subtitle]) > 0: + # add the links to the metadata + if LINKS in metadata.keys(): + os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] + + # fix parent in the metadata + parent_i = 0 + parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 + parent = os_specific_metadata[os_subtitle][MAIN_TITLE] + + while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): + if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] 
== parent_depth: + parent = total_subtitle_order[parent_i] + parent_i += 1 + + if SPLIT_ON_PARAGRAPHS and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]: + os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE] + else: + os_specific_metadata[os_subtitle][PARENT_TITLE] = parent + + # fix directory in the metadata if needed + if DEEP_DIRECTORIES: + if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) + else: + os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) + + # make a directory to save the files + filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) + os.makedirs(filepath, exist_ok=True) + + # write to files + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers) + else: + # don't write empty files + pass + else: + # don't split empty texts + pass def main(): @@ -913,7 +923,7 @@ def main(): # filenames_linux = {} # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" - # filenames_linux["beyond_the_basics.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/beyond_the_basics.md" + # filenames_linux["common_pitfalls.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/common_pitfalls.md" # for loops over all files for filenames in [filenames_generic, filenames_linux]: @@ -977,7 +987,9 @@ def main(): # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) 
shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) - os.remove("jinja_file.txt") + shutil.rmtree(LINUX_TUTORIAL, ignore_errors=True) + if os.path.exists(TEMP_JINJA_FILE): + os.remove(TEMP_JINJA_FILE) ################### run the script ################### @@ -987,24 +999,19 @@ def main(): # adding command-line options parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") - parser.add_argument("-pl", "--paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") - parser.add_argument("-td", "--title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") + parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") args = parser.parse_args() SPLIT_ON_TITLES = bool(args.split_on_titles) - MIN_PARAGRAPH_LENGTH = args.paragraph_length - MAX_TITLE_DEPTH = args.title_depth + MIN_PARAGRAPH_LENGTH = args.min_paragraph_length + MAX_TITLE_DEPTH = args.max_title_depth INCLUDE_LINKS_IN_PLAINTEXT = args.links SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False - print(SPLIT_ON_TITLES) - print(MIN_PARAGRAPH_LENGTH) - print(MAX_TITLE_DEPTH) - print(INCLUDE_LINKS_IN_PLAINTEXT) - print("WARNING: This 
script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() print("Parsing finished successfully") From af9e6cca6ead2ded6ac54500e7e84ed26939aa12 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 12:02:22 +0200 Subject: [PATCH 085/145] clean up --- .../chatbot_parser.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 9b6fced36364..ca861b86e815 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -676,7 +676,7 @@ def make_valid_title(title): return valid_filename -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, paragraph_numbers): +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): """ Function that writes text and metadata of a generic (non-os-specific) file @@ -685,7 +685,6 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written - :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ @@ -694,13 +693,13 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC, 
paragraph_numbers=paragraph_numbers) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) else: # don't write empty files pass -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, paragraph_numbers): +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): """ Function to write files to a certain filepath @@ -711,7 +710,6 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :param title_order_number: order number of the title of the section that is being written :param filepath: filepath to write files to :param OS: OS to be included in the metadata - :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :return: """ @@ -728,18 +726,22 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe writefile.write(text) # write metadata + # add previous subtitle if title_order_number != 0: metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1] else: metadata[PREVIOUS_SUBTITLE] = None + # add next subtitle if title_order_number != len(title_order) - 1: metadata[NEXT_SUBTITLE] = title_order[title_order_number + 1] else: metadata[NEXT_SUBTITLE] = None + # add OS metadata[METADATA_OS] = OS + # add reference link if bool(LINUX_TUTORIAL in paragraphs_metadata[title][DIRECTORY]): linux_part = LINUX_TUTORIAL + "/" else: @@ -750,11 +752,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = LINK_OS[OS] + "/" metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') + # write metadata to file with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile: 
json.dump(metadata, writefile, indent=4) - paragraph_numbers[OS] += 1 - def insert_links(text, links): """ @@ -782,7 +783,7 @@ def insert_links(text, links): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, paragraph_numbers, all_metadata): +def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata): """ Function that splits os-specific sections into subtitles, parses them using jinja and writes them away @@ -790,7 +791,6 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or :param metadata: metadata generated for the full os specific section :param subtitle_order: order of the subtitles generated by the splitter :param title_order_number: order number of the section - :param paragraph_numbers: dictionary keeping track of the amount of paragraphs that have been written for each OS :param all_metadata: all metadata generated by the splitter :return: """ @@ -868,7 +868,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or os.makedirs(filepath, exist_ok=True) # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, paragraph_numbers) + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS) else: # don't write empty files pass @@ -978,11 +978,11 @@ def main(): # generic if subtitle in paragraphs_os_free_text.keys(): - write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, paragraph_numbers) + write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i) # os-specific else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers, paragraphs_metadata) + 
split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers) # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) From f4163a7d3cb94ab4962f7c24d8a78906064d59a6 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 12:04:32 +0200 Subject: [PATCH 086/145] clean up --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index ca861b86e815..6be841ae2e58 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -994,7 +994,7 @@ def main(): ################### run the script ################### if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot") + parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") # adding command-line options From 833f96488dacf631782afe14081202021f50e9f0 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 12:12:48 +0200 Subject: [PATCH 087/145] further clean up and added shebang --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 6be841ae2e58..76627065004d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + import argparse import copy import json @@ -958,9 +960,6 @@ def main(): # variable that keeps track of the directories that are used to write in at different levels curr_dirs = [filename[:-3] for _ in range(5)] - # dictionary that keeps track of the paragraph numbers - paragraph_numbers = {GENERIC: 1, LINUX: 1, 
WINDOWS: 1, MACOS: 1} - ################### actually parse the md file ################### # create directories for the source markdown file @@ -982,7 +981,7 @@ def main(): # os-specific else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraph_numbers) + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata) # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) From 79b1a56d5a4742c5e96663f1b4c0b41fba68728d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 12:13:50 +0200 Subject: [PATCH 088/145] clean up --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 76627065004d..c7dbe2057373 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1010,7 +1010,7 @@ def main(): INCLUDE_LINKS_IN_PLAINTEXT = args.links SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False - - print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") + if DEEP_DIRECTORIES: + print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() print("Parsing finished successfully") From cec154c64ac9cf4bb34cc9e1ccd9f3d96ea656e7 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 13:54:56 +0200 Subject: [PATCH 089/145] added test for if mangler --- .../tests/example_files/example_text_1.md | 31 +++++++++++ .../if_mangler_1_input.md | 4 ++ .../if_mangler_1_output.md | 4 ++ .../if_mangler_2_input.md | 7 +++ .../if_mangler_2_output.md | 7 +++ .../if_mangler_3_input.md | 6 ++ .../if_mangler_3_output.md | 6 ++ .../if_mangler_4_input.md | 4 ++ .../if_mangler_4_output.md | 4 ++ .../if_mangler_5_input.md | 11 ++++ .../if_mangler_5_output.md | 11 ++++ .../if_mangler_6_input.md | 8 +++ .../if_mangler_6_output.md | 8 +++ .../if_mangler_7_input.md | 9 +++ .../if_mangler_7_output.md | 9 +++ .../if_mangler_test_files/if_mangler_input.md | 55 +++++++++++++++++++ .../if_mangler_output.md | 55 +++++++++++++++++++ .../tests/test_if_mangler.py | 32 +++++++++++ 18 files changed, 271 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md new file mode 100644 index 000000000000..9b810c3f41af --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md @@ -0,0 +1,31 @@ +# Main title + +## Subtitle 1 + +blablabla +blablablabla + +### Subtitle 2 partly generic + +blablabla generic +blablabla generic +{% if OS == windows %}blablabla windows +blablabla windows with a [link](windows.md) + +#### Subtitle 3 Windows specific + +blablabla windows +blablablabla windows +{% else %}blablabla Linux macOS +blablablabla Linux macOS with a [link](linuxmacos.md) + +#### Subtitle 4 Linux and macOS specific + +blablabla Linux macOS 
+blablablabla Linux macOS +{% endif %} +blablabla generic with a [link](generic.md) + +## Subtitle 5 generic + +blablabla \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md new file mode 100644 index 000000000000..6a74b3c0181b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md @@ -0,0 +1,4 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md new file mode 100644 index 000000000000..2f9cdc38294b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md @@ -0,0 +1,4 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md new file mode 100644 index 000000000000..360a4a59ba38 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md new file mode 100644 index 000000000000..798dcf6db24a --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md new file mode 100644 index 000000000000..d93125a59716 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md new file mode 100644 index 000000000000..02141961338d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{-if-% if OS == linux %-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md new file mode 100644 index 000000000000..cc15fae1df11 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md 
b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md new file mode 100644 index 000000000000..cc15fae1df11 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md new file mode 100644 index 000000000000..bdb288474e24 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md @@ -0,0 +1,11 @@ +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md new file mode 100644 index 000000000000..10443eb67a4f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md @@ -0,0 +1,11 @@ +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif %-if-} +test5 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md new file mode 100644 index 000000000000..0731ee3588ce --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in 
OS_IF +{% if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md new file mode 100644 index 000000000000..cd37117cb004 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md new file mode 100644 index 000000000000..6a72a338527a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md new file mode 100644 index 000000000000..dfe342ebfb14 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md 
b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md new file mode 100644 index 000000000000..fb8c1f8b5396 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md @@ -0,0 +1,55 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} + +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} + +test6: NON_OS_IF in OS_IF +{% if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} + +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md new file mode 100644 index 000000000000..796e94348fa2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md @@ -0,0 +1,55 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} + +test3: OS_IF with else +{-if-% if OS == linux %-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif 
%-if-} +test5 +{-if-% endif %-if-} + +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} + +test7: weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py new file mode 100644 index 000000000000..17053fe705c1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -0,0 +1,32 @@ +import pytest +import os +import shutil +from chatbot_parser import mangle_ifs + + +@pytest.mark.parametrize("input_file,output_file", [ + ("if_mangler_1_input.md", "if_mangler_1_output.md"), + ("if_mangler_2_input.md", "if_mangler_2_output.md"), + ("if_mangler_3_input.md", "if_mangler_3_output.md"), + ("if_mangler_4_input.md", "if_mangler_4_output.md"), + ("if_mangler_5_input.md", "if_mangler_5_output.md"), + ("if_mangler_6_input.md", "if_mangler_6_output.md"), + ("if_mangler_7_input.md", "if_mangler_7_output.md") +]) +def test_if_mangler(input_file, output_file): + # make directory + os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) + + # make filepaths + input_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", input_file) + expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file) + actual_output_file_path = os.path.join("if_mangled_files", input_file) + mangle_ifs(input_file_path, input_file) + + # check every line + with open(expected_output_file_path, "r") as expected_read_file: + with open(actual_output_file_path, "r") as actual_read_file: + assert all([expected_line == actual_line for expected_line, actual_line in zip(expected_read_file, actual_read_file)]) + + # remove directory + shutil.rmtree("if_mangled_files", 
ignore_errors=True) From 2f4a277677ea9f20e8bad455b0e583dd1bf5b028 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 16:26:10 +0200 Subject: [PATCH 090/145] clean up --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c7dbe2057373..d91cd0df7d76 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -890,17 +890,9 @@ def main(): shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) # make the necessary directories - if not os.path.exists(COPIES): - os.mkdir(COPIES) - - if not os.path.exists(os.path.join(COPIES, LINUX_TUTORIAL)): - os.mkdir(os.path.join(COPIES, LINUX_TUTORIAL)) - - if not os.path.exists(PARSED_MDS): - os.mkdir(PARSED_MDS) - - if not os.path.exists(IF_MANGLED_FILES): - os.mkdir(IF_MANGLED_FILES) + for directory in [COPIES, os.path.join(COPIES, LINUX_TUTORIAL), PARSED_MDS, IF_MANGLED_FILES]: + if not os.path.exists(directory): + os.makedirs(directory) ################### define loop-invariant variables ################### From cd0c8ebad9ddc2ec25ef987d88d945215cdf5070 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 23 Aug 2024 17:20:20 +0200 Subject: [PATCH 091/145] clean up customizable options --- .../chatbot_parser.py | 132 ++++++++++-------- 1 file changed, 72 insertions(+), 60 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index d91cd0df7d76..c262f1127594 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -12,13 +12,13 @@ from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template #################### define macro's #################### -# customizable macros (customization made possible at the 
bottom of the script) -SPLIT_ON_TITLES = True -MIN_PARAGRAPH_LENGTH = 160 -MAX_TITLE_DEPTH = 4 -INCLUDE_LINKS_IN_PLAINTEXT = False -SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES -DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False +# options +SPLIT_ON_TITLES = "SPLIT_ON_TITLES" +MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH" +MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH" +INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT" +SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS" +DEEP_DIRECTORIES = "DEEP_DIRECTORIES" # directories PARSED_MDS = "parsed_mds" @@ -99,24 +99,25 @@ ################### define functions ################### -def check_for_title(line, in_code_block, curr_dirs): +def check_for_title(line, in_code_block, curr_dirs, options): """ function that checks for titles in the current line. Used by split_text to split the text among the subtitles :param line: the current line to be checked for a title :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles :param curr_dirs: the current working directories for each level of subtitle, to be updated when a new title is found + :param options: dictionary containing the options given by the user :return title_length: The amount of hashtags in front of the title on the current line """ # detect titles match = re.match(r'^#+ ', line) - if match and len(match.group(0)) <= MAX_TITLE_DEPTH + 1 and not in_code_block: + if match and len(match.group(0)) <= options[MAX_TITLE_DEPTH] + 1 and not in_code_block: title_length = len(match.group(0)) - 1 - if DEEP_DIRECTORIES: + if options[DEEP_DIRECTORIES]: curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-'))) # update the higher order current directories - for i in range(title_length + 1, MAX_TITLE_DEPTH + 1): + for i in range(title_length + 1, options[MAX_TITLE_DEPTH] + 1): curr_dirs[i] = 
curr_dirs[title_length] return title_length @@ -225,12 +226,13 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): return curr_line, linklist -def split_text(file, main_title, current_paragraph_number=-1, OS=GENERIC): +def split_text(file, main_title, options, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param options: dictionary containing the options given by the user :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level :return paragraphs_text: dictionary containing the split sections of text @@ -238,18 +240,19 @@ def split_text(file, main_title, current_paragraph_number=-1, OS=GENERIC): :return subtitle_order: list containing all encountered subtitles in order of appearance """ - if SPLIT_ON_TITLES: - return split_on_titles(file, main_title) - elif SPLIT_ON_PARAGRAPHS: - return split_on_paragraphs(file, main_title, current_paragraph_number, OS) + if options[SPLIT_ON_TITLES]: + return split_on_titles(file, main_title, options) + elif options[SPLIT_ON_PARAGRAPHS]: + return split_on_paragraphs(file, main_title, options, current_paragraph_number, OS) -def split_on_titles(file, main_title): +def split_on_titles(file, main_title, options): """ Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param options: dictionary containing the options given by the user :return paragraphs_text: dictionary containing the split 
sections of text :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance @@ -282,9 +285,9 @@ def split_on_titles(file, main_title): # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: - curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] + curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] else: - curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)] + curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)] with open(file, 'r') as readfile: @@ -296,7 +299,7 @@ def split_on_titles(file, main_title): # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: - title_level = check_for_title(line, in_code_block, curr_dirs) + title_level = check_for_title(line, in_code_block, curr_dirs, options) # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): @@ -355,12 +358,13 @@ def split_on_titles(file, main_title): return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order -def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERIC): +def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file + :param options: dictionary containing the options given by the user :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph :return paragraphs_text: dictionary containing the split sections of text @@ -398,9 +402,9 @@ def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERI # list to keep track of most recent directories on each title level if LINUX_TUTORIAL not in file: - curr_dirs = [main_title for _ in range(MAX_TITLE_DEPTH + 1)] + curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] else: - curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(MAX_TITLE_DEPTH + 1)] + curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)] with open(file, 'r') as readfile: @@ -413,14 +417,14 @@ def split_on_paragraphs(file, main_title, current_paragraph_number=-1, OS=GENERI # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: - title_level = check_for_title(line, in_code_block, curr_dirs) + title_level = check_for_title(line, in_code_block, curr_dirs, options) # detect codeblocks to make sure titles aren't detected in them if '```' in line or (('
' in line) ^ ('
' in line)): in_code_block = not in_code_block # check whether a new paragraph should be started - if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= MIN_PARAGRAPH_LENGTH and not in_code_block: + if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block: # create a title for the previous paragraph if current_paragraph_number == -1: @@ -678,7 +682,7 @@ def make_valid_title(title): return valid_filename -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number): +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options): """ Function that writes text and metadata of a generic (non-os-specific) file @@ -687,6 +691,7 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written + :param options: dictionary containing the options given by the user :return: """ @@ -695,13 +700,13 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, OS=GENERIC) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options) else: # don't write empty files pass -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS): +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, 
filepath, OS, options): """ Function to write files to a certain filepath @@ -712,6 +717,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :param title_order_number: order number of the title of the section that is being written :param filepath: filepath to write files to :param OS: OS to be included in the metadata + :param options: dictionary containing the options given by the user :return: """ @@ -722,7 +728,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe # write text file with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile: if LINKS in paragraphs_metadata[title].keys(): - adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS]) + adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS], options) writefile.write(adapted_text) else: writefile.write(text) @@ -759,12 +765,13 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe json.dump(metadata, writefile, indent=4) -def insert_links(text, links): +def insert_links(text, links, options): """ Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT :param text: The plaintext that needs to be adapted :param links: The links that might need to be inserted + :param options: dictionary containing the options given by the user :return text: The adapted plaintext :return links: The links that were actually present in the text """ @@ -773,7 +780,7 @@ def insert_links(text, links): new_links = {} for link_number in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text): present_links.append(link_number.group(1)) - if INCLUDE_LINKS_IN_PLAINTEXT: + if options[INCLUDE_LINKS_IN_PLAINTEXT]: text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, " " + links[link_number.group(1)] + " ", text) else: text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, "", text) @@ -785,7 +792,7 @@ 
def insert_links(text, links): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata): +def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options): """ Function that splits os-specific sections into subtitles, parses them using jinja and writes them away @@ -794,6 +801,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or :param subtitle_order: order of the subtitles generated by the splitter :param title_order_number: order number of the section :param all_metadata: all metadata generated by the splitter + :param options: dictionary containing the options given by the user :return: """ @@ -821,7 +829,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or writefile.write(jinja_text) # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) else: os.makedirs(LINUX_TUTORIAL, exist_ok=True) @@ -829,7 +837,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or writefile.write(jinja_text) # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) # prepare variables to 
fix metadata total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] @@ -853,13 +861,13 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or parent = total_subtitle_order[parent_i] parent_i += 1 - if SPLIT_ON_PARAGRAPHS and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]: + if options[SPLIT_ON_PARAGRAPHS] and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]: os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE] else: os_specific_metadata[os_subtitle][PARENT_TITLE] = parent # fix directory in the metadata if needed - if DEEP_DIRECTORIES: + if options[DEEP_DIRECTORIES]: if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) else: @@ -870,7 +878,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or os.makedirs(filepath, exist_ok=True) # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS) + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options) else: # don't write empty files pass @@ -884,6 +892,28 @@ def main(): main function :return: """ + parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") + + # adding command-line options + + parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 
160)") + parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") + parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is set to one") + + args = parser.parse_args() + + options = {SPLIT_ON_TITLES: bool(args.split_on_titles), + SPLIT_ON_PARAGRAPHS: not args.split_on_titles, + MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, + MAX_TITLE_DEPTH: args.max_title_depth, + INCLUDE_LINKS_IN_PLAINTEXT: args.links, + DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} + + if options[DEEP_DIRECTORIES]: + print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") + # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason shutil.rmtree(PARSED_MDS, ignore_errors=True) shutil.rmtree(COPIES, ignore_errors=True) @@ -962,18 +992,18 @@ def main(): jinja_parser(filename, copy_file) # split the text in paragraphs - paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title) + paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) # for every section, either make the whole section generic, or create an os-specific file for each OS for i, subtitle in enumerate(subtitle_order): # generic if subtitle in paragraphs_os_free_text.keys(): - write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i) + 
write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options) # os-specific else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata) + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options) # clean up temporary directories and files shutil.rmtree(COPIES, ignore_errors=True) @@ -982,27 +1012,9 @@ def main(): if os.path.exists(TEMP_JINJA_FILE): os.remove(TEMP_JINJA_FILE) + print("Parsing finished successfully") + ################### run the script ################### if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") - - # adding command-line options - - parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") - parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") - parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") - - args = parser.parse_args() - - SPLIT_ON_TITLES = bool(args.split_on_titles) - MIN_PARAGRAPH_LENGTH = args.min_paragraph_length - MAX_TITLE_DEPTH = args.max_title_depth - INCLUDE_LINKS_IN_PLAINTEXT = args.links - SPLIT_ON_PARAGRAPHS = not SPLIT_ON_TITLES - DEEP_DIRECTORIES = True and SPLIT_ON_TITLES # Should always be False if SPLIT_ON_TITLES is False - if DEEP_DIRECTORIES: - print("WARNING: This script 
generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") main() - print("Parsing finished successfully") From 3be262a84c2a574239a554a08b1e760322b470ee Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 09:57:58 +0200 Subject: [PATCH 092/145] further adapt the script to be able to test it --- .../chatbot_parser.py | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c262f1127594..951fea423020 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -887,29 +887,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or pass -def main(): +def main(options): """ main function :return: """ - parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") - - # adding command-line options - - parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") - parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") - parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") - parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following 
the structure of the subtitles. Only works if split on titles is set to one") - - args = parser.parse_args() - - options = {SPLIT_ON_TITLES: bool(args.split_on_titles), - SPLIT_ON_PARAGRAPHS: not args.split_on_titles, - MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, - MAX_TITLE_DEPTH: args.max_title_depth, - INCLUDE_LINKS_IN_PLAINTEXT: args.links, - DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} if options[DEEP_DIRECTORIES]: print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") @@ -1017,4 +999,23 @@ def main(): ################### run the script ################### if __name__ == '__main__': - main() + parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") + + # adding command-line options + + parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") + parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") + parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. 
Only works if split on titles is set to one") + + args = parser.parse_args() + + options = {SPLIT_ON_TITLES: bool(args.split_on_titles), + SPLIT_ON_PARAGRAPHS: not args.split_on_titles, + MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, + MAX_TITLE_DEPTH: args.max_title_depth, + INCLUDE_LINKS_IN_PLAINTEXT: args.links, + DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} + + main(options) From 1d32aab468c7d3698c69761c0783efd99196cdf1 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 10:11:32 +0200 Subject: [PATCH 093/145] make changes to usage in command line to be more intuitive --- .../chatbot_parser.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 951fea423020..e0741a9a3473 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1003,19 +1003,19 @@ def main(options): # adding command-line options - parser.add_argument("-st", "--split_on_titles", type=int, default=1, help="Set to 1 if source files should be split on titles of maximum depth title_depth, set to 0 if source files should be split on paragraphs of minimum length paragraph_length (default: 1)") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is set to zero (default: 160)") - parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is set to one (default: 4)") + parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if 
split on titles is disabled (default: 160)") + parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") - parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is set to one") + parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled") args = parser.parse_args() - options = {SPLIT_ON_TITLES: bool(args.split_on_titles), - SPLIT_ON_PARAGRAPHS: not args.split_on_titles, - MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, - MAX_TITLE_DEPTH: args.max_title_depth, - INCLUDE_LINKS_IN_PLAINTEXT: args.links, - DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} - - main(options) + options_dict = {SPLIT_ON_TITLES: args.split_on_titles, + SPLIT_ON_PARAGRAPHS: not args.split_on_titles, + MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, + MAX_TITLE_DEPTH: args.max_title_depth, + INCLUDE_LINKS_IN_PLAINTEXT: args.links, + DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} + + main(options_dict) From 5902c96c19985f4225a34f7d081e294482bedcce Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 10:37:51 +0200 Subject: [PATCH 094/145] first revised version of the README --- scripts/HPC_chatbot_preprocessor/README.md | 67 ++++++++++++++++------ 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 55996e0bef53..86bea6b9ed77 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -1,6 +1,44 @@ # 
Chatbot parser -`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. +`chatbot_parser.py` is a script that transforms the markdown sourcefiles into a structured directory as input for a chatbot. + +## Usage + +The script can be ran in a shell environment with the following command: + +```shell +python chatbot_parser.py +``` + +This command has the following possible options: + +```shell +chatbot_parser.py [-h] [-st SPLIT_ON_TITLES] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] +``` + +### `h`/`help` + +Display the help message + +### `st`/`split_on_titles` + +Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length. + +### `pl`/`min_paragraph_length` + +This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 160 characters. This options only works if `split_on_titles` is not enabled. + +### `td`/`max_title_depth` + +This option allows the user to configure the maximum "title depth" (the amount of `#` in front) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4. + +### `l`/`links` + +Some of the sourcefiles might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext. + +### `dd`/`deep_directories` + +Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled. 
## Generated file structure @@ -11,22 +49,17 @@ The generated directory structure is written as a subdirectory of `parsed_mds`. Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. -These subdirectories then contain a subdirectory for each individual markdown sourcefile. In the file specific subdirectories, further divisions are made according to the titles and subtitles found in that markdown sourcefile. - -Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and at the end a reference link to the corresponding part of the documentation website on . +Both the generic and each of the three os-specific directories then contain a directory for each source file. -## Requirements +If the option `deep_directories` is not enabled, all paragraphs of the source file and their corresponding metadata will be saved in this directory. The (processed) plaintext of the paragraph is written to a `.txt` file and the metadata is written to a `.json` file. -- The required Python packages are listed in `requirements.txt` -- [Pandoc](https://pandoc.org/installing.html) must be installed and must be added to the system PATH +If the option `deep_directories` is enabled, the directory of each source file will contain a subdirectory structure corresponding to the structure of the subtitles at different levels in the source file. Each subtitle in the source file corresponds to a directory nested in the directory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). -## Usage +Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and a `.json` file with the metadata of that section. 
-The script can be ran in a shell environment with the following command: +## Requirements -```shell -python chatbot_parser.py -``` +- The required Python packages are listed in `requirements.txt` ## Restrictions on source-files @@ -102,13 +135,9 @@ endif This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. -### Allowed html syntax +### html syntax -The script contains a list of html syntax keywords it filters out. If more html syntax keywords are used in the future, it suffices to add them to this list to adapt the script to filter them out. The current list is: -``` -["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] -``` -The script is also adapted to take into consideration structures like `
` and retain the link. +The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. ### Markdown comments @@ -121,4 +150,4 @@ Any comments within the markdown files (for example TODO's) should follow the fo ### Long filepaths -Due to the nature of this script, it can generate large directories with very long names. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to the script is not too long. +Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`. 
From 6e488005ed15345e34878c7a9ee7944d554a42ef Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 11:00:58 +0200 Subject: [PATCH 095/145] added docstring to main function --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e0741a9a3473..282e16070315 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -890,6 +890,14 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or def main(options): """ main function + + :param options: dictionary containing the options specified by the user to run the script: + {SPLIT_ON_TITLES: boolean indicating whether to split on titles, + SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES), + MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph, + MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, + INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, + DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not} :return: """ From 0bc440bc71dc0cb2a01fc799db5566c112f0c481 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 11:22:23 +0200 Subject: [PATCH 096/145] include chatbot_prepprocessor --- scripts/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/README.md b/scripts/README.md index eed5a73e4d57..a88bd42cc46c 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,3 +1,4 @@ Scripts that can be used to automatically generate markdown files, can be found here. 
-* [`available_software`](available_software): script to generate overview of available environment modules; \ No newline at end of file +* [`available_software`](available_software): script to generate overview of available environment modules; +* [`chatbot_preprocessor`](HPC_chatbot_preprocessor): script to generate input files for the chatbot; \ No newline at end of file From e6e6023c068b8c512af808cb40d4bd1dd68c1603 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 12:26:38 +0200 Subject: [PATCH 097/145] added options for source and destination directories --- scripts/HPC_chatbot_preprocessor/README.md | 10 +- .../chatbot_parser.py | 170 ++++++++---------- 2 files changed, 86 insertions(+), 94 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 86bea6b9ed77..82aaa9b7e3c0 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -13,13 +13,21 @@ python chatbot_parser.py This command has the following possible options: ```shell -chatbot_parser.py [-h] [-st SPLIT_ON_TITLES] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] +chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] ``` ### `h`/`help` Display the help message +### `src`/`source` + +This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`). + +### `dst`/`destination` + +This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. 
If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script. + ### `st`/`split_on_titles` Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length. diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 282e16070315..245c5d68f514 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -13,6 +13,8 @@ #################### define macro's #################### # options +SOURCE_DIRECTORY = "SOURCE_DIRECTORY" +DESTINATION_DIRECTORY = "DESTINATION_DIRECTORY" SPLIT_ON_TITLES = "SPLIT_ON_TITLES" MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH" MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH" @@ -284,10 +286,7 @@ def split_on_titles(file, main_title, options): previous_contained_if = False # list to keep track of most recent directories on each title level - if LINUX_TUTORIAL not in file: - curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] - else: - curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)] + curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] with open(file, 'r') as readfile: @@ -515,12 +514,13 @@ def write_metadata(main_title, subtitle, links, title_level, directory): return paragraph_metadata -def jinja_parser(filename, copy_location): +def jinja_parser(filename, copy_location, options): """ function that let's jinja do its thing to format the files except for the os-related if-statements :param filename: the name of the file that needs to be formatted using jinja :param copy_location: the location of the file that needs to be formatted using jinja + :param options: dictionary containing the options given by the user :return: """ # YAML 
file location @@ -539,10 +539,10 @@ def jinja_parser(filename, copy_location): combined_context = {**words_dict, **additional_context} # Mangle the OS-related if-statements - mangle_ifs(copy_location, filename) + mangle_ifs(copy_location, filename, options) # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath=[IF_MANGLED_FILES, os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR)]), FunctionLoader(load_macros)]) + template_loader = ChoiceLoader([FileSystemLoader(searchpath=[os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]), FunctionLoader(load_macros)]) templateEnv = Environment(loader=template_loader) template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) @@ -642,18 +642,19 @@ def mangle_os_ifs(line, is_os): return line, is_os -def mangle_ifs(directory, filename): +def mangle_ifs(directory, filename, options): """ function that writes the if-mangled version of a file to a location where the jinja parser will use it :param directory: the directory of the file to be if mangled :param filename: the filename of the file to be mangled + :param options: dictionary containing the options given by the user :return: """ # variable to keep track of latest if-statement scope is_os = NON_OS_IF - with open(os.path.join(IF_MANGLED_FILES, filename), 'w') as write_file: + with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: new_line, is_os = mangle_os_ifs(line, is_os) @@ -682,7 +683,7 @@ def make_valid_title(title): return valid_filename -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options): +def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options, 
is_linux_tutorial): """ Function that writes text and metadata of a generic (non-os-specific) file @@ -692,21 +693,22 @@ def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, :param title_order: list containing all subtitles in order :param title_order_number: order number of the title of the section that is being written :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return: """ if len(paragraphs_text[title]) > 0: # make the directory needed for the files that will be written - filepath = os.path.join(PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) + filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial) else: # don't write empty files pass -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options): +def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options, is_linux_tutorial): """ Function to write files to a certain filepath @@ -718,6 +720,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe :param filepath: filepath to write files to :param OS: OS to be included in the metadata :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return: """ @@ -750,7 +753,7 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe metadata[METADATA_OS] = OS # add reference link 
- if bool(LINUX_TUTORIAL in paragraphs_metadata[title][DIRECTORY]): + if is_linux_tutorial: linux_part = LINUX_TUTORIAL + "/" else: linux_part = "" @@ -792,7 +795,7 @@ def insert_links(text, links, options): return text, new_links -def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options): +def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options, is_linux_tutorial): """ Function that splits os-specific sections into subtitles, parses them using jinja and writes them away @@ -802,6 +805,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or :param title_order_number: order number of the section :param all_metadata: all metadata generated by the splitter :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return: """ @@ -824,20 +828,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or # re-adjust text to correct overcorrections jinja_text = re.sub('"' + OS + '"', OS, jinja_text) - if LINUX_TUTORIAL not in metadata[DIRECTORY]: - with open(TEMP_JINJA_FILE, 'w') as writefile: - writefile.write(jinja_text) + with open(TEMP_JINJA_FILE, 'w') as writefile: + writefile.write(jinja_text) - # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) - - else: - os.makedirs(LINUX_TUTORIAL, exist_ok=True) - with open(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), 'w') as writefile: - writefile.write(jinja_text) - - # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(os.path.join(LINUX_TUTORIAL, TEMP_JINJA_FILE), metadata[MAIN_TITLE], options, 
current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + # split in right way + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) # prepare variables to fix metadata total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] @@ -874,11 +869,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) # make a directory to save the files - filepath = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) + filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) os.makedirs(filepath, exist_ok=True) # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options) + write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial) else: # don't write empty files pass @@ -905,32 +900,27 @@ def main(options): print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - shutil.rmtree(PARSED_MDS, ignore_errors=True) - shutil.rmtree(COPIES, ignore_errors=True) - shutil.rmtree(IF_MANGLED_FILES, ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) # make the necessary directories - for directory in [COPIES, os.path.join(COPIES, LINUX_TUTORIAL), PARSED_MDS, IF_MANGLED_FILES]: + for directory in [COPIES, PARSED_MDS, IF_MANGLED_FILES]: + directory = os.path.join(options[DESTINATION_DIRECTORY], directory) if not os.path.exists(directory): os.makedirs(directory) ################### define loop-invariant variables ################### # constant that keeps track of the source directories - source_directories = [os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR), - os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, HPC_DIR, LINUX_TUTORIAL)] + source_directory = options[SOURCE_DIRECTORY] # list of all the filenames - filenames_generic = {} - filenames_linux = {} - for source_directory in source_directories: - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - if LINUX_TUTORIAL in source_directory: - filenames_linux[file] = os.path.join(source_directory, file) - else: - filenames_generic[file] = os.path.join(source_directory, file) + filenames = {} + all_items = os.listdir(source_directory) + files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] 
+ for file in files: + filenames[file] = os.path.join(source_directory, file) # # Temporary variables to test with just one singular file # filenames_generic = {} @@ -940,65 +930,55 @@ def main(options): # filenames_linux["common_pitfalls.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/common_pitfalls.md" # for loops over all files - for filenames in [filenames_generic, filenames_linux]: - for filename in filenames.keys(): - ################### define/reset loop specific variables ################### + for filename in filenames.keys(): + ################### define/reset loop specific variables ################### - # variable that keeps track of whether file is part of the linux tutorial - is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename]) + # boolean indicating whether the current file is part of the linux tutorial + is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename]) - # make a copy of the original file in order to make sure the original does not get altered - if is_linux_tutorial: - copy_file = os.path.join(COPIES, LINUX_TUTORIAL, filename) - else: - copy_file = os.path.join(COPIES, filename) - shutil.copyfile(filenames[filename], copy_file) - - # variable that keeps track of the directories that are used to write in at different levels - if is_linux_tutorial: - root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR, LINUX_TUTORIAL) - root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX, LINUX_TUTORIAL) - root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS, LINUX_TUTORIAL) - root_dir_os_specific_macos = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, MACOS, LINUX_TUTORIAL) - else: - root_dir_generic = os.path.join(PARSED_MDS, GENERIC_DIR) - root_dir_os_specific_linux = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, LINUX) - root_dir_os_specific_windows = os.path.join(PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) - root_dir_os_specific_macos = os.path.join(PARSED_MDS, 
OS_SPECIFIC_DIR, MACOS) + # make a copy of the original file in order to make sure the original does not get altered + copy_file = os.path.join(options[DESTINATION_DIRECTORY], COPIES, filename) + shutil.copyfile(filenames[filename], copy_file) - # variable for the main title (needed for reference links) - main_title = filename[:-3] + # variable that keeps track of the directories that are used to write in at different levels + root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR) + root_dir_os_specific_linux = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, LINUX) + root_dir_os_specific_windows = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) + root_dir_os_specific_macos = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, MACOS) - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for _ in range(5)] + # variable for the main title (needed for reference links) + main_title = filename[:-3] - ################### actually parse the md file ################### + # variable that keeps track of the directories that are used to write in at different levels + curr_dirs = [filename[:-3] for _ in range(5)] - # create directories for the source markdown file - for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - os.makedirs(directory, exist_ok=True) + ################### actually parse the md file ################### - # process the jinja macros - jinja_parser(filename, copy_file) + # create directories for the source markdown file + for directory in 
[root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: + os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True) - # split the text in paragraphs - paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) + # process the jinja macros + jinja_parser(filename, copy_file, options) - # for every section, either make the whole section generic, or create an os-specific file for each OS - for i, subtitle in enumerate(subtitle_order): + # split the text in paragraphs + paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) - # generic - if subtitle in paragraphs_os_free_text.keys(): - write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options) + # for every section, either make the whole section generic, or create an os-specific file for each OS + for i, subtitle in enumerate(subtitle_order): - # os-specific - else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options) + # generic + if subtitle in paragraphs_os_free_text.keys(): + write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options, is_linux_tutorial) + + # os-specific + else: + split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial) # clean up temporary directories and files - shutil.rmtree(COPIES, ignore_errors=True) - shutil.rmtree(IF_MANGLED_FILES, 
ignore_errors=True) - shutil.rmtree(LINUX_TUTORIAL, ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) + shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL), ignore_errors=True) if os.path.exists(TEMP_JINJA_FILE): os.remove(TEMP_JINJA_FILE) @@ -1011,6 +991,8 @@ def main(options): # adding command-line options + parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") + parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is disabled (default: 160)") parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") @@ -1019,7 +1001,9 @@ def main(options): args = parser.parse_args() - options_dict = {SPLIT_ON_TITLES: args.split_on_titles, + options_dict = {SOURCE_DIRECTORY: args.source, + DESTINATION_DIRECTORY: args.destination, + SPLIT_ON_TITLES: args.split_on_titles, SPLIT_ON_PARAGRAPHS: not args.split_on_titles, MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, MAX_TITLE_DEPTH: args.max_title_depth, From a6d99d9c724e453c9adb4262b747ddbf01ab711e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 12:27:20 +0200 Subject: [PATCH 098/145] cleanup --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 ------- 1 file changed, 7 deletions(-) diff --git 
a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 245c5d68f514..bfc152cee60d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -922,13 +922,6 @@ def main(options): for file in files: filenames[file] = os.path.join(source_directory, file) - # # Temporary variables to test with just one singular file - # filenames_generic = {} - # filenames_linux = {} - # filenames_generic["account.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/account.md" - # filenames_generic["example_text_1.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md" - # filenames_linux["common_pitfalls.md"] = "C:/HPC_werk/Documentation/local/vsc_user_docs/mkdocs/docs/HPC/linux-tutorial/common_pitfalls.md" - # for loops over all files for filename in filenames.keys(): ################### define/reset loop specific variables ################### From 2be834f19ce8729a0d28ef4b89ddeea59b5e398e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:14:22 +0200 Subject: [PATCH 099/145] cleanup --- .../HPC_chatbot_preprocessor/chatbot_parser.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index bfc152cee60d..26cf15b79a23 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -86,6 +86,9 @@ # link indicators LINK_MARKER = r'§link§link§' +# HTML tags +HTML_TAGS = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] # make sure these are always lowercase + # regex patterns IF_MANGLED_PATTERNS = { IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', @@ -164,9 +167,8 @@ def 
replace_markdown_markers(curr_line, linklist, in_code_block, main_title): match = re.findall(r'<(.*?)>', curr_line) if match: for i, content in enumerate(match): - syntax_words = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] # make sure these are always lowercase - syntax_words_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in syntax_words])) - syntax_words_style = [element + " style=.*" for element in syntax_words] + html_tags_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in HTML_TAGS])) + html_tags_style = [element + " style=.*" for element in HTML_TAGS] # add references for every link of format if re.search(r'a href=.*', content): @@ -175,11 +177,11 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): linklist.append(link) # drop the syntax words - elif content.lower() in syntax_words_variations: + elif content.lower() in html_tags_variations: curr_line = re.sub(f'<{content}>', "", curr_line) - # drop the version of the syntax_words followed by " style=" - elif any(re.match(pattern, content) for pattern in syntax_words_style): + # drop the version of the HTML_TAGS followed by " style=" + elif any(re.match(pattern, content) for pattern in html_tags_style): curr_line = re.sub(r'<.*?>', "", curr_line) # drop markdown comments @@ -983,7 +985,6 @@ def main(options): parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") # adding command-line options - parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with 
a minimum length.") From 532543a18785e966a76a830c04055ec46425d20e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:17:07 +0200 Subject: [PATCH 100/145] cleanup --- scripts/HPC_chatbot_preprocessor/README.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 82aaa9b7e3c0..bc2922aaf5a8 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -16,35 +16,37 @@ This command has the following possible options: chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] ``` -### `h`/`help` +### Options + +#### `h`/`help` Display the help message -### `src`/`source` +#### `src`/`source` This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`). -### `dst`/`destination` +#### `dst`/`destination` This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script. -### `st`/`split_on_titles` +#### `st`/`split_on_titles` Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length. 
-### `pl`/`min_paragraph_length` +#### `pl`/`min_paragraph_length` This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 160 characters. This option only works if `split_on_titles` is not enabled. -### `td`/`max_title_depth` +#### `td`/`max_title_depth` This option allows the user to configure the maximum "title depth" (the amount of `#` in front) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4. -### `l`/`links` +#### `l`/`links` Some of the source files might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext. -### `dd`/`deep_directories` +#### `dd`/`deep_directories` Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled. 
From 107464e57b3581d96130eeea63f7d3390025125e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:42:40 +0200 Subject: [PATCH 101/145] relocate test files --- .../if_mangler_test_files/if_mangler_1_input.md | 0 .../if_mangler_test_files/if_mangler_1_output.md | 0 .../if_mangler_test_files/if_mangler_2_input.md | 0 .../if_mangler_test_files/if_mangler_2_output.md | 0 .../if_mangler_test_files/if_mangler_3_input.md | 0 .../if_mangler_test_files/if_mangler_3_output.md | 0 .../if_mangler_test_files/if_mangler_4_input.md | 0 .../if_mangler_test_files/if_mangler_4_output.md | 0 .../if_mangler_test_files/if_mangler_5_input.md | 0 .../if_mangler_test_files/if_mangler_5_output.md | 0 .../if_mangler_test_files/if_mangler_6_input.md | 0 .../if_mangler_test_files/if_mangler_6_output.md | 0 .../if_mangler_test_files/if_mangler_7_input.md | 0 .../if_mangler_test_files/if_mangler_7_output.md | 0 .../if_mangler_test_files/if_mangler_input.md | 0 .../if_mangler_test_files/if_mangler_output.md | 0 .../tests/test_files/test_paragraph_split_1.md | 0 .../example_text_1.md => test_files/test_title_split_1.md} | 0 18 files changed, 0 insertions(+), 0 deletions(-) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_1_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_1_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_2_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_2_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_3_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_3_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files 
=> test_files}/if_mangler_test_files/if_mangler_4_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_4_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_5_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_5_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_6_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_6_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_7_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_7_output.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_input.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/{example_files => test_files}/if_mangler_test_files/if_mangler_output.md (100%) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md rename scripts/HPC_chatbot_preprocessor/tests/{example_files/example_text_1.md => test_files/test_title_split_1.md} (100%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_1_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_2_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md similarity index 100% rename from 
scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_3_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_4_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_5_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_6_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_7_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md similarity index 100% rename from 
scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/if_mangler_test_files/if_mangler_output.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/example_files/example_text_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md From dd64381efc3b7156c2905f69aec54572b2ca2c53 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:47:22 +0200 Subject: [PATCH 102/145] update arguments of if mangler --- scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py index 17053fe705c1..4d0dd8761034 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -18,10 +18,10 @@ def test_if_mangler(input_file, output_file): os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) # make filepaths - input_file_path = os.path.join("tests", 
"example_files", "if_mangler_test_files", input_file) - expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file) + input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file) + expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) actual_output_file_path = os.path.join("if_mangled_files", input_file) - mangle_ifs(input_file_path, input_file) + mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) # check every line with open(expected_output_file_path, "r") as expected_read_file: From ef3fd584a21e2e417363a5f083bcf94261739ceb Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:54:19 +0200 Subject: [PATCH 103/145] relocate full test files --- .../test_paragraph_split_1.md | 0 .../test_title_split_1.md | 0 .../if_mangler_1_input.md | 4 -- .../if_mangler_1_output.md | 4 -- .../if_mangler_2_input.md | 7 --- .../if_mangler_2_output.md | 7 --- .../if_mangler_3_input.md | 6 -- .../if_mangler_3_output.md | 6 -- .../if_mangler_4_input.md | 4 -- .../if_mangler_4_output.md | 4 -- .../if_mangler_5_input.md | 11 ---- .../if_mangler_5_output.md | 11 ---- .../if_mangler_6_input.md | 8 --- .../if_mangler_6_output.md | 8 --- .../if_mangler_7_input.md | 9 --- .../if_mangler_7_output.md | 9 --- .../if_mangler_test_files/if_mangler_input.md | 55 ------------------- .../if_mangler_output.md | 55 ------------------- 18 files changed, 208 deletions(-) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_paragraph_split}/test_paragraph_split_1.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_title_split}/test_title_split_1.md (100%) delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md delete mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md deleted file mode 100644 index 6a74b3c0181b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md +++ /dev/null @@ -1,4 +0,0 @@ -test1: OS_IF -{% if OS == windows %} -test1 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md deleted file mode 100644 index 2f9cdc38294b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md +++ /dev/null @@ -1,4 +0,0 @@ -test1: OS_IF -{-if-% if OS == windows %-if-} -test1 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md deleted file mode 100644 index 360a4a59ba38..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md +++ /dev/null @@ -1,7 +0,0 @@ -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{% if OS == windows %} -test2 -{% endif %} -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md deleted file mode 100644 index 798dcf6db24a..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md +++ /dev/null @@ -1,7 +0,0 @@ -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{-if-% if OS == windows %-if-} -test2 -{-if-% endif %-if-} -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md deleted file mode 100644 index d93125a59716..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md +++ /dev/null @@ -1,6 +0,0 @@ -test3: OS_IF with else -{% if OS == linux %} -test3 -{% else %} -test3 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md deleted file mode 100644 index 02141961338d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md +++ /dev/null @@ -1,6 +0,0 @@ -test3: OS_IF with else -{-if-% if OS == linux %-if-} -test3 -{-if-% else %-if-} -test3 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md deleted file mode 100644 index cc15fae1df11..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md +++ /dev/null @@ -1,4 +0,0 @@ -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md deleted file mode 100644 index cc15fae1df11..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md +++ /dev/null @@ -1,4 +0,0 @@ -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md deleted file mode 100644 index bdb288474e24..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md +++ /dev/null @@ -1,11 +0,0 @@ -test5: OS_IF in OS_IF -{% if OS == windows %} -test5 -{% else %} -{% if OS == linux %} -test5 -{% else %} -test5 -{% endif %} -test5 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md deleted file mode 100644 index 10443eb67a4f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md +++ /dev/null @@ -1,11 +0,0 @@ -test5: OS_IF in OS_IF -{-if-% if OS == windows %-if-} -test5 -{-if-% else %-if-} -{-if-% if OS == linux %-if-} -test5 -{-if-% else %-if-} -test5 -{-if-% endif %-if-} -test5 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md deleted file mode 100644 index 0731ee3588ce..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md +++ /dev/null @@ -1,8 +0,0 @@ -test6: NON_OS_IF in OS_IF -{% if OS == macos %} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md deleted file mode 100644 index cd37117cb004..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md +++ /dev/null @@ -1,8 +0,0 @@ -test6: NON_OS_IF in OS_IF -{-if-% if OS == macos %-if-} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md deleted file mode 100644 index 6a72a338527a..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md +++ /dev/null @@ -1,9 +0,0 @@ -test7: weird spacing and dashes - {%if OS == windows %} - test7 -{%- else%} - test7 - {% if OS == linux%} -test7 - {%-endif %} -{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md deleted file mode 100644 index dfe342ebfb14..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md +++ /dev/null @@ -1,9 +0,0 @@ -test7: weird spacing and dashes - {-if-%if OS == windows %-if-} - test7 -{-if-%- else%-if-} - test7 - {-if-% if OS == linux%-if-} -test7 - {-if-%-endif %-if-} -{-if-%endif%-if-} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md deleted file mode 100644 index fb8c1f8b5396..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md +++ /dev/null @@ -1,55 +0,0 @@ -test1: OS_IF -{% if OS == windows %} -test1 -{% endif %} - -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{% if OS == windows %} -test2 -{% endif %} -{% endif %} - -test3: OS_IF with else -{% if OS == linux %} -test3 -{% else %} -test3 -{% endif %} - -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } - -test5: OS_IF in OS_IF -{% if OS == windows %} -test5 -{% else %} -{% if OS == linux %} -test5 -{% else %} -test5 -{% endif %} -test5 -{% endif %} - -test6: NON_OS_IF in OS_IF -{% if OS == macos %} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{% endif %} - -test7: weird spacing and dashes - {%if OS == windows %} - test7 -{%- else%} - test7 - {% if OS == linux%} -test7 - {%-endif %} -{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md deleted file mode 100644 index 796e94348fa2..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md +++ /dev/null @@ -1,55 +0,0 @@ -test1: OS_IF -{-if-% if OS == windows %-if-} -test1 -{-if-% endif %-if-} - -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{-if-% if OS == windows %-if-} -test2 -{-if-% endif %-if-} -{% endif %} - -test3: OS_IF with else -{-if-% if OS == linux %-if-} -test3 -{-if-% else %-if-} -test3 -{-if-% endif %-if-} - -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } - -test5: OS_IF in OS_IF -{-if-% if OS == windows %-if-} -test5 -{-if-% 
else %-if-} -{-if-% if OS == linux %-if-} -test5 -{-if-% else %-if-} -test5 -{-if-% endif %-if-} -test5 -{-if-% endif %-if-} - -test6: NON_OS_IF in OS_IF -{-if-% if OS == macos %-if-} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{-if-% endif %-if-} - -test7: weird spacing and dashes - {-if-%if OS == windows %-if-} - test7 -{-if-%- else%-if-} - test7 - {-if-% if OS == linux%-if-} -test7 - {-if-%-endif %-if-} -{-if-%endif%-if-} \ No newline at end of file From 4d7db8f889decbcf157ef08c55912c3e269ef382 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 13:59:49 +0200 Subject: [PATCH 104/145] Revert "update arguments of if mangler" This reverts commit dd64381efc3b7156c2905f69aec54572b2ca2c53. --- scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py index 4d0dd8761034..17053fe705c1 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -18,10 +18,10 @@ def test_if_mangler(input_file, output_file): os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) # make filepaths - input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file) - expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) + input_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", input_file) + expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file) actual_output_file_path = os.path.join("if_mangled_files", input_file) - mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) + mangle_ifs(input_file_path, input_file) # check every line with open(expected_output_file_path, "r") as expected_read_file: From 
df9bac5031138324895fa70b6d16d82c8fa2e164 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 14:02:12 +0200 Subject: [PATCH 105/145] Revert "relocate full test files" This reverts commit ef3fd584a21e2e417363a5f083bcf94261739ceb. --- .../if_mangler_1_input.md | 4 ++ .../if_mangler_1_output.md | 4 ++ .../if_mangler_2_input.md | 7 +++ .../if_mangler_2_output.md | 7 +++ .../if_mangler_3_input.md | 6 ++ .../if_mangler_3_output.md | 6 ++ .../if_mangler_4_input.md | 4 ++ .../if_mangler_4_output.md | 4 ++ .../if_mangler_5_input.md | 11 ++++ .../if_mangler_5_output.md | 11 ++++ .../if_mangler_6_input.md | 8 +++ .../if_mangler_6_output.md | 8 +++ .../if_mangler_7_input.md | 9 +++ .../if_mangler_7_output.md | 9 +++ .../if_mangler_test_files/if_mangler_input.md | 55 +++++++++++++++++++ .../if_mangler_output.md | 55 +++++++++++++++++++ .../test_paragraph_split_1.md | 0 .../test_title_split_1.md | 0 18 files changed, 208 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_paragraph_split => }/test_paragraph_split_1.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_title_split => }/test_title_split_1.md (100%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md new file mode 100644 index 000000000000..6a74b3c0181b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md @@ -0,0 +1,4 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md new file mode 100644 index 000000000000..2f9cdc38294b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md @@ -0,0 +1,4 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md new file mode 100644 index 000000000000..360a4a59ba38 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md new file mode 100644 index 000000000000..798dcf6db24a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md @@ -0,0 +1,7 @@ +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md new file mode 100644 index 000000000000..d93125a59716 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md new file mode 100644 index 000000000000..02141961338d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md @@ -0,0 +1,6 @@ +test3: OS_IF with else +{-if-% if OS == 
linux %-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md new file mode 100644 index 000000000000..cc15fae1df11 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md new file mode 100644 index 000000000000..cc15fae1df11 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md @@ -0,0 +1,4 @@ +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md new file mode 100644 index 000000000000..bdb288474e24 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md @@ -0,0 +1,11 @@ +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md new file mode 100644 index 000000000000..10443eb67a4f --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md @@ -0,0 +1,11 @@ +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif %-if-} +test5 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md new file mode 100644 index 000000000000..0731ee3588ce --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in OS_IF +{% if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md new file mode 100644 index 000000000000..cd37117cb004 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md @@ -0,0 +1,8 @@ +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md new file mode 100644 index 000000000000..6a72a338527a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md new file mode 100644 index 000000000000..dfe342ebfb14 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md @@ -0,0 +1,9 @@ +test7: weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md new file mode 100644 index 000000000000..fb8c1f8b5396 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md @@ -0,0 +1,55 @@ +test1: OS_IF +{% if OS == windows %} +test1 +{% endif %} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{% if OS == windows %} +test2 +{% endif %} +{% endif %} + +test3: OS_IF with else +{% if OS == linux %} +test3 +{% else %} +test3 +{% endif %} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{% if OS == windows %} +test5 +{% else %} +{% if OS == linux %} +test5 +{% else %} +test5 +{% endif %} +test5 +{% endif %} + +test6: NON_OS_IF in OS_IF +{% if OS == macos %} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{% endif %} + +test7: weird spacing and dashes + {%if OS == windows %} + test7 +{%- else%} + test7 + {% if OS == linux%} +test7 + {%-endif %} +{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md new file mode 100644 index 000000000000..796e94348fa2 --- 
/dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md @@ -0,0 +1,55 @@ +test1: OS_IF +{-if-% if OS == windows %-if-} +test1 +{-if-% endif %-if-} + +test2: OS_IF in NON_OS_IF +{% if site == Gent %} +test2 +{-if-% if OS == windows %-if-} +test2 +{-if-% endif %-if-} +{% endif %} + +test3: OS_IF with else +{-if-% if OS == linux %-if-} +test3 +{-if-% else %-if-} +test3 +{-if-% endif %-if-} + +test4: OS_IF with wrong syntax +{ if OS == macos } +test4 +{ endif } + +test5: OS_IF in OS_IF +{-if-% if OS == windows %-if-} +test5 +{-if-% else %-if-} +{-if-% if OS == linux %-if-} +test5 +{-if-% else %-if-} +test5 +{-if-% endif %-if-} +test5 +{-if-% endif %-if-} + +test6: NON_OS_IF in OS_IF +{-if-% if OS == macos %-if-} +test6 +{% if site == Gent %} +test6 +{% endif %} +test6 +{-if-% endif %-if-} + +test7: weird spacing and dashes + {-if-%if OS == windows %-if-} + test7 +{-if-%- else%-if-} + test7 + {-if-% if OS == linux%-if-} +test7 + {-if-%-endif %-if-} +{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md From 631d9e9c26945359eb25ce08a37cd424061c2407 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 14:08:22 +0200 Subject: 
[PATCH 106/145] update test to adapt to new arguments in if mangler --- scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py index 17053fe705c1..4d0dd8761034 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py @@ -18,10 +18,10 @@ def test_if_mangler(input_file, output_file): os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) # make filepaths - input_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", input_file) - expected_output_file_path = os.path.join("tests", "example_files", "if_mangler_test_files", output_file) + input_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", input_file) + expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) actual_output_file_path = os.path.join("if_mangled_files", input_file) - mangle_ifs(input_file_path, input_file) + mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) # check every line with open(expected_output_file_path, "r") as expected_read_file: From c6e600dcbdf9885b41cd8cbd07917a92d2b423a6 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 14:15:25 +0200 Subject: [PATCH 107/145] relocated full test files --- .../{ => full_test_paragraph_split}/test_paragraph_split_1.md | 0 .../test_files/{ => full_test_title_split}/test_title_split_1.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_paragraph_split}/test_paragraph_split_1.md (100%) rename scripts/HPC_chatbot_preprocessor/tests/test_files/{ => full_test_title_split}/test_title_split_1.md (100%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_paragraph_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md similarity index 100% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/test_title_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md From d1c6194e8aa75301f7e3b1a2396eb13538de063d Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:17:42 +0200 Subject: [PATCH 108/145] Rename test_paragraph_split_1.md to test_paragraph_split_1_input.md --- .../full_test_paragraph_split/test_paragraph_split_1.md | 0 .../full_test_paragraph_split/test_paragraph_split_1_input.md | 1 + 2 files changed, 1 insertion(+) delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1.md deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md new file mode 100644 index 000000000000..d3f5a12faa99 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md @@ -0,0 +1 @@ + From 695ffd635a61ff44514232a1b37f55198100f0bd Mon Sep 17 00:00:00 2001 From: EwDa291 <100782488+EwDa291@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:18:00 +0200 Subject: [PATCH 109/145] Rename test_title_split_1.md to test_title_split_1_input.md --- .../{test_title_split_1.md => test_title_split_1_input.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/{test_title_split_1.md => test_title_split_1_input.md} (98%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md similarity index 98% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md index 9b810c3f41af..5065852e2a10 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md @@ -28,4 +28,4 @@ blablabla generic with a [link](generic.md) ## Subtitle 5 generic -blablabla \ No newline at end of file +blablabla From af4832b5a8cd50bc790353a232fcca5e51e35e90 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 15:08:37 +0200 Subject: [PATCH 110/145] smal fix --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 26cf15b79a23..db2c5e842570 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ 
b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -825,7 +825,8 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or # add first subtitle in front of section again if len(jinja_text) != 0: - jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text + if options[SPLIT_ON_TITLES]: + jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text # re-adjust text to correct overcorrections jinja_text = re.sub('"' + OS + '"', OS, jinja_text) @@ -951,7 +952,7 @@ def main(options): # create directories for the source markdown file for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True) + os.makedirs(directory, exist_ok=True) # process the jinja macros jinja_parser(filename, copy_file, options) From 8805c8c01b6efe814fae4bba5f4b05f9e9d8beb2 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 15:11:52 +0200 Subject: [PATCH 111/145] test text for paragraph split --- .../test_paragraph_split_1_input.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md index d3f5a12faa99..44ac82c795d1 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md @@ -1 +1,43 @@ +# Main 
title +This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md). +It also contains some `other` *Markdown* _syntax_ and an +```shell +example code block. +``` +This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum +character limit for a section). + +## OS specific sections + +This is the second section, it is the start of some {% if OS == windows %} text specific to windows. +In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer +to make sure we get a long section that is over the minimum required length for the next newline character to be +classified as the end of this section. I am doing this because for the next sections I want to test whether they will be +grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, +let's add [a link](windows.md) in this section as well. + +### Windows specific section + +Like this. + +And this. + +And also this. + +These section should all be grouped together under the windows specific section of the output. The addition of this long +section at the end should make sure the combination of sections comes to an end here. +{% else %} +text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will +still add [a link](linuxmacos.md). + +### Non Windows section + +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. +{% endif %} + +## Conclusion + +Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I +might add to this if needed. 
From a265ffd87121d3d195670cc76f8d94b4b8bcc009 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Mon, 26 Aug 2024 16:52:20 +0200 Subject: [PATCH 112/145] start of a fix for double title problem, not done yet --- .../chatbot_parser.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index db2c5e842570..c0b91319912a 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -401,11 +401,13 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # metadata title metadata_title = main_title + # TODO: define metadata data if split occurs on paragraphs and last_title and title_level are known (placeholder in place right now) + if current_paragraph_number != -1: + last_title_level = 5 + last_dir = "PLACEHOLDER" + # list to keep track of most recent directories on each title level - if LINUX_TUTORIAL not in file: - curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] - else: - curr_dirs = [os.path.join(LINUX_TUTORIAL, main_title) for _ in range(options[MAX_TITLE_DEPTH] + 1)] + curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] with open(file, 'r') as readfile: @@ -885,7 +887,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or pass -def main(options): +def main(options, verbose=True): """ main function @@ -896,10 +898,11 @@ def main(options): MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not} + :param verbose: boolean indicating whether print statements from the main function should be print, only used when for 
testing :return: """ - if options[DEEP_DIRECTORIES]: + if options[DEEP_DIRECTORIES] and verbose: print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason @@ -915,7 +918,7 @@ def main(options): ################### define loop-invariant variables ################### - # constant that keeps track of the source directories + # constant that keeps track of the source directory source_directory = options[SOURCE_DIRECTORY] # list of all the filenames @@ -952,7 +955,7 @@ def main(options): # create directories for the source markdown file for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - os.makedirs(directory, exist_ok=True) + os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True) # process the jinja macros jinja_parser(filename, copy_file, options) @@ -978,7 +981,8 @@ def main(options): if os.path.exists(TEMP_JINJA_FILE): os.remove(TEMP_JINJA_FILE) - print("Parsing finished successfully") + if verbose: + print("Parsing finished successfully") ################### run the script ################### From 6c2a61c25215cf3d5c942c6c2de7804baf725584 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 09:51:47 +0200 Subject: [PATCH 113/145] Fix for double title bug when splitting on paragraph --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index c0b91319912a..72ebbcee3ab3 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -825,10 +825,11 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or template = Template(text) jinja_text = template.render(OS=OS) - # add first subtitle in front of section again if len(jinja_text) != 0: - if options[SPLIT_ON_TITLES]: - jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE] + "\n" + jinja_text + + # add first subtitle in front of section again + if options[SPLIT_ON_TITLES] or metadata[SUBTITLE].replace("-", " ") not in jinja_text[:len(metadata[SUBTITLE]) + 1]: + jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text # re-adjust text to correct overcorrections jinja_text = re.sub('"' + OS + '"', OS, jinja_text) From ed088794e1b6ceb5b805c87a82bcd31df6931299 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 10:05:56 +0200 Subject: [PATCH 114/145] Fix bug for empty linklist in metadata --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 72ebbcee3ab3..dfa2972b9fc6 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -741,6 +741,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe writefile.write(text) # write metadata + # check if links in metadata is not empty + if LINKS in metadata.keys() and len(metadata[LINKS].keys()) == 0: + del metadata[LINKS] + # add previous subtitle if title_order_number != 0: metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1] @@ -830,6 +834,8 @@ def 
split_and_write_os_specific_section(text, metadata, subtitle_order, title_or # add first subtitle in front of section again if options[SPLIT_ON_TITLES] or metadata[SUBTITLE].replace("-", " ") not in jinja_text[:len(metadata[SUBTITLE]) + 1]: jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text + else: + jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text # re-adjust text to correct overcorrections jinja_text = re.sub('"' + OS + '"', OS, jinja_text) From 176af130ab9837f3d28511bcf113aeb38bed1c9b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 10:33:17 +0200 Subject: [PATCH 115/145] fix bug where too many directories were sometimes created --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index dfa2972b9fc6..b0bacbbca172 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -962,7 +962,7 @@ def main(options, verbose=True): # create directories for the source markdown file for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - os.makedirs(os.path.join(options[DESTINATION_DIRECTORY], directory), exist_ok=True) + os.makedirs(directory, exist_ok=True) # process the jinja macros jinja_parser(filename, copy_file, options) From d4ceac8962b2bf61def602b5dad3ecfc7d12bc1e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 10:41:49 +0200 Subject: [PATCH 116/145] test of full script, test files not ready to be pushed yet --- 
.../tests/test_full_script.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_full_script.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py new file mode 100644 index 000000000000..61a6f3f1bdf6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py @@ -0,0 +1,66 @@ +import pytest +import os +import shutil +from chatbot_parser import main + + +@pytest.mark.parametrize("input_directory,actual_output_directory,expected_output_directory, options", [ + ("tests/test_files/ftps", "tests/test_files/ftps/actual", + "tests/test_files/ftps/output", + {"SOURCE_DIRECTORY": "tests/test_files/ftps", + "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 160, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False} + ), + ("tests/test_files/ftts", "tests/test_files/ftts/actual", + "tests/test_files/ftts/output", + {"SOURCE_DIRECTORY": "tests/test_files/ftts", + "DESTINATION_DIRECTORY": "tests/test_files/ftts/actual", + "SPLIT_ON_TITLES": True, + "SPLIT_ON_PARAGRAPHS": False, + "MIN_PARAGRAPH_LENGTH": 160, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": True} + ) +]) +def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options): + # run the script + main(options, verbose=False) + + # Compare directories and files + for dirpath, dirnames, filenames in os.walk(expected_output_directory): + relative_path = os.path.relpath(dirpath, expected_output_directory) + actual_dir = os.path.join(actual_output_directory, relative_path) + + # Check if the directory exists + assert os.path.isdir(actual_dir), f"Directory '{actual_dir}' is missing." 
+ + # Check for files + for filename in filenames: + ref_file = os.path.join(dirpath, filename) + gen_file = os.path.join(actual_dir, filename) + + # Check if the file exists + assert os.path.isfile(gen_file), f"File '{gen_file}' is missing." + + # Check file content + with open(ref_file, 'r') as ref_f, open(gen_file, 'r') as gen_f: + ref_content = ref_f.read().strip() + gen_content = gen_f.read().strip() + assert ref_content == gen_content, f"Content of file '{gen_file}' does not match." + + # check that not too many directories have been generated + for dirpath, dirnames, filenames in os.walk(actual_output_directory): + relative_path = os.path.relpath(dirpath, actual_output_directory) + expected_dir = os.path.join(expected_output_directory, relative_path) + + # Check if the directory exists + assert os.path.isdir(expected_dir), f"Directory '{relative_path}' was made, but shouldn't have been." + + # remove directory + shutil.rmtree(actual_output_directory, ignore_errors=True) From 815a863fc83f37bfa49976ca14ce23e63e3fafa4 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 12:18:12 +0200 Subject: [PATCH 117/145] updated requirements.txt --- scripts/HPC_chatbot_preprocessor/requirements.txt | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 907f08fda77f..4d27d4624600 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -1,7 +1,2 @@ -os -re -shutil -pypandoc -yaml -jinja2 -pathlib \ No newline at end of file +PyYAML==6.0.2 +Jinja2==3.1.4 \ No newline at end of file From d15469f420a86edeabda1472497c38206b53351d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 12:19:05 +0200 Subject: [PATCH 118/145] updated docstring in main function --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index b0bacbbca172..698278da90d2 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -899,7 +899,9 @@ def main(options, verbose=True): main function :param options: dictionary containing the options specified by the user to run the script: - {SPLIT_ON_TITLES: boolean indicating whether to split on titles, + {SOURCE_DIRECTORY: The source directory where the original files are located, + DESTINATION_DIRECTORY: The destination directory where the processed files should be written to, + SPLIT_ON_TITLES: boolean indicating whether to split on titles, SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES), MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph, MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, From daa6b36e07854f1b41b5907339bf283218d93a2c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 12:32:09 +0200 Subject: [PATCH 119/145] add support for comments for the bot to be included in the source files --- scripts/HPC_chatbot_preprocessor/README.md | 8 +++++++- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 11 +++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index bc2922aaf5a8..2cb30bdc985d 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -149,7 +149,7 @@ This will also result in the parser "forgetting" it opened an os-specific if-sta The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. 
-### Markdown comments +### Comments Any comments within the markdown files (for example TODO's) should follow the following syntax: @@ -158,6 +158,12 @@ Any comments within the markdown files (for example TODO's) should follow the fo ``` and should be limited to one line. +Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment as such. + +``` + +``` + ### Long filepaths Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`. diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 698278da90d2..338cdef32f54 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -83,7 +83,7 @@ ELSE = "else" ENDIF = "endif" -# link indicators +# link indicator LINK_MARKER = r'§link§link§' # HTML tags @@ -101,6 +101,9 @@ _PARAGRAPH_ = "_paragraph_" METADATA_EXTENSION = "_metadata" +# Marker for comments for the bot +INPUT_FOR_BOT = "INPUT_FOR_BOT" + ################### define functions ################### @@ -184,7 +187,11 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): elif any(re.match(pattern, content) for pattern in html_tags_style): curr_line = re.sub(r'<.*?>', "", curr_line) - # drop markdown comments + # keep comments for bot + elif re.fullmatch(r'!--' + INPUT_FOR_BOT + r'.*?--', content): + curr_line = re.sub(r'', lambda m: m.group(1), curr_line) + + # drop comments elif re.fullmatch(r'!--.*?--', content): 
curr_line = re.sub(r'<.*?>', "", curr_line) From 4c19f442e2e4f6af1f2448e26cf0b1b29e4522ac Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 13:30:01 +0200 Subject: [PATCH 120/145] changed the default for min paragraph length --- scripts/HPC_chatbot_preprocessor/README.md | 2 +- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 2cb30bdc985d..b3bce665973d 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -36,7 +36,7 @@ Including this option will split the source files based on the titles and subtit #### `pl`/`min_paragraph_length` -This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 160 characters. This options only works if `split_on_titles` is not enabled. +This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 683 characters. This options only works if `split_on_titles` is not enabled. 
#### `td`/`max_title_depth` diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 338cdef32f54..a041160c855d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -1009,7 +1009,7 @@ def main(options, verbose=True): parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=160, help="Minimum length of a paragraph, only works if split on titles is disabled (default: 160)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=683, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)") parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. 
Only works if split on titles is enabled") From 9a6ff5814422fc2ea0d4a128407302572d964105 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 13:31:40 +0200 Subject: [PATCH 121/145] added test files for full script test --- .../generic/tps1/tps1_paragraph_1.txt | 6 ++ .../tps1/tps1_paragraph_1_metadata.json | 14 +++ .../generic/tps1/tps1_paragraph_3.txt | 3 + .../tps1/tps1_paragraph_3_metadata.json | 11 +++ .../linux/tps1/tps1_linux_paragraph_2.1.txt | 4 + .../tps1_linux_paragraph_2.1_metadata.json | 14 +++ .../linux/tps1/tps1_linux_paragraph_2.2.txt | 3 + .../tps1_linux_paragraph_2.2_metadata.json | 11 +++ .../macos/tps1/tps1_macos_paragraph_2.1.txt | 4 + .../tps1_macos_paragraph_2.1_metadata.json | 14 +++ .../macos/tps1/tps1_macos_paragraph_2.2.txt | 3 + .../tps1_macos_paragraph_2.2_metadata.json | 11 +++ .../tps1/tps1_windows_paragraph_2.1.txt | 7 ++ .../tps1_windows_paragraph_2.1_metadata.json | 14 +++ .../tps1/tps1_windows_paragraph_2.2.txt | 6 ++ .../tps1_windows_paragraph_2.2_metadata.json | 11 +++ .../tps1.md} | 86 +++++++++---------- .../tts1/Main-title/Subtitle-1/Subtitle-1.txt | 2 + .../Subtitle-1/Subtitle-1_metadata.json | 11 +++ .../Main-title/Subtitle-5-g/Subtitle-5-g.txt | 1 + .../Subtitle-5-g/Subtitle-5-g_metadata.json | 11 +++ .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 14 +++ .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 3 + .../Subtitle-4-l&m_metadata.json | 14 +++ .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 14 +++ .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 3 + .../Subtitle-4-l&m_metadata.json | 14 +++ .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 14 +++ .../Subtitle-3-w/Subtitle-3-w.txt | 3 + .../Subtitle-3-w/Subtitle-3-w_metadata.json | 14 +++ .../tts1.md} | 8 +- 34 files changed, 313 insertions(+), 47 deletions(-) create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_paragraph_split/test_paragraph_split_1_input.md => ftps/tps1.md} (97%) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json rename scripts/HPC_chatbot_preprocessor/tests/test_files/{full_test_title_split/test_title_split_1_input.md => ftts/tts1.md} (76%) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt new file mode 100644 index 000000000000..94270ff37e3d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt @@ -0,0 +1,6 @@ +Main title +This is the first paragraph of text. It is non-os-specific, however it does contain a link. 
+It also contains some other Markdown syntax and an +example code block. +This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum +character limit for a section). diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json new file mode 100644 index 000000000000..19e44fad91d6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tps1", + "subtitle": "Main-title", + "title_depth": 1, + "directory": "tps1", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "parent_title": "", + "previous_title": null, + "next_title": "tps1_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt new file mode 100644 index 000000000000..58eedc06aa02 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt @@ -0,0 +1,3 @@ +Conclusion +Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I +might add to this if needed. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json new file mode 100644 index 000000000000..b4c98ff64658 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tps1", + "subtitle": "Conclusion", + "title_depth": 2, + "directory": "tps1", + "parent_title": "", + "previous_title": "tps1_paragraph_2", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt new file mode 100644 index 000000000000..d0ee9ce82564 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt @@ -0,0 +1,4 @@ +OS specific sections +This is the second section, it is the start of some +text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will +still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..bac81ed87e3a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tps1", + "subtitle": "OS-specific-sections", + "title_depth": 2, + "directory": "tps1", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "tps1_paragraph_1", + "next_title": "tps1_linux_paragraph_2.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt new file mode 100644 index 000000000000..1a3867e69fa9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt @@ -0,0 +1,3 @@ +Non Windows section +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json new file mode 100644 index 000000000000..522265436ab3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tps1", + "subtitle": "Non-Windows-section", + "title_depth": 3, + "directory": "tps1", + "parent_title": "OS-specific-sections", + "previous_title": "tps1_linux_paragraph_2.1", + "next_title": "tps1_paragraph_3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt new file mode 100644 index 000000000000..e0642d6ac96b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt @@ -0,0 +1,4 @@ +OS specific sections +This is the second section, it is the start of some +text specific to OSes that aren't "windows". I feel like there is no need to make this section very long, however I will +still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..5d9ec163f99d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tps1", + "subtitle": "OS-specific-sections", + "title_depth": 2, + "directory": "tps1", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "tps1_paragraph_1", + "next_title": "tps1_macos_paragraph_2.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt new file mode 100644 index 000000000000..1a3867e69fa9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt @@ -0,0 +1,3 @@ +Non Windows section +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json new file mode 100644 index 000000000000..7b06f06efddb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tps1", + "subtitle": "Non-Windows-section", + "title_depth": 3, + "directory": "tps1", + "parent_title": "OS-specific-sections", + "previous_title": "tps1_macos_paragraph_2.1", + "next_title": "tps1_paragraph_3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt new file mode 100644 index 000000000000..9a9cbe1f3d27 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt @@ -0,0 +1,7 @@ +OS specific sections +This is the second section, it is the start of some text specific to windows. +In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer +to make sure we get a long section that is over the minimum required length for the next newline character to be +classified as the end of this section. I am doing this because for the next sections I want to test whether they will be +grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, +let's add a link in this section as well. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..e8e50aa6c322 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tps1", + "subtitle": "OS-specific-sections", + "title_depth": 2, + "directory": "tps1", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/windows" + }, + "previous_title": "tps1_paragraph_1", + "next_title": "tps1_windows_paragraph_2.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt new file mode 100644 index 000000000000..6b57235f68fd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt @@ -0,0 +1,6 @@ +Windows specific section +Like this. +And this. +And also this. +These section should all be grouped together under the windows specific section of the output. The addition of this long +section at the end should make sure the combination of sections comes to an end here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json new file mode 100644 index 000000000000..84ea6ad53f9f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tps1", + "subtitle": "Windows-specific-section", + "title_depth": 3, + "directory": "tps1", + "parent_title": "OS-specific-sections", + "previous_title": "tps1_windows_paragraph_2.1", + "next_title": "tps1_paragraph_3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md similarity index 97% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md index 44ac82c795d1..d9b10d0c5241 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_paragraph_split/test_paragraph_split_1_input.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md @@ -1,43 +1,43 @@ -# Main title - -This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md). -It also contains some `other` *Markdown* _syntax_ and an -```shell -example code block. -``` -This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum -character limit for a section). 
- -## OS specific sections - -This is the second section, it is the start of some {% if OS == windows %} text specific to windows. -In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer -to make sure we get a long section that is over the minimum required length for the next newline character to be -classified as the end of this section. I am doing this because for the next sections I want to test whether they will be -grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, -let's add [a link](windows.md) in this section as well. - -### Windows specific section - -Like this. - -And this. - -And also this. - -These section should all be grouped together under the windows specific section of the output. The addition of this long -section at the end should make sure the combination of sections comes to an end here. -{% else %} -text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will -still add [a link](linuxmacos.md). - -### Non Windows section - -Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise -section that ends right here. -{% endif %} - -## Conclusion - -Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I -might add to this if needed. +# Main title + +This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md). +It also contains some `other` *Markdown* _syntax_ and an +```shell +example code block. +``` +This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum +character limit for a section). + +## OS specific sections + +This is the second section, it is the start of some {% if OS == windows %} text specific to windows. 
+In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer +to make sure we get a long section that is over the minimum required length for the next newline character to be +classified as the end of this section. I am doing this because for the next sections I want to test whether they will be +grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, +let's add [a link](windows.md) in this section as well. + +### Windows specific section + +Like this. + +And this. + +And also this. + +These section should all be grouped together under the windows specific section of the output. The addition of this long +section at the end should make sure the combination of sections comes to an end here. +{% else %} +text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will +still add [a link](linuxmacos.md). + +### Non Windows section + +Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise +section that ends right here. +{% endif %} + +## Conclusion + +Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I +might add to this if needed. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt new file mode 100644 index 000000000000..f62a4f31feec --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt @@ -0,0 +1,2 @@ +blablabla +blablablabla diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json new file mode 100644 index 000000000000..9fdbce652bf1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-1", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-1", + "parent_title": "Main-title", + "previous_title": "Main-title", + "next_title": "Subtitle-2-g", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-1" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt new file mode 100644 index 000000000000..bdf68551202d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt @@ -0,0 +1 @@ +blablabla \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json new file mode 100644 index 000000000000..b48bcaaa08c0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-5-g", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-5-g", + "parent_title": "Main-title", + "previous_title": "Subtitle-2-g", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-5-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt new file mode 100644 index 000000000000..48125d91679e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt @@ -0,0 +1,4 @@ +blablabla generic +blablabla generic +blablabla Linux macOS +blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json new file mode 100644 index 000000000000..a2b68c8865e2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -0,0 +1,14 
@@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-2-g", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-2-g", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "Subtitle-1", + "next_title": "Subtitle-4-l&m", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-2-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt new file mode 100644 index 000000000000..b221f26074b2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt @@ -0,0 +1,3 @@ +blablabla Linux macOS +blablablabla Linux macOS +blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json new file mode 100644 index 000000000000..537541e2cb0f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-4-l&m", + "title_depth": 3, + "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", + "parent_title": "Subtitle-2-g", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "previous_title": "Subtitle-2-g", + "next_title": "Subtitle-5-g", + 
"OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-4-lm" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt new file mode 100644 index 000000000000..48125d91679e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt @@ -0,0 +1,4 @@ +blablabla generic +blablabla generic +blablabla Linux macOS +blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json new file mode 100644 index 000000000000..6846da26b728 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-2-g", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-2-g", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/linuxmacos" + }, + "previous_title": "Subtitle-1", + "next_title": "Subtitle-4-l&m", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-2-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt new file mode 100644 index 000000000000..b221f26074b2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt @@ -0,0 +1,3 @@ +blablabla Linux macOS +blablablabla Linux macOS +blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json new file mode 100644 index 000000000000..4e167b116d2a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-4-l&m", + "title_depth": 3, + "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", + "parent_title": "Subtitle-2-g", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "previous_title": "Subtitle-2-g", + "next_title": "Subtitle-5-g", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-4-lm" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt new file mode 100644 index 000000000000..f9f205928327 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt @@ -0,0 +1,4 @@ +blablabla generic +blablabla generic +blablabla windows +blablabla windows with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json new file mode 100644 index 000000000000..c4620a940808 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-2-g", + "title_depth": 2, + "directory": "tts1\\Main-title\\Subtitle-2-g", + "parent_title": "Main-title", + "links": { + "0": "https://docs.hpc.ugent.be/windows" + }, + "previous_title": "Subtitle-1", + "next_title": "Subtitle-3-w", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-2-g" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt new file mode 100644 index 000000000000..0b587cef85ab --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt @@ -0,0 +1,3 @@ +blablabla windows +blablablabla windows +blablabla generic with a link \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json new file mode 100644 index 000000000000..aa4b6317ce62 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "tts1", + "subtitle": "Subtitle-3-w", + "title_depth": 3, + "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w", + "parent_title": "Subtitle-2-g", + "links": { + "0": "https://docs.hpc.ugent.be/generic" + }, + "previous_title": "Subtitle-2-g", + "next_title": "Subtitle-5-g", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-3-w" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md similarity index 76% rename from scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md rename to scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md index 5065852e2a10..2f3ad7f9c088 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/full_test_title_split/test_title_split_1_input.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md @@ -5,27 +5,27 @@ blablabla blablablabla -### Subtitle 2 partly generic +## Subtitle 2 g blablabla generic blablabla generic {% if OS == windows %}blablabla windows blablabla windows with a [link](windows.md) -#### Subtitle 3 Windows specific +### Subtitle 3 w blablabla windows blablablabla windows {% else %}blablabla Linux macOS blablablabla Linux macOS with a [link](linuxmacos.md) -#### 
Subtitle 4 Linux and macOS specific +### Subtitle 4 l&m blablabla Linux macOS blablablabla Linux macOS {% endif %} blablabla generic with a [link](generic.md) -## Subtitle 5 generic +## Subtitle 5 g blablabla From 56543f03ddbba5df7477e78468c8a9e46e92f227 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 13:41:23 +0200 Subject: [PATCH 122/145] small fix for double title bug --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index a041160c855d..1530eedf31cb 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -839,7 +839,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or if len(jinja_text) != 0: # add first subtitle in front of section again - if options[SPLIT_ON_TITLES] or metadata[SUBTITLE].replace("-", " ") not in jinja_text[:len(metadata[SUBTITLE]) + 1]: + if options[SPLIT_ON_TITLES] or metadata[SUBTITLE] not in make_valid_title(jinja_text[:len(metadata[SUBTITLE]) + 1]): jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text else: jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text From 52a3861bec953f687c6317a1e180f9c27124d304 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 13:44:13 +0200 Subject: [PATCH 123/145] added examples of output of the script when splitting on paragraphs with a min_paragraph_length of 683 --- .../generic/account/account_paragraph_1.txt | 13 +++++++ .../generic/account/account_paragraph_10.txt | 19 ++++++++++ .../account_paragraph_10_metadata.json | 11 ++++++ .../generic/account/account_paragraph_12.txt | 17 +++++++++ .../account_paragraph_12_metadata.json | 11 ++++++ .../account/account_paragraph_1_metadata.json | 14 ++++++++ .../generic/account/account_paragraph_2.txt | 6 ++++ 
.../account/account_paragraph_2_metadata.json | 16 +++++++++ .../generic/account/account_paragraph_3.txt | 11 ++++++ .../account/account_paragraph_3_metadata.json | 11 ++++++ .../generic/account/account_paragraph_8.txt | 13 +++++++ .../account/account_paragraph_8_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_1.txt | 13 +++++++ .../connecting/connecting_paragraph_14.txt | 7 ++++ .../connecting_paragraph_14_metadata.json | 14 ++++++++ .../connecting/connecting_paragraph_15.txt | 12 +++++++ .../connecting_paragraph_15_metadata.json | 15 ++++++++ .../connecting_paragraph_1_metadata.json | 14 ++++++++ .../connecting/connecting_paragraph_2.txt | 14 ++++++++ .../connecting_paragraph_2_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_3.txt | 12 +++++++ .../connecting_paragraph_3_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_6.txt | 16 +++++++++ .../connecting_paragraph_6_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_7.txt | 22 ++++++++++++ .../connecting_paragraph_7_metadata.json | 11 ++++++ .../connecting/connecting_paragraph_8.txt | 13 +++++++ .../connecting_paragraph_8_metadata.json | 14 ++++++++ .../connecting/connecting_paragraph_9.txt | 27 ++++++++++++++ .../connecting_paragraph_9_metadata.json | 11 ++++++ .../account/account_linux_paragraph_11.1.txt | 11 ++++++ ...account_linux_paragraph_11.1_metadata.json | 14 ++++++++ .../account/account_linux_paragraph_4.1.txt | 10 ++++++ .../account_linux_paragraph_4.1_metadata.json | 14 ++++++++ .../account/account_linux_paragraph_5.1.txt | 14 ++++++++ .../account_linux_paragraph_5.1_metadata.json | 11 ++++++ .../account/account_linux_paragraph_5.2.txt | 13 +++++++ .../account_linux_paragraph_5.2_metadata.json | 11 ++++++ .../account/account_linux_paragraph_5.3.txt | 17 +++++++++ .../account_linux_paragraph_5.3_metadata.json | 11 ++++++ .../account/account_linux_paragraph_5.4.txt | 18 ++++++++++ .../account_linux_paragraph_5.4_metadata.json | 11 ++++++ 
.../account/account_linux_paragraph_5.5.txt | 6 ++++ .../account_linux_paragraph_5.5_metadata.json | 11 ++++++ .../account/account_linux_paragraph_6.1.txt | 1 + .../account_linux_paragraph_6.1_metadata.json | 11 ++++++ .../account/account_linux_paragraph_7.1.txt | 14 ++++++++ .../account_linux_paragraph_7.1_metadata.json | 14 ++++++++ .../account/account_linux_paragraph_7.2.txt | 8 +++++ .../account_linux_paragraph_7.2_metadata.json | 11 ++++++ .../account/account_linux_paragraph_9.1.txt | 6 ++++ .../account_linux_paragraph_9.1_metadata.json | 11 ++++++ .../connecting_linux_paragraph_10.1.txt | 35 +++++++++++++++++++ ...necting_linux_paragraph_10.1_metadata.json | 11 ++++++ .../connecting_linux_paragraph_11.1.txt | 6 ++++ ...necting_linux_paragraph_11.1_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.1.txt | 12 +++++++ ...necting_linux_paragraph_12.1_metadata.json | 14 ++++++++ .../connecting_linux_paragraph_12.2.txt | 17 +++++++++ ...necting_linux_paragraph_12.2_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.3.txt | 22 ++++++++++++ ...necting_linux_paragraph_12.3_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.4.txt | 14 ++++++++ ...necting_linux_paragraph_12.4_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.5.txt | 14 ++++++++ ...necting_linux_paragraph_12.5_metadata.json | 11 ++++++ .../connecting_linux_paragraph_12.6.txt | 18 ++++++++++ ...necting_linux_paragraph_12.6_metadata.json | 15 ++++++++ .../connecting_linux_paragraph_13.1.txt | 10 ++++++ ...necting_linux_paragraph_13.1_metadata.json | 11 ++++++ .../connecting_linux_paragraph_4.1.txt | 7 ++++ ...nnecting_linux_paragraph_4.1_metadata.json | 15 ++++++++ .../connecting_linux_paragraph_5.1.txt | 12 +++++++ ...nnecting_linux_paragraph_5.1_metadata.json | 14 ++++++++ .../connecting_linux_paragraph_5.2.txt | 4 +++ ...nnecting_linux_paragraph_5.2_metadata.json | 11 ++++++ .../account/account_macos_paragraph_11.1.txt | 11 ++++++ 
...account_macos_paragraph_11.1_metadata.json | 14 ++++++++ .../account/account_macos_paragraph_4.1.txt | 10 ++++++ .../account_macos_paragraph_4.1_metadata.json | 14 ++++++++ .../account/account_macos_paragraph_5.1.txt | 12 +++++++ .../account_macos_paragraph_5.1_metadata.json | 11 ++++++ .../account/account_macos_paragraph_5.2.txt | 13 +++++++ .../account_macos_paragraph_5.2_metadata.json | 11 ++++++ .../account/account_macos_paragraph_5.3.txt | 20 +++++++++++ .../account_macos_paragraph_5.3_metadata.json | 11 ++++++ .../account/account_macos_paragraph_5.4.txt | 18 ++++++++++ .../account_macos_paragraph_5.4_metadata.json | 11 ++++++ .../account/account_macos_paragraph_5.5.txt | 6 ++++ .../account_macos_paragraph_5.5_metadata.json | 11 ++++++ .../account/account_macos_paragraph_6.1.txt | 1 + .../account_macos_paragraph_6.1_metadata.json | 11 ++++++ .../account/account_macos_paragraph_7.1.txt | 14 ++++++++ .../account_macos_paragraph_7.1_metadata.json | 14 ++++++++ .../account/account_macos_paragraph_7.2.txt | 7 ++++ .../account_macos_paragraph_7.2_metadata.json | 11 ++++++ .../account/account_macos_paragraph_9.1.txt | 11 ++++++ .../account_macos_paragraph_9.1_metadata.json | 11 ++++++ .../connecting_macos_paragraph_10.1.txt | 35 +++++++++++++++++++ ...necting_macos_paragraph_10.1_metadata.json | 11 ++++++ .../connecting_macos_paragraph_11.1.txt | 6 ++++ ...necting_macos_paragraph_11.1_metadata.json | 11 ++++++ .../connecting_macos_paragraph_12.1.txt | 12 +++++++ ...necting_macos_paragraph_12.1_metadata.json | 14 ++++++++ .../connecting_macos_paragraph_12.2.txt | 17 +++++++++ ...necting_macos_paragraph_12.2_metadata.json | 11 ++++++ .../connecting_macos_paragraph_12.3.txt | 22 ++++++++++++ ...necting_macos_paragraph_12.3_metadata.json | 11 ++++++ .../connecting_macos_paragraph_12.4.txt | 14 ++++++++ ...necting_macos_paragraph_12.4_metadata.json | 11 ++++++ .../connecting_macos_paragraph_12.5.txt | 14 ++++++++ ...necting_macos_paragraph_12.5_metadata.json | 11 
++++++ .../connecting_macos_paragraph_12.6.txt | 18 ++++++++++ ...necting_macos_paragraph_12.6_metadata.json | 15 ++++++++ .../connecting_macos_paragraph_13.1.txt | 12 +++++++ ...necting_macos_paragraph_13.1_metadata.json | 11 ++++++ .../connecting_macos_paragraph_13.2.txt | 6 ++++ ...necting_macos_paragraph_13.2_metadata.json | 11 ++++++ .../connecting_macos_paragraph_4.1.txt | 7 ++++ ...nnecting_macos_paragraph_4.1_metadata.json | 15 ++++++++ .../connecting_macos_paragraph_5.1.txt | 10 ++++++ ...nnecting_macos_paragraph_5.1_metadata.json | 14 ++++++++ .../connecting_macos_paragraph_5.2.txt | 7 ++++ ...nnecting_macos_paragraph_5.2_metadata.json | 11 ++++++ .../account_windows_paragraph_11.1.txt | 11 ++++++ ...count_windows_paragraph_11.1_metadata.json | 14 ++++++++ .../account/account_windows_paragraph_4.1.txt | 14 ++++++++ ...ccount_windows_paragraph_4.1_metadata.json | 14 ++++++++ .../account/account_windows_paragraph_4.2.txt | 13 +++++++ ...ccount_windows_paragraph_4.2_metadata.json | 11 ++++++ .../account/account_windows_paragraph_4.3.txt | 13 +++++++ ...ccount_windows_paragraph_4.3_metadata.json | 15 ++++++++ .../account/account_windows_paragraph_4.4.txt | 17 +++++++++ ...ccount_windows_paragraph_4.4_metadata.json | 11 ++++++ .../account/account_windows_paragraph_4.5.txt | 7 ++++ ...ccount_windows_paragraph_4.5_metadata.json | 11 ++++++ .../account/account_windows_paragraph_6.1.txt | 13 +++++++ ...ccount_windows_paragraph_6.1_metadata.json | 11 ++++++ .../account/account_windows_paragraph_6.2.txt | 11 ++++++ ...ccount_windows_paragraph_6.2_metadata.json | 15 ++++++++ .../account/account_windows_paragraph_6.3.txt | 5 +++ ...ccount_windows_paragraph_6.3_metadata.json | 11 ++++++ .../account/account_windows_paragraph_9.1.txt | 7 ++++ ...ccount_windows_paragraph_9.1_metadata.json | 11 ++++++ .../connecting_windows_paragraph_10.1.txt | 5 +++ ...cting_windows_paragraph_10.1_metadata.json | 11 ++++++ .../connecting_windows_paragraph_11.1.txt | 11 ++++++ 
...cting_windows_paragraph_11.1_metadata.json | 11 ++++++ .../connecting_windows_paragraph_11.2.txt | 19 ++++++++++ ...cting_windows_paragraph_11.2_metadata.json | 11 ++++++ .../connecting_windows_paragraph_11.3.txt | 7 ++++ ...cting_windows_paragraph_11.3_metadata.json | 11 ++++++ .../connecting_windows_paragraph_4.1.txt | 11 ++++++ ...ecting_windows_paragraph_4.1_metadata.json | 15 ++++++++ .../connecting_windows_paragraph_4.2.txt | 13 +++++++ ...ecting_windows_paragraph_4.2_metadata.json | 11 ++++++ .../connecting_windows_paragraph_4.3.txt | 13 +++++++ ...ecting_windows_paragraph_4.3_metadata.json | 14 ++++++++ .../connecting_windows_paragraph_4.4.txt | 11 ++++++ ...ecting_windows_paragraph_4.4_metadata.json | 11 ++++++ 160 files changed, 1976 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt new file mode 100644 index 000000000000..1b79fd223918 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt @@ -0,0 +1,13 @@ +Getting an HPC Account +Getting ready to request an account +All users of AUGent can request +an +account on the HPC, which is part of the Flemish 
Supercomputing Centre (VSC). +See HPC policies for more information on who is entitled to an account. +The VSC, abbreviation of Flemish Supercomputer Centre, is a virtual +supercomputer centre. It is a partnership between the five Flemish +associations: the Association KU Leuven, Ghent University Association, +Brussels University Association, Antwerp University Association and the +University Colleges-Limburg. The VSC is funded by the Flemish +Government. +There are two methods for connecting to HPC-UGent infrastructure: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt new file mode 100644 index 000000000000..371dd9db52b4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt @@ -0,0 +1,19 @@ +Welcome e-mail +Within one day, you should receive a Welcome e-mail with your VSC +account details. +Dear (Username), +Your VSC-account has been approved by an administrator. +Your vsc-username is vsc40000 +Your account should be fully active within one hour. +To check or update your account information please visit +https://account.vscentrum.be/ +For further info please visit https://www.vscentrum.be/user-portal +Kind regards, +-- The VSC administrators + +Now, you can start using the HPC. You can always look up your VSC id later +by visiting . +Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json new file mode 100644 index 000000000000..4b5b5202d1ca --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "", + "previous_title": "account_paragraph_9", + "next_title": "account_paragraph_11", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#adding-multiple-ssh-public-keys-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt new file mode 100644 index 000000000000..6ee6880838e6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt @@ -0,0 +1,17 @@ +5. Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. +Computation Workflow on the HPC +A typical Computation workflow will be: +1. Connect to the HPC +2. Transfer your files to the HPC +3. Compile your code and test it +4. Create a job script +5. Submit your job +6. Wait while + 1. your job gets into the queue + 2. your job gets executed + 3. your job finishes +7. Move your results +We'll take you through the different tasks one by one in the following +chapters. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json new file mode 100644 index 000000000000..a5df035df493 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Computation-Workflow-on-the-HPC", + "title_depth": 2, + "directory": "account", + "parent_title": "", + "previous_title": "account_paragraph_11", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#computation-workflow-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json new file mode 100644 index 000000000000..726ce9f94fa1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Getting-ready-to-request-an-account", + "title_depth": 2, + "directory": "account", + "links": { + "0": "../sites/hpc_policies" + }, + "parent_title": "", + "previous_title": null, + "next_title": "account_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt new file mode 100644 index 000000000000..6ecd65e2184d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt @@ -0,0 +1,6 @@ +- Using a terminal to connect via SSH. 
+- Using the web portal +The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). +If you would like to use a terminal with SSH, as this gives you more flexibility, continue reading. +However if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account. +Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json new file mode 100644 index 000000000000..257f886c6e01 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "account", + "subtitle": "Getting-ready-to-request-an-account", + "title_depth": 2, + "directory": "account", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/account/#applying-for-the-account", + "2": "https://docs.hpc.ugent.be/web_portal" + }, + "parent_title": "", + "previous_title": "account_paragraph_1", + "next_title": "account_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt new file mode 100644 index 000000000000..9632ef1f5afc --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt @@ -0,0 +1,11 @@ +The HPC-UGent infrastructure clusters use public/private key pairs for user authentication +(rather
than passwords). Technically, the private key is stored on your +local computer and always stays there; the public key is stored on the HPC. +Access to the HPC is granted to anyone who can prove to have access to the +corresponding private key on his local computer. +How do SSH keys work? +- an SSH public/private key pair can be seen as a lock and a key +- the SSH public key is equivalent with a lock: you give it to the + VSC and they put it on the door that gives access to your account. +- the SSH private key is like a physical key: you don't hand it out + to other people. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json new file mode 100644 index 000000000000..b94f233779b3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "title_depth": 3, + "directory": "account", + "parent_title": "", + "previous_title": "account_paragraph_2", + "next_title": "account_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt new file mode 100644 index 000000000000..125b566419a1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt @@ -0,0 +1,13 @@ +Applying for the account +Visit +You will be redirected to our WAYF (Where Are You From) service where +you have to select your "Home Organisation". +Select "UGent" in the dropdown box and optionally select "Save my preference" +and "permanently". 
+Click "Confirm" +You will now be taken to the authentication page of your institute. +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json new file mode 100644 index 000000000000..6d186b6ff463 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Applying-for-the-account", + "title_depth": 2, + "directory": "account", + "parent_title": "", + "previous_title": "account_paragraph_7", + "next_title": "account_paragraph_9", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#applying-for-the-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt new file mode 100644 index 000000000000..b144712c9df1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt @@ -0,0 +1,13 @@ +Connecting to the HPC infrastructure +Before you can really start using the HPC clusters, there are several things +you need to do or know: +1. You need to log on to the cluster using an SSH client to one of + the login nodes or by using the HPC web portal. + This will give you command-line access. + A standard web browser like Firefox or Chrome for the web portal will suffice. +2. 
Before you can do some work, you'll have to transfer the files + that you need from your desktop computer to the cluster. At the end + of a job, you might want to transfer some files back. +3. Optionally, if you wish to use programs with a **graphical user + interface**, you will need an X-server on your client system and log + in to the login nodes with X-forwarding enabled. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt new file mode 100644 index 000000000000..df00d4ed2a4a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt @@ -0,0 +1,7 @@ +Fast file transfer for large datasets +See the section on rsync in chapter 5 of the Linux intro manual. +Changing login nodes +It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. +For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: +ssh gligar07.gastly.os +This is also possible the other way around. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json new file mode 100644 index 000000000000..0543efa40833 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Changing-login-nodes", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_13", + "next_title": "connecting_paragraph_15", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt new file mode 100644 index 000000000000..b21976186473 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt @@ -0,0 +1,12 @@ +If you want to find out which login host you are connected to, you can use the hostname command. +$ hostname +gligar07.gastly.os +$ ssh gligar08.gastly.os +$ hostname +gligar08.gastly.os + +Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux. +These can make sessions that 'survive' across disconnects.
+You can find more information on how to use these tools here (or on other online sources): +- screen +- tmux \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json new file mode 100644 index 000000000000..d23146ed79f0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Changing-login-nodes", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/", + "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_14", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json new file mode 100644 index 000000000000..ef0bc5473b0d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Connecting-to-the-HPC-infrastructure", + "title_depth": 1, + "directory": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal" + }, + "parent_title": "", + "previous_title": null, + "next_title": "connecting_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#connecting-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt new file mode 100644 index 000000000000..4c1d879b954a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt @@ -0,0 +1,14 @@ +4. Often several versions of software packages and libraries are + installed, so you need to select the ones you need. To manage + different versions efficiently, the VSC clusters use so-called + modules, so you will need to select and load the modules that + you need. +Connection restrictions +Since March 20th 2020, restrictions are in place that limit from where +you can connect to the VSC HPC infrastructure, in response to security +incidents involving several European HPC centres. +VSC login nodes are only directly accessible from within university +networks, and from (most) Belgian commercial internet providers. +All other IP domains are blocked by default. 
If you are connecting from +an IP address that is not allowed direct access, you have the following +options to get access to VSC login nodes: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json new file mode 100644 index 000000000000..39ee53fcf0b8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Connection-restrictions", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_1", + "next_title": "connecting_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt new file mode 100644 index 000000000000..668a1e6df572 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt @@ -0,0 +1,12 @@ +- Use a VPN connection to connect to the UGent network (recommended). +- Whitelist your IP address automatically by accessing + and log in with your UGent account. + - While this web connection is active new SSH sessions can be + started. + - Active SSH sessions will remain active even when this web page + is closed. +- Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your + IP range (e.g., for industry access, automated processes).
+Trying to establish an SSH connection from an IP address that does not +adhere to these restrictions will result in an immediate failure to +connect, with an error message like: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json new file mode 100644 index 000000000000..4dc75d7dcf33 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Connection-restrictions", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_2", + "next_title": "connecting_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt new file mode 100644 index 000000000000..472991adada3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt @@ -0,0 +1,16 @@ +Congratulations, you're on the HPC infrastructure now! +To find out where you have landed you can print the current working directory: +$ pwd +/user/home/gent/vsc400/vsc40000 + +Your new private home directory is "/user/home/gent/vsc400/vsc40000". Here you can create your own +subdirectory structure, copy and prepare your applications, compile and +test them and submit your jobs on the HPC. +$ cd /apps/gent/tutorials +$ ls +Intro-HPC/ + +This directory currently contains all training material for the Introduction to the HPC. More +relevant training material to work with the HPC can always be added later in +this directory. 
+You can now explore the content of this directory with the "ls --l" (lists long) and the "cd" (change directory) commands: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json new file mode 100644 index 000000000000..1c7ae8ed2678 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_5", + "next_title": "connecting_paragraph_7", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt new file mode 100644 index 000000000000..35996afe4da5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt @@ -0,0 +1,22 @@ +As we are interested in the use of the HPC, move further to Intro-HPC and explore the +contents up to 2 levels deep: +$ cd Intro-HPC +$ tree -L 2 +. +'-- examples + |-- Compiling-and-testing-your-software-on-the-HPC + |-- Fine-tuning-Job-Specifications + |-- Multi-core-jobs-Parallel-Computing + |-- Multi-job-submission + |-- Program-examples + |-- Running-batch-jobs + |-- Running-jobs-with-input + |-- Running-jobs-with-input-output-data + |-- example.pbs + '-- example.sh +9 directories, 5 files + +This directory contains: +1. This HPC Tutorial (in either a Mac, Linux or Windows version). +2. 
An examples subdirectory, containing all the examples that you need in this + Tutorial, as well as examples that might be useful for your specific applications. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json new file mode 100644 index 000000000000..709753e4dc46 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_6", + "next_title": "connecting_paragraph_8", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt new file mode 100644 index 000000000000..096c74c1372c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt @@ -0,0 +1,13 @@ +$ cd examples + + tip + Typing cd ex followed by tab (the Tab-key) will generate the cd examples + command. Command-line completion (also tab completion) is a common feature of the bash command + line interpreter, in which the program automatically fills in partially + typed commands. + tip + For more exhaustive tutorials about Linux usage, see Appendix Useful Linux Commands +The first action is to copy the contents of the HPC examples directory to +your home directory, so that you have your own personal copy and that +you can start using the examples. 
The "-r" option of the copy command +will also copy the contents of the sub-directories "recursively". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json new file mode 100644 index 000000000000..0241e0bd6b9b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "../useful_linux_commands" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_7", + "next_title": "connecting_paragraph_9", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt new file mode 100644 index 000000000000..5a634e6bddc6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt @@ -0,0 +1,27 @@ +$ cp -r /apps/gent/tutorials/Intro-HPC/examples ~/ + +You can exit the connection at anytime by entering: +$ exit +logout +Connection to login.hpc.ugent.be closed. + + tip "tip: Setting your Language right" + You may encounter a warning message similar to the following one during connecting: + perl: warning: Setting locale failed. + perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_CTYPE = "UTF-8", + LANG = (unset) + are supported and installed on your system. + perl: warning: Falling back to the standard locale ("C"). + or any other error message complaining about the locale. 
+ This means that the correct "locale" has not yet been properly specified on your local machine. Try: + LANG= + LC_COLLATE="C" + LC_CTYPE="UTF-8" + LC_MESSAGES="C" + LC_MONETARY="C" + LC_NUMERIC="C" + LC_TIME="C" + LC_ALL= diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json new file mode 100644 index 000000000000..40b04f24e9f1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "", + "previous_title": "connecting_paragraph_8", + "next_title": "connecting_paragraph_10", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt new file mode 100644 index 000000000000..3a46897bdee9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt @@ -0,0 +1,11 @@ +Adding multiple SSH public keys (optional) +1. Create a new public/private SSH key pair from the new computer. + Repeat the process described in + section Generate a public/private key pair with OpenSSH. +2. Go to +3. Upload the new SSH public key using the Add public key section. Make sure that your + public key is actually saved, because a public key will be refused + if it is too short, wrong type, or in a wrong format. +4. (optional) If you lost your key, you can delete the old key on the + same page. 
You should keep at least one valid public SSH key in your + account. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json new file mode 100644 index 000000000000..72b9f92061c2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Applying-for-the-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" + }, + "previous_title": "account_paragraph_10", + "next_title": "account_paragraph_12", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#adding-multiple-ssh-public-keys-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt new file mode 100644 index 000000000000..1395e2ee7bd5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt @@ -0,0 +1,10 @@ +How do SSH keys work +- anyone who has the key (and the optional password) can unlock the + door and log in to the account. +- the door to your VSC account is special: it can have multiple + locks (SSH public keys) attached to it, and you only need to open + one lock with the corresponding key (SSH private key) to open + the door (log in to the account). 
+Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json new file mode 100644 index 000000000000..52e1569a8a7a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../../linux-tutorial" + }, + "previous_title": "account_paragraph_3", + "next_title": "account_paragraph_5", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt new file mode 100644 index 000000000000..caaaea5ee919 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt @@ -0,0 +1,14 @@ +How do SSH keys work +Launch a terminal from your desktop's application menu and you will see +the bash shell. There are other shells, but most Linux distributions use +bash by default. +Test OpenSSH +Secure Shell (ssh) is a cryptographic network protocol for secure data +communication, remote command-line login, remote command execution, and +other secure network services between two networked computers. 
In short, +ssh provides a secure connection between 2 computers via insecure +channels (Network, Internet, telephone lines, ...). +"Secure" means that: +1. the User is authenticated to the System; and +2. the System is authenticated to the User; and +3. all data is encrypted during transfer. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..4636f13a4b42 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Test-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_paragraph_4", + "next_title": "account_linux_paragraph_5.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt new file mode 100644 index 000000000000..a166dd145039 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt @@ -0,0 +1,13 @@ +OpenSSH is a FREE implementation of the SSH connectivity protocol. Linux comes +with its own implementation of OpenSSH, so you don't need to install any +third-party software to use it. Just open a terminal window and jump in! +On all popular Linux distributions, the OpenSSH software is readily +available, and most often installed by default.
You can check whether +the OpenSSH software is installed by opening a terminal and typing: +$ ssh -V +OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 +To access the clusters and transfer your files, you will use the +following commands: +1. ssh-keygen: to generate the SSH key pair (public + private key); +2. ssh: to open a shell on a remote machine; +3. sftp: a secure equivalent of ftp; diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json new file mode 100644 index 000000000000..ca9c4c7dc1da --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Test-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_5.1", + "next_title": "account_linux_paragraph_5.3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt new file mode 100644 index 000000000000..2e8fe9e3a247 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt @@ -0,0 +1,17 @@ +4. scp: a secure equivalent of the remote copy command rcp. +Generate a public/private key pair with OpenSSH +A key pair might already be present in the default location inside your +home directory. 
Therefore, we first check if a key is available with the +"list short" ("ls") command: +$ ls ~/.ssh +If a key-pair is already available, you would normally get: +authorized_keys id_rsa id_rsa.pub known_hosts +Otherwise, the command will show: +ls: .ssh: No such file or directory +You can recognise a public/private key pair when a pair of files has the +same name except for the extension ".pub" added to one of them. In this +particular case, the private key is "id_rsa" and public key is +"id_rsa.pub". You may have multiple keys (not necessarily in the +directory "~/.ssh") if you or your operating system requires this. Be +aware that your existing key pair might be too short, or not the right +type. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json new file mode 100644 index 000000000000..d902f6a0838e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_5.2", + "next_title": "account_linux_paragraph_5.4", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt new file mode 100644 index 000000000000..3cde4395d81f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt @@ 
-0,0 +1,18 @@ +You will need to generate a new key pair, when: +1. you don't have a key pair yet +2. you forgot the passphrase protecting your private key +3. your private key was compromised +4. your key pair is too short or not the right type +For extra security, the private key itself can be encrypted using a +"passphrase", to prevent anyone from using your private key even when +they manage to copy it. You have to "unlock" the private key by typing +the passphrase. Be sure to never give away your private key, it is +private and should stay private. You should not even copy it to one of +your other machines, instead, you should create a new public/private key +pair for each machine. +$ ssh-keygen -t rsa -b 4096 +Generating public/private rsa key pair. Enter file in which to save the +key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no +passphrase): Enter same passphrase again: Your identification has been +saved in /home/user/.ssh/id_rsa. Your public key has been saved in +/home/user/.ssh/id_rsa.pub. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json new file mode 100644 index 000000000000..1edae26d97b2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_5.3", + "next_title": "account_linux_paragraph_5.5", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt new file mode 100644 index 000000000000..78c142e82e00 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt @@ -0,0 +1,6 @@ +This will ask you for a file name to store the private and public key, +and a passphrase to protect your private key. It needs to be emphasised +that you really should choose the passphrase wisely! The system will ask +you for it every time you want to use the private key that is every time +you want to access the cluster or transfer your files. +Without your key pair, you won't be able to apply for a personal VSC account. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json new file mode 100644 index 000000000000..29affc0335eb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_5.4", + "next_title": "account_paragraph_6", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt new file mode 100644 index 000000000000..c3b395b52962 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt @@ -0,0 +1 @@ +Using an SSH agent (optional) \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json new file mode 100644 index 000000000000..acf12bc0a7d0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title":
"account_paragraph_5", + "next_title": "account_paragraph_7", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt new file mode 100644 index 000000000000..e3ef2176f09e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt @@ -0,0 +1,14 @@ +Using an SSH agent (optional) +Most recent Unix derivatives include by default an SSH agent ("gnome-keyring-daemon" in most cases) +to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into +the SSH manager keyring to be able to connect to the HPC cluster. If +not, SSH client will display an error message (see Connecting) similar to this: +Agent admitted failure to sign using the key. +Permission denied (publickey,gssapi-keyex,gssapi-with-mic). +This could be fixed using the ssh-add command. You can include the new +private keys' identities in your keyring with: +$ ssh-add + tip + Without extra options ssh-add adds any key located at $HOME/.ssh + directory, but you can specify the private key location path as + argument, as example: ssh-add /path/to/my/id_rsa. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json new file mode 100644 index 000000000000..b6b1e052345e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../connecting" + }, + "previous_title": "account_paragraph_6", + "next_title": "account_linux_paragraph_7.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt new file mode 100644 index 000000000000..93019fa1a6aa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt @@ -0,0 +1,8 @@ +Check that your key is available from the keyring with: +$ ssh-add -l +After these changes the key agent will keep your SSH key to connect to +the clusters as usual. + tip + You should execute ssh-add command again if you generate a new SSH + key. +Visit for more information. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json new file mode 100644 index 000000000000..35466be5b567 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_linux_paragraph_7.1", + "next_title": "account_paragraph_8", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt new file mode 100644 index 000000000000..a9059b224bf2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt @@ -0,0 +1,6 @@ +Applying for the account +This file has been stored in the directory "~/.ssh/". +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json new file mode 100644 index 000000000000..219883887235 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Applying-for-the-account", + "title_depth": 2, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_8", + "next_title": "account_paragraph_10", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#applying-for-the-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt new file mode 100644 index 000000000000..3e588c709d44 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt @@ -0,0 +1,35 @@ +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... 
+ + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" + + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. + \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json new file mode 100644 index 000000000000..364c81834cf8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_9", + "next_title": "connecting_paragraph_11", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt new file mode 100644 index 000000000000..d872c89a0f83 --- 
/dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt @@ -0,0 +1,6 @@ +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. Linux ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json new file mode 100644 index 000000000000..420f73742f5c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Transfer-Files-tofrom-the-HPC", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_10", + "next_title": "connecting_paragraph_12", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt new file mode 100644 index 000000000000..8d0031fcca9f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt @@ -0,0 +1,12 @@ +Transfer Files to/from the HPC +Using scp +Secure copy or SCP is a tool
(command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json new file mode 100644 index 000000000000..19eba778d90c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, + "previous_title": "connecting_paragraph_11", + "next_title": "connecting_linux_paragraph_12.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt new file mode 100644 index 000000000000..f1da0677a677 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt @@ -0,0 +1,17 @@ +$ hostname + +If you're still using the terminal that is connected to the HPC, close the +connection by typing "exit" in the terminal window. +For example, we will copy the (local) file "localfile.txt" to your +home directory on the HPC cluster. We first generate a small dummy +"localfile.txt", which contains the word "Hello". Use your own VSC +account, which is something like "vsc40000". Don't forget the colon (:) at the +end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your +local filesystem. You can even specify where to save the file on the +remote filesystem by putting a path after the colon. +$ echo "Hello" > localfile.txt +$ ls -l +... +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +$ scp localfile.txt vsc40000@login.hpc.ugent.be: +localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json new file mode 100644 index 000000000000..0b3a3418c55d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_linux_paragraph_12.1", + "next_title": "connecting_linux_paragraph_12.3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt new file mode 100644 index 000000000000..9585900e3564 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt @@ -0,0 +1,22 @@ +Connect to the HPC via another terminal, print the working directory (to +make sure you're in the home directory) and check whether the file has +arrived: +$ pwd +/user/home/gent/vsc400/vsc40000 +$ ls -l +total 1536 +drwxrwxr-x 2 +drwxrwxr-x 2 +drwxrwxr-x 10 +-rw-r--r-- 1 +$ cat localfile.txt +Hello +The scp command can also be used to copy files from the cluster to your +local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" +subdirectory on the cluster to your local computer. +First, we will confirm that the file is indeed in the "docs" +subdirectory. In the terminal on the login node, enter: +$ cd ~/docs +$ ls -l +total 1536 +-rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json new file mode 100644 index 000000000000..5624749ede84 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_linux_paragraph_12.2", + "next_title": "connecting_linux_paragraph_12.4", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt new file mode 100644 index 000000000000..2664953ed0ce --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt @@ -0,0 +1,14 @@ +Now we will copy the file to the local machine. On the terminal on your +own local computer, enter: +$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . +intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 +$ ls -l +total 899 +-rw-r--r-- 1 user staff 741995 Sep 18 09:53 +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +The file has been copied from the HPC to your local computer. +It's also possible to copy entire directories (and their contents) with +the -r flag. For example, if we want to copy the local directory +dataset to $VSC_SCRATCH, we can use the following command (assuming +you've created the scratch symlink): +$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json new file mode 100644 index 000000000000..5a401911cab7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_linux_paragraph_12.3", + "next_title": "connecting_linux_paragraph_12.5", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt new file mode 100644 index 000000000000..51d39b548c3a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt @@ -0,0 +1,14 @@ +If you don't use the -r option to copy a directory, you will run into +the following error: +$ scp dataset vsc40000@login.hpc.ugent.be:scratch +dataset: not a regular file +Using sftp +The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file +transfer and file management functionalities over any reliable data +stream. It was designed as an extension of the Secure Shell protocol +(SSH) version 2.0. This protocol assumes that it is run over a secure +channel, such as SSH, that the server has already authenticated the +client, and that the identity of the client user is available to the +protocol. +The sftp command is an equivalent of the ftp command, with the difference that +it uses the secure ssh protocol to connect to the clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json new file mode 100644 index 000000000000..a479f66e7e04 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_linux_paragraph_12.4", + "next_title": "connecting_linux_paragraph_12.6", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt new file mode 100644 index 000000000000..4ae257101f16 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt @@ -0,0 +1,18 @@ +One easy way of starting a sftp session is +$ sftp vsc40000@login.hpc.ugent.be +Typical and popular commands inside an sftp session are: +| | | +|:--------------------------|:-------------------------------------------------------------------------------------| +| cd ~/exmples/fibo | Move to the examples/fibo subdirectory on the (i.e., the HPC remote machine) | +| ls | Get a list of the files in the current directory on the HPC. | +| get fibo.py | Copy the file "fibo.py" from the HPC | +| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. | +| lcd test | Move to the "test" subdirectory on your local machine. | +| lcd .. 
| Move up one level in the local directory. | +| lls | Get local directory listing. | +| put test.py | Copy the local file test.py to the HPC. | +| put test1.py test2.py | Copy the local file test1.py to the HPC and rename it to test2.py. | +| bye | Quit the sftp session | +| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | +| **mput *.h** | Copy all the local files with extension ".h" to the HPC. | +| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json new file mode 100644 index 000000000000..9c744fd5133a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "", + "1": "" + }, + "previous_title": "connecting_linux_paragraph_12.5", + "next_title": "connecting_linux_paragraph_12.7", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt new file mode 100644 index 000000000000..a0496edfb14b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt @@ -0,0 +1,10 @@ +Transfer Files tofrom the HPC +Using a GUI +If you prefer a GUI to transfer files back and forth to the HPC, you can +use your file browser. 
Open your file browser and press +++"Ctrl"+"l"++ +This should open up an address bar where you can enter a URL. +Alternatively, look for the "connect to server" option in your file +browser's menu. +Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter. +You should now be able to browse files on the HPC in your file browser. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json new file mode 100644 index 000000000000..d634a356654e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_paragraph_12", + "next_title": "connecting_paragraph_14", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt new file mode 100644 index 000000000000..773d03f06893 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt @@ -0,0 +1,7 @@ +First Time connection to the HPC infrastructure +ssh_exchange_identification: read: Connection reset by peer +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily aimed at people using a terminal with SSH. +If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. 
+If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json new file mode 100644 index 000000000000..f6745fc31dc5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + }, + "previous_title": "connecting_paragraph_3", + "next_title": "connecting_paragraph_5", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt new file mode 100644 index 000000000000..94d5d9500a3c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt @@ -0,0 +1,12 @@ +First Time connection to the HPC infrastructure +Connect +Open up a terminal and enter the following command to connect to the HPC. 
+$ ssh vsc40000@login.hpc.ugent.be +Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login +node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. +The first time you make a connection to the login node, you will be +asked to verify the authenticity of the login node. Please check +Warning message when first connecting to new host on how to do this. +A possible error message you can get if you previously saved your +private key somewhere else than the default location +($HOME/.ssh/id_rsa): diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..05996eb5df2c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + }, + "previous_title": "connecting_paragraph_4", + "next_title": "connecting_linux_paragraph_5.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt new file mode 100644 index 000000000000..312fe885cb0c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt @@ -0,0 +1,4 @@ +Permission 
denied (publickey,gssapi-keyex,gssapi-with-mic). +In this case, use the -i option for the ssh command to specify the +location of your private key. For example: +$ ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json new file mode 100644 index 000000000000..85a826e41a3e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_linux_paragraph_5.1", + "next_title": "connecting_paragraph_6", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt new file mode 100644 index 000000000000..3a46897bdee9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt @@ -0,0 +1,11 @@ +Adding multiple SSH public keys (optional) +1. Create a new public/private SSH key pair from the new computer. + Repeat the process described in + section Generate a public/private key pair with OpenSSH. +2. Go to +3. Upload the new SSH public key using the Add public key section. Make sure that your + public key is actually saved, because a public key will be refused + if it is too short, wrong type, or in a wrong format. +4. (optional) If you lost your key, you can delete the old key on the + same page. 
You should keep at least one valid public SSH key in your + account. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json new file mode 100644 index 000000000000..dd8b3400419f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Applying-for-the-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" + }, + "previous_title": "account_paragraph_10", + "next_title": "account_paragraph_12", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#adding-multiple-ssh-public-keys-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt new file mode 100644 index 000000000000..1395e2ee7bd5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt @@ -0,0 +1,10 @@ +How do SSH keys work +- anyone who has the key (and the optional password) can unlock the + door and log in to the account. +- the door to your VSC account is special: it can have multiple + locks (SSH public keys) attached to it, and you only need to open + one lock with the corresponding key (SSH private key) to open + the door (log in to the account). 
+Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json new file mode 100644 index 000000000000..33d083958b99 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../../linux-tutorial" + }, + "previous_title": "account_paragraph_3", + "next_title": "account_paragraph_5", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt new file mode 100644 index 000000000000..f3483fcaef16 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt @@ -0,0 +1,12 @@ +How do SSH keys work +To open a Terminal window in macOS, open the Finder and choose +*\>\> Applications \> Utilities \> Terminal* +Before requesting an account, you need to generate a pair of ssh keys. +One popular way to do this on macOS is using the OpenSSH client included with macOS, which you can then also use to log on to the clusters. 
+Test OpenSSH +Secure Shell (ssh) is a cryptographic network protocol for secure data +communication, remote command-line login, remote command execution, and +other secure network services between two networked computers. In short, +ssh provides a secure connection between 2 computers via insecure +channels (Network, Internet, telephone lines, ...). +"Secure" means that: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..c75d6aede582 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Test-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_paragraph_4", + "next_title": "account_macos_paragraph_5.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt new file mode 100644 index 000000000000..5189a9530026 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt @@ -0,0 +1,13 @@ +1. the User is authenticated to the System; and +2. the System is authenticated to the User; and +3. all data is encrypted during transfer. +OpenSSH is a FREE implementation of the SSH connectivity protocol. macOS comes +with its own implementation of OpenSSH, so you don't need to install any +third-party software to use it. Just open a terminal window and jump in! 
+On all popular Linux distributions, the OpenSSH software is readily +available, and most often installed by default. You can check whether +the OpenSSH software is installed by opening a terminal and typing: +$ ssh -V +OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 +To access the clusters and transfer your files, you will use the +following commands: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json new file mode 100644 index 000000000000..7f6c80a32f64 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Test-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_5.1", + "next_title": "account_macos_paragraph_5.3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt new file mode 100644 index 000000000000..2c97d5974257 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt @@ -0,0 +1,20 @@ +1. ssh-keygen: to generate the SSH key pair (public + private key); +2. ssh: to open a shell on a remote machine; +3. sftp: a secure equivalent of ftp; +4. scp: a secure equivalent of the remote copy command rcp. +Generate a public/private key pair with OpenSSH +A key pair might already be present in the default location inside your +home directory. 
Therefore, we first check if a key is available with the +"list short" ("ls") command: +$ ls ~/.ssh +If a key-pair is already available, you would normally get: +authorized_keys id_rsa id_rsa.pub known_hosts +Otherwise, the command will show: +ls: .ssh: No such file or directory +You can recognise a public/private key pair when a pair of files has the +same name except for the extension ".pub" added to one of them. In this +particular case, the private key is "id_rsa" and public key is +"id_rsa.pub". You may have multiple keys (not necessarily in the +directory "~/.ssh") if you or your operating system requires this. Be +aware that your existing key pair might be too short, or not the right +type. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json new file mode 100644 index 000000000000..7c0f0d2a04d5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_5.2", + "next_title": "account_macos_paragraph_5.4", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt new file mode 100644 index 000000000000..3cde4395d81f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt @@ 
-0,0 +1,18 @@ +You will need to generate a new key pair, when: +1. you don't have a key pair yet +2. you forgot the passphrase protecting your private key +3. your private key was compromised +4. your key pair is too short or not the right type +For extra security, the private key itself can be encrypted using a +"passphrase", to prevent anyone from using your private key even when +they manage to copy it. You have to "unlock" the private key by typing +the passphrase. Be sure to never give away your private key, it is +private and should stay private. You should not even copy it to one of +your other machines, instead, you should create a new public/private key +pair for each machine. +$ ssh-keygen -t rsa -b 4096 +Generating public/private rsa key pair. Enter file in which to save the +key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no +passphrase): Enter same passphrase again: Your identification has been +saved in /home/user/.ssh/id_rsa. Your public key has been saved in +/home/user/.ssh/id_rsa.pub. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json new file mode 100644 index 000000000000..346108200ac7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_5.3", + "next_title": "account_macos_paragraph_5.5", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt new file mode 100644 index 000000000000..78c142e82e00 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt @@ -0,0 +1,6 @@ +This will ask you for a file name to store the private and public key, +and a passphrase to protect your private key. It needs to be emphasised +that you really should choose the passphrase wisely! The system will ask +you for it every time you want to use the private key that is every time +you want to access the cluster or transfer your files. +Without your key pair, you won't be able to apply for a personal VSC account. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json new file mode 100644 index 000000000000..25baa1e073f3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_5.4", + "next_title": "account_paragraph_6", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt new file mode 100644 index 000000000000..c3b395b52962 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt @@ -0,0 +1 @@ +Using an SSH agent (optional) \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json new file mode 100644 index 000000000000..b8931a423d3c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": 
"account_paragraph_5", + "next_title": "account_paragraph_7", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt new file mode 100644 index 000000000000..d204f4e43920 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt @@ -0,0 +1,14 @@ +Using an SSH agent (optional) +Most recent Unix derivatives include by default an SSH agent +to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into +the SSH manager keyring to be able to connect to the HPC cluster. If +not, SSH client will display an error message (see Connecting) similar to this: +Agent admitted failure to sign using the key. +Permission denied (publickey,gssapi-keyex,gssapi-with-mic). +This could be fixed using the ssh-add command. You can include the new +private keys' identities in your keyring with: +$ ssh-add + tip + Without extra options ssh-add adds any key located at $HOME/.ssh + directory, but you can specify the private key location path as + argument, as example: ssh-add /path/to/my/id_rsa. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json new file mode 100644 index 000000000000..c43391b146ec --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../connecting" + }, + "previous_title": "account_paragraph_6", + "next_title": "account_macos_paragraph_7.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt new file mode 100644 index 000000000000..8fd93f6b4f60 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt @@ -0,0 +1,7 @@ +Check that your key is available from the keyring with: +$ ssh-add -l +After these changes the key agent will keep your SSH key to connect to +the clusters as usual. + tip + You should execute ssh-add command again if you generate a new SSH + key. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json new file mode 100644 index 000000000000..519b58bb1513 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_macos_paragraph_7.1", + "next_title": "account_paragraph_8", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt new file mode 100644 index 000000000000..d11380c25196 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt @@ -0,0 +1,11 @@ +Applying for the account +This file has been stored in the directory "~/.ssh/". + tip + As ".ssh" is an invisible directory, the Finder will not show it by + default. The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++), + which will allow you to enter the name of a directory, which you would + like to open in Finder. Here, type "~/.ssh" and press enter. +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json new file mode 100644 index 000000000000..6b6e8c727031 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Applying-for-the-account", + "title_depth": 2, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_8", + "next_title": "account_paragraph_10", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#applying-for-the-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt new file mode 100644 index 000000000000..3e588c709d44 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt @@ -0,0 +1,35 @@ +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... 
+ + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" + + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. + \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json new file mode 100644 index 000000000000..4c6e54771190 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_9", + "next_title": "connecting_paragraph_11", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt new file mode 100644 index 000000000000..d872c89a0f83 --- 
/dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt @@ -0,0 +1,6 @@ +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json new file mode 100644 index 000000000000..1425455ade89 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Transfer-Files-tofrom-the-HPC", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_10", + "next_title": "connecting_paragraph_12", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt new file mode 100644 index 000000000000..8d0031fcca9f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt @@ -0,0 +1,12 @@ +Transfer Files tofrom the HPC +Using scp +Secure copy or SCP is a tool 
(command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json new file mode 100644 index 000000000000..332e6ed2996f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, + "previous_title": "connecting_paragraph_11", + "next_title": "connecting_macos_paragraph_12.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt new file mode 100644 index 000000000000..f1da0677a677 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt @@ -0,0 +1,17 @@ +$ hostname + +If you're still using the terminal that is connected to the HPC, close the +connection by typing "exit" in the terminal window. +For example, we will copy the (local) file "localfile.txt" to your +home directory on the HPC cluster. We first generate a small dummy +"localfile.txt", which contains the word "Hello". Use your own VSC +account, which is something like "vsc40000". Don't forget the colon (:) at the +end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your +local filesystem. You can even specify where to save the file on the +remote filesystem by putting a path after the colon. +$ echo "Hello" > localfile.txt +$ ls -l +... +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +$ scp localfile.txt vsc40000@login.hpc.ugent.be: +localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json new file mode 100644 index 000000000000..d86cdd989ac6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_12.1", + "next_title": "connecting_macos_paragraph_12.3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt new file mode 100644 index 000000000000..9585900e3564 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt @@ -0,0 +1,22 @@ +Connect to the HPC via another terminal, print the working directory (to +make sure you're in the home directory) and check whether the file has +arrived: +$ pwd +/user/home/gent/vsc400/vsc40000 +$ ls -l +total 1536 +drwxrwxr-x 2 +drwxrwxr-x 2 +drwxrwxr-x 10 +-rw-r--r-- 1 +$ cat localfile.txt +Hello +The scp command can also be used to copy files from the cluster to your +local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" +subdirectory on the cluster to your local computer. +First, we will confirm that the file is indeed in the "docs" +subdirectory. In the terminal on the login node, enter: +$ cd ~/docs +$ ls -l +total 1536 +-rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json new file mode 100644 index 000000000000..4fcc42d23375 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_12.2", + "next_title": "connecting_macos_paragraph_12.4", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt new file mode 100644 index 000000000000..2664953ed0ce --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt @@ -0,0 +1,14 @@ +Now we will copy the file to the local machine. On the terminal on your +own local computer, enter: +$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . +intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 +$ ls -l +total 899 +-rw-r--r-- 1 user staff 741995 Sep 18 09:53 +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +The file has been copied from the HPC to your local computer. +It's also possible to copy entire directories (and their contents) with +the -r flag. For example, if we want to copy the local directory +dataset to $VSC_SCRATCH, we can use the following command (assuming +you've created the scratch symlink): +$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json new file mode 100644 index 000000000000..757b533cf8df --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_12.3", + "next_title": "connecting_macos_paragraph_12.5", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt new file mode 100644 index 000000000000..51d39b548c3a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt @@ -0,0 +1,14 @@ +If you don't use the -r option to copy a directory, you will run into +the following error: +$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +dataset: not a regular file +Using sftp +The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file +transfer and file management functionalities over any reliable data +stream. It was designed as an extension of the Secure Shell protocol +(SSH) version 2.0. This protocol assumes that it is run over a secure +channel, such as SSH, that the server has already authenticated the +client, and that the identity of the client user is available to the +protocol. +The sftp is an equivalent of the ftp command, with the difference that +it uses the secure ssh protocol to connect to the clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json new file mode 100644 index 000000000000..d18c7c7deb5c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_12.4", + "next_title": "connecting_macos_paragraph_12.6", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt new file mode 100644 index 000000000000..4ae257101f16 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt @@ -0,0 +1,18 @@ +One easy way of starting a sftp session is +$ sftp vsc40000@login.hpc.ugent.be +Typical and popular commands inside an sftp session are: +| | | +|:--------------------------|:-------------------------------------------------------------------------------------| +| cd ~/exmples/fibo | Move to the examples/fibo subdirectory on the (i.e., the HPC remote machine) | +| ls | Get a list of the files in the current directory on the HPC. | +| get fibo.py | Copy the file "fibo.py" from the HPC | +| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. | +| lcd test | Move to the "test" subdirectory on your local machine. | +| lcd .. 
| Move up one level in the local directory. | +| lls | Get local directory listing. | +| put test.py | Copy the local file test.py to the HPC. | +| put test1.py test2.py | Copy the local file test1.py to the and rename it to test2.py. | +| bye | Quit the sftp session | +| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | +| **mput *.h** | Copy all the local files with extension ".h" to the HPC. | +| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json new file mode 100644 index 000000000000..a8a4f2a3bab0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "", + "1": "" + }, + "previous_title": "connecting_macos_paragraph_12.5", + "next_title": "connecting_macos_paragraph_12.7", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt new file mode 100644 index 000000000000..c5ed84e9ea0b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt @@ -0,0 +1,12 @@ +Transfer Files tofrom the HPC +Using a GUI (Cyberduck) +Cyberduck is a graphical alternative to the scp command. It can be +installed from . +This is the one-time setup you will need to do before connecting: +1. 
After starting Cyberduck, the Bookmark tab will show up. To add a + new bookmark, click on the "+" sign on the bottom left of the + window. A new window will open. +2. In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)". +3. In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in + your VSC account id (this looks like vsc40000). +4. Select the location of your SSH private key in the "SSH Private Key" field. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json new file mode 100644 index 000000000000..bd02ed8502fe --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI-(Cyberduck)", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_paragraph_12", + "next_title": "connecting_macos_paragraph_13.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt new file mode 100644 index 000000000000..d48d0ce00a39 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt @@ -0,0 +1,6 @@ +5. Finally, type in a name for the bookmark in the "Nickname" field and + close the window by pressing on the red circle in the top left + corner of the window. 
+To open the connection, click on the "Bookmarks" icon (which +resembles an open book) and double-click on the bookmark you just +created. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json new file mode 100644 index 000000000000..344ff690d546 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI-(Cyberduck)", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_13.1", + "next_title": "connecting_paragraph_14", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt new file mode 100644 index 000000000000..773d03f06893 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt @@ -0,0 +1,7 @@ +First Time connection to the HPC infrastructure +ssh_exchange_identification: read: Connection reset by peer +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. +If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json new file mode 100644 index 000000000000..89431f52435f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + }, + "previous_title": "connecting_paragraph_3", + "next_title": "connecting_paragraph_5", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt new file mode 100644 index 000000000000..d4c89b7e1c72 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt @@ -0,0 +1,10 @@ +First Time connection to the HPC infrastructure +Connect +Open up a terminal and enter the following command to connect to the HPC. +You can open a terminal by navigation to Applications and then Utilities in the finder and open Terminal.app, or enter Terminal in Spotlight Search. 
+$ ssh vsc40000@login.hpc.ugent.be +Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login +node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. +The first time you make a connection to the login node, you will be +asked to verify the authenticity of the login node. Please check +Warning message when first connecting to new host on how to do this. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..e17629a55f3b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + }, + "previous_title": "connecting_paragraph_4", + "next_title": "connecting_macos_paragraph_5.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt new file mode 100644 index 000000000000..6fa418464dd6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt @@ -0,0 +1,7 @@ +A possible error message you can get if you previously saved your +private key somewhere else than the default location +($HOME/.ssh/id_rsa): +Permission 
denied (publickey,gssapi-keyex,gssapi-with-mic). +In this case, use the -i option for the ssh command to specify the +location of your private key. For example: +$ ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json new file mode 100644 index 000000000000..5c1d808739cc --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_macos_paragraph_5.1", + "next_title": "connecting_paragraph_6", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt new file mode 100644 index 000000000000..062ea570d967 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt @@ -0,0 +1,11 @@ +Adding multiple SSH public keys (optional) +1. Create a new public/private SSH key pair from Putty. Repeat the + process described in + section Generate a public/private key pair. +2. Go to +3. Upload the new SSH public key using the Add public key section. Make sure that your + public key is actually saved, because a public key will be refused + if it is too short, wrong type, or in a wrong format. +4. (optional) If you lost your key, you can delete the old key on the + same page. 
You should keep at least one valid public SSH key in your + account. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json new file mode 100644 index 000000000000..eb4dd3b3a57c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Applying-for-the-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair" + }, + "previous_title": "account_paragraph_10", + "next_title": "account_paragraph_12", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#adding-multiple-ssh-public-keys-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt new file mode 100644 index 000000000000..2ff8ffc1a085 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt @@ -0,0 +1,14 @@ +How do SSH keys work +- anyone who has the key (and the optional password) can unlock the + door and log in to the account. +- the door to your VSC account is special: it can have multiple + locks (SSH public keys) attached to it, and you only need to open + one lock with the corresponding key (SSH private key) to open + the door (log in to the account). 
+Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). +A typical Windows environment does not come with pre-installed software +to connect and run command-line executables on a HPC. Some tools need to be +installed on your Windows machine first, before we can start the actual +work. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json new file mode 100644 index 000000000000..08573d26bfe7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "../../linux-tutorial" + }, + "previous_title": "account_paragraph_3", + "next_title": "account_windows_paragraph_4.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt new file mode 100644 index 000000000000..c89b45d8f2bf --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt @@ -0,0 +1,13 @@ +Get PuTTY: A free telnet/SSH client +We recommend to use the PuTTY tools package, which is freely available. +You do not need to install PuTTY, you can download the PuTTY and +PuTTYgen executable and run it. 
This can be useful in situations where +you do not have the required permissions to install software on the +computer you are using. Alternatively, an installation package is also +available. +You can download PuTTY from the official address: +. You +probably want the 64-bits version. If you can install software on your +computer, you can use the "Package files", if not, you can download and +use putty.exe and puttygen.exe in the "Alternative binary files" +section. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json new file mode 100644 index 000000000000..bedb3d332181 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Get-PuTTY-A-free-telnetSSH-client", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_windows_paragraph_4.1", + "next_title": "account_windows_paragraph_4.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt new file mode 100644 index 000000000000..cebd1da3bafe --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt @@ -0,0 +1,13 @@ +The PuTTY package consists of several components, but we'll only use +two: +1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) +2. 
PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, + see Generate a public/private key pair) +Generating a public/private key pair +Before requesting a VSC account, you need to generate a pair of ssh +keys. You need 2 keys, a public and a private key. You can visualise the +public key as a lock to which only you have the key (your private key). +You can send a copy of your lock to anyone without any problems, because +only you can open it, as long as you keep your private key secure. To +generate a public/private key pair, you can use the PuTTYgen key +generator. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json new file mode 100644 index 000000000000..a8fcacd08a0b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "Generating-a-publicprivate-key-pair", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal", + "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + }, + "previous_title": "account_windows_paragraph_4.2", + "next_title": "account_windows_paragraph_4.4", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt new file mode 100644 index 000000000000..b7743b0b9ae3 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt @@ -0,0 +1,17 @@ +Start PuTTYgen.exe and follow these steps: +1. In "Parameters" (at the bottom of the window), choose "RSA" and set the number of + bits in the key to 4096. +2. Click on "Generate". To generate the key, you must move the mouse cursor over + the PuTTYgen window (this generates some random data that PuTTYgen + uses to generate the key pair). Once the key pair is generated, your + public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file". +3. Next, it is advised to fill in the "Key comment" field to make the key easier + to identify afterwards. +4. Next, you should specify a passphrase in the "Key passphrase" field and retype it in + the "Confirm passphrase" field. Remember, the passphrase protects the private key against + unauthorised use, so it is best to choose one that is not too easy + to guess but that you can still remember. Using a passphrase is not + required, but we recommend you to use a good passphrase unless you + are certain that your computer's hard disk is encrypted with a + decent password. (If you are not sure your disk is encrypted, it + probably isn't.)
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json new file mode 100644 index 000000000000..5fe2e81aa3d0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generating-a-publicprivate-key-pair", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_windows_paragraph_4.3", + "next_title": "account_windows_paragraph_4.5", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt new file mode 100644 index 000000000000..2326d87b6d74 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt @@ -0,0 +1,7 @@ +5. Save both the public and private keys in a folder on your personal + computer (We recommend to create and put them in the folder + "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the + buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and + "id_rsa.ppk" for the private key. +If you use another program to generate a key pair, please remember that +they need to be in the OpenSSH format to access the HPC clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json new file mode 100644 index 000000000000..79c584a8f411 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Generating-a-publicprivate-key-pair", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_windows_paragraph_4.4", + "next_title": "account_paragraph_5", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt new file mode 100644 index 000000000000..b8dba743c0a2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt @@ -0,0 +1,13 @@ +Using an SSH agent (optional) +It is possible to setup a SSH agent in Windows. This is an optional +configuration to help you to keep all your SSH keys (if you have +several) stored in the same key ring to avoid to type the SSH key +password each time. The SSH agent is also necessary to enable SSH hops +with key forwarding from Windows. +Pageant is the SSH authentication agent used in windows. This agent should be +available from the PuTTY installation package + or as +stand alone binary package. +After the installation just start the Pageant application in Windows, +this will start the agent in background. The agent icon will be visible +from the Windows panel. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json new file mode 100644 index 000000000000..69771b48c868 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_paragraph_5", + "next_title": "account_windows_paragraph_6.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt new file mode 100644 index 000000000000..62ac04dd9aa0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt @@ -0,0 +1,11 @@ +At this point the agent does not contain any private key. You should +include the private key(s) generated in the previous section Generating a public/private key pair. +1. Click on "Add key" +2. Select the private key file generated in Generating a public/private key pair ("id_rsa.ppk" by default). +3. Enter the same SSH key password used to generate the key. After this + step the new key will be included in Pageant to manage the SSH + connections. +4. You can see the SSH key(s) available in the key ring just clicking + on "View Keys". +5. You can change PuTTY setup to use the SSH agent. Open PuTTY and check + Connection > SSH > Auth > Allow agent forwarding. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json new file mode 100644 index 000000000000..246707080706 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair", + "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + }, + "previous_title": "account_windows_paragraph_6.1", + "next_title": "account_windows_paragraph_6.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt new file mode 100644 index 000000000000..17c94975dec9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt @@ -0,0 +1,5 @@ +Now you can connect to the login nodes as usual. The SSH agent will know +which SSH key should be used and you do not have to type the SSH +passwords each time, this task is done by Pageant agent automatically. +It is also possible to use WinSCP with Pageant, see + for more details. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json new file mode 100644 index 000000000000..d47ad3bd215d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "title_depth": 3, + "directory": "account", + "parent_title": "Getting-ready-to-request-an-account", + "previous_title": "account_windows_paragraph_6.2", + "next_title": "account_paragraph_7", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt new file mode 100644 index 000000000000..90c17263cf5f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt @@ -0,0 +1,7 @@ +Applying for the account +This file should have been stored in the directory +"C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh" +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json new file mode 100644 index 000000000000..d01ac9c3c16e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "account", + "subtitle": "Applying-for-the-account", + "title_depth": 2, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_8", + "next_title": "account_paragraph_10", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#applying-for-the-account" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt new file mode 100644 index 000000000000..aaf5a585ebd5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt @@ -0,0 +1,5 @@ +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json new file mode 100644 index 000000000000..45c2bd2d90e4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "title_depth": 2, + "directory": "connecting", + "parent_title": "Connecting-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_9", + "next_title": "connecting_paragraph_11", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt new file mode 100644 index 000000000000..b43909c15c43 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt @@ -0,0 +1,11 @@ +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +WinSCP +To transfer files to and from the cluster, we recommend the use of +WinSCP, a graphical file management tool which can transfer files using +secure protocols such as SFTP and SCP. WinSCP is freely available from +. +To transfer your files using WinSCP, +1. Open the program +2. 
The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" + 1. Click "New Site". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json new file mode 100644 index 000000000000..d9fbc64790ad --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_10", + "next_title": "connecting_windows_paragraph_11.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt new file mode 100644 index 000000000000..642bb4e34b66 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt @@ -0,0 +1,19 @@ + 2. Enter "login.hpc.ugent.be" in the "Host name" field. + 3. Enter your "vsc-account" in the "User name" field. + 4. Select "SCP" as the "file" protocol. + 5. Note that the password field remains empty. + 6. Click "Advanced...". + 7. Click "SSH > Authentication". + 8. Select your private key in the field "Private key file". +3. Press the "Save" button, to save the session under "Session > Sites" for future access. +4. Finally, when clicking on "Login", you will be asked for your key passphrase. 
+The first time you make a connection to the login node, a Security +Alert will appear and you will be asked to verify the authenticity of the +login node. +Make sure the fingerprint in the alert matches one of the following: +- ssh-rsa 2048 10:2f:31:21:04:75:cb:ed:67:e0:d5:0c:a1:5a:f4:78 +- ssh-rsa 2048 SHA256:W8Wz0/FkkCR2ulN7+w8tNI9M0viRgFr2YlHrhKD2Dd0 +- ssh-ed25519 255 19:28:76:94:52:9d:ff:7d:fb:8b:27:b6:d7:69:42:eb +- ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA +- ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f +- ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json new file mode 100644 index 000000000000..65055dc07647 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_windows_paragraph_11.1", + "next_title": "connecting_windows_paragraph_11.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt new file mode 100644 index 000000000000..b52c614f263e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt @@ -0,0 +1,7 @@ +If it does, press Yes, if it doesn't, 
please contact hpc@ugent.be. +Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255 +rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using. +It is safe to ignore this 255 versus 256 difference, but the part after should be +identical. +Now, try out whether you can transfer an arbitrary file from your local +machine to the HPC and back. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json new file mode 100644 index 000000000000..dd628f8e8cd6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_windows_paragraph_11.2", + "next_title": "connecting_paragraph_12", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt new file mode 100644 index 000000000000..14f191fe61a7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt @@ -0,0 +1,11 @@ +First Time connection to the HPC infrastructure +ssh_exchange_identification: read: Connection reset by peer +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. 
+If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. +Open a Terminal +You've generated a public/private key pair with PuTTYgen and have an +approved account on the VSC clusters. The next step is to setup the +connection to (one of) the HPC. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json new file mode 100644 index 000000000000..24d4df9e248d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Open-a-Terminal", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Connection-restrictions", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + }, + "previous_title": "connecting_paragraph_3", + "next_title": "connecting_windows_paragraph_4.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt new file mode 100644 index 000000000000..e481b47bc2b2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt @@ -0,0 +1,13 @@ +In the screenshots, we show the setup for user +to the HPC cluster via the login 
node "login.hpc.ugent.be". +1. Start the PuTTY executable putty.exe in your directory + C:\Program Files (x86)\PuTTY and the configuration screen will pop + up. As you will often use the PuTTY tool, we recommend adding a + shortcut on your desktop. +2. Within the category , in the field , enter the name of the + login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to. + +3. In the category "Connection > Data", in the field "Auto-login username", put in , which is your VSC + username that you have received by e-mail after your request was + approved. + diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json new file mode 100644 index 000000000000..a783f797fdbb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Open-a-Terminal", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Connection-restrictions", + "previous_title": "connecting_windows_paragraph_4.1", + "next_title": "connecting_windows_paragraph_4.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt new file mode 100644 index 000000000000..fbd5e76f2788 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt @@ -0,0 +1,13 @@ +4. 
In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key + (i.e., "id_rsa.ppk") that you generated and saved above. +5. In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox. +6. Now go back to , and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to + store the session information. + +7. Now pressing "Open" will open a terminal window and ask for your + passphrase. + +8. If this is your first time connecting, you will be asked to verify + the authenticity of the login node. Please see + section Warning message when first connecting to new host + on how to do this. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json new file mode 100644 index 000000000000..9da459060afd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "connecting", + "subtitle": "Open-a-Terminal", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Connection-restrictions", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + }, + "previous_title": "connecting_windows_paragraph_4.2", + "next_title": "connecting_windows_paragraph_4.4", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt new file mode 100644 index
000000000000..f4a1302750b9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt @@ -0,0 +1,11 @@ +9. After entering your correct passphrase, you will be connected to the + login-node of the HPC. +10. To check you can now "Print the Working Directory" (pwd) and check + the name of the computer, where you have logged in (hostname): + $ pwd + /user/home/gent/vsc400/vsc40000 + $ hostname -f + gligar07.gastly.os + +11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") + from the list, "Load" it and press "Open". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json new file mode 100644 index 000000000000..83127a292f84 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "connecting", + "subtitle": "Open-a-Terminal", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Connection-restrictions", + "previous_title": "connecting_windows_paragraph_4.3", + "next_title": "connecting_paragraph_5", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" +} \ No newline at end of file From 692e77b51b5756859398d992293aa49d6cb4d527 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 15:06:45 +0200 Subject: [PATCH 124/145] fix for issue with html links --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 1530eedf31cb..9e5baba82f3d 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ 
b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -176,7 +176,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): # add references for every link of format if re.search(r'a href=.*', content): link = content[8:-1] - curr_line = re.sub(f'<{content}>', "[" + str(len(linklist) + 1) + "]", curr_line) + curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist) + 1) + LINK_MARKER, curr_line) linklist.append(link) # drop the syntax words From 7f493a19d3265c4634267075958a7d4caf966e8c Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 15:08:23 +0200 Subject: [PATCH 125/145] fix for issue with html links --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 9e5baba82f3d..690385a95d45 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -175,8 +175,8 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): # add references for every link of format if re.search(r'a href=.*', content): - link = content[8:-1] - curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist) + 1) + LINK_MARKER, curr_line) + link = content[7:] + curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist)) + LINK_MARKER, curr_line) linklist.append(link) # drop the syntax words From 0e34396f77b0112171d8a30df36cd76c14fbac4d Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 15:30:41 +0200 Subject: [PATCH 126/145] fix for issue with relative links to the same document --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 690385a95d45..986ec2ef65bd 100644 --- 
a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -158,7 +158,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): if "#" not in match[1]: linklist.append(match[1]) else: - linklist.append(DOCS_URL + "/" + main_title + "/" + match[1]) + linklist.append(DOCS_URL + "/" + main_title.replace(".md", "") + "/" + match[1]) else: linklist.append(DOCS_URL + "/" + match[1].replace(".md", "/").replace("index", "").rstrip("/")) From fa0004482a3326f8385502b85e5c4ed9b4bf5410 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 15:31:33 +0200 Subject: [PATCH 127/145] added test for replace_markdown_markers --- .../tests/test_replace_markdown_markers.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py new file mode 100644 index 000000000000..f4cee6dd75cf --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py @@ -0,0 +1,46 @@ +import pytest +from chatbot_parser import replace_markdown_markers + + +@pytest.mark.parametrize("input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist", [ + # baseline test + ("A normal line with nothing special", [], False, "", "A normal line with nothing special", []), + # image 1 + ("![image](a-nice-image.png)", [], False, "", "", []), + # image 2 + ("![](img/Look-at-this-photograph.png)", [], False, "", "", []), + # link 1 (outside docs) + ("A line with a [link](a-nice-link.com)", ["another-link.be"], False, "", + "A line with a link§link§link§1§link§link§", ["another-link.be", "a-nice-link.com"]), + # link 2 (another document within the docs) + ("A line with a [link to the docs](account.md#welcome-e-mail)", ["another-link.be"], 
False, "", + "A line with a link to the docs§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), + # link 3 (the same document) + ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md", + "A line with a link to the same doc§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), + # codeblock + ("```shell", [], True, "", "", []), + # html syntax 1 (normal syntax) + ("A line with something in Bold", [], False, "", "A line with something in Bold", []), + # html syntax 2 (link) + ("A line with another link", ["other-website.com"], False, "", + "A line with another link§link§link§1§link§link§", ["other-website.com", "website.com"]), + # html syntax 3 (style) + ("

A line with style

", [], False, "", "A line with style", []), + # Bot comment + ("", [], False, "", "Something about the following table", []), + # non-Bot comment + ("", [], False, "", "", []), + # something else with <> + ("A line with an example where you should put ", [], False, "", "A line with an example where you should put ", []), + # info/tips/warnings + ("!!! warning", [], False, "", " warning", []), + # collapsable admonitions + ("??? note", [], False, "", " note", []), + # Markdown syntax 1 (not in code block) + ("`Line` **with** ++a++ _lot_ *of* _++markdown++_ `syntax`", [], False, "", "Line with a lot of markdown syntax", []), + # Markdown syntax 2 (in code block) + ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", []) +]) +def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist): + assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist) From b3952b2e769483bc1a6dc7c146b847f7519843a3 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 27 Aug 2024 16:22:41 +0200 Subject: [PATCH 128/145] fix to small inconsistency in metadata --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 986ec2ef65bd..371ee52e6cd2 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -805,7 +805,7 @@ def insert_links(text, links, options): for link_number in links.keys(): if link_number in present_links: - new_links[len(new_links.keys())] = links[link_number] + new_links[str(len(new_links.keys()))] = links[link_number] return text, new_links From 73072bf2cd57e28b8dafbb2e88ef30f52c95958e Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Tue, 
27 Aug 2024 16:26:41 +0200 Subject: [PATCH 129/145] added test for insert_links --- .../tests/test_insert_links.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py new file mode 100644 index 000000000000..9109f2518ad3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py @@ -0,0 +1,31 @@ +import pytest +from chatbot_parser import insert_links + +options_include = {"INCLUDE_LINKS_IN_PLAINTEXT": True} +options_leave_out = {"INCLUDE_LINKS_IN_PLAINTEXT": False} +links_input = {"0": "https://first_link.com", "1": "https://second_link.be", "2": "https://docs.hpc.ugent.be/account#welcome-e-mail", "3": "https://final-link.org"} + + +@pytest.mark.parametrize("text_input, options_input, text_output, new_links", [ + # Text without links + # don't include links + ("Text without links\nand with two lines.", options_leave_out, "Text without links\nand with two lines.", {}), + # include links + ("Text without links\nand with two lines.", options_include, "Text without links\nand with two lines.", {}), + # Text with all links + # don't include links + ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_leave_out, + "Text with all the links\nand with multiple lines.\n\n\n\n", links_input), + # include links + ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_include, + "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://second_link.be \n https://docs.hpc.ugent.be/account#welcome-e-mail \n https://final-link.org ", links_input), + # Text with some links + # don't include links + ("Text 
with all the links\nand with multiple lines.\n§link§link§1§link§link§\n§link§link§3§link§link§", options_leave_out, + "Text with all the links\nand with multiple lines.\n\n", {"0": "https://second_link.be", "1": "https://final-link.org"}), + # include links + ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§2§link§link§", options_include, + "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://docs.hpc.ugent.be/account#welcome-e-mail ", {"0": "https://first_link.com", "1": "https://docs.hpc.ugent.be/account#welcome-e-mail"}) +]) +def test_insert_links(text_input, options_input, text_output, new_links): + assert insert_links(text_input, links_input, options_input) == (text_output, new_links) From 31613094e7e8fd60e74a0d639fcb28b08d262e65 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 28 Aug 2024 13:16:04 +0200 Subject: [PATCH 130/145] make sure paragraphs only include full lists --- .../chatbot_parser.py | 31 +++++++++++-- .../generic/account/account_paragraph_12.txt | 3 -- .../generic/account/account_paragraph_3.txt | 6 +++ .../connecting/connecting_paragraph_1.txt | 5 +++ .../connecting/connecting_paragraph_2.txt | 14 +++--- .../connecting/connecting_paragraph_3.txt | 16 +++---- .../connecting_paragraph_3_metadata.json | 8 +++- .../account/account_linux_paragraph_11.1.txt | 5 ++- .../account/account_linux_paragraph_4.1.txt | 6 --- .../account/account_linux_paragraph_5.2.txt | 1 + .../account/account_linux_paragraph_5.3.txt | 1 - .../connecting_linux_paragraph_4.1.txt | 7 --- ...nnecting_linux_paragraph_4.1_metadata.json | 15 ------- .../account/account_macos_paragraph_11.1.txt | 5 ++- .../account/account_macos_paragraph_4.1.txt | 6 --- .../connecting_macos_paragraph_13.1.txt | 3 ++ .../connecting_macos_paragraph_13.2.txt | 3 -- .../connecting_macos_paragraph_4.1.txt | 7 --- ...nnecting_macos_paragraph_4.1_metadata.json | 15 ------- .../account_windows_paragraph_11.1.txt | 5 ++- 
.../account/account_windows_paragraph_4.1.txt | 13 +++--- ...ccount_windows_paragraph_4.1_metadata.json | 4 +- .../account/account_windows_paragraph_4.2.txt | 13 +++--- ...ccount_windows_paragraph_4.2_metadata.json | 8 +++- .../account/account_windows_paragraph_4.3.txt | 28 +++++++++--- ...ccount_windows_paragraph_4.3_metadata.json | 4 -- .../account/account_windows_paragraph_4.4.txt | 19 +------- ...ccount_windows_paragraph_4.4_metadata.json | 2 +- .../account/account_windows_paragraph_4.5.txt | 7 --- ...ccount_windows_paragraph_4.5_metadata.json | 11 ----- .../connecting_windows_paragraph_11.1.txt | 9 ++++ .../connecting_windows_paragraph_11.2.txt | 10 +---- .../connecting_windows_paragraph_11.3.txt | 1 - .../connecting_windows_paragraph_4.1.txt | 43 ++++++++++++++++--- ...ecting_windows_paragraph_4.1_metadata.json | 7 ++- .../connecting_windows_paragraph_4.2.txt | 13 ------ ...ecting_windows_paragraph_4.2_metadata.json | 11 ----- .../connecting_windows_paragraph_4.3.txt | 13 ------ ...ecting_windows_paragraph_4.3_metadata.json | 14 ------ .../connecting_windows_paragraph_4.4.txt | 11 ----- ...ecting_windows_paragraph_4.4_metadata.json | 11 ----- 41 files changed, 172 insertions(+), 242 deletions(-) delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 371ee52e6cd2..6ec1aa0e9b23 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -7,7 +7,7 @@ import re import shutil import yaml -from itertools import chain +from itertools import chain, tee, zip_longest from pathlib import Path from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template @@ -402,6 +402,9 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # variable to indicate that previous section was one with if-statements previous_contained_if = False + # variable to indicate that the previous line was part of a list + in_list = False + # paragraph number to add to title paragraph_number = 1 @@ -410,7 +413,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # TODO: define metadata data if split occurs on paragraphs and last_title and title_level are known 
(placeholder in place right now) if current_paragraph_number != -1: - last_title_level = 5 + last_title_level = 4 last_dir = "PLACEHOLDER" # list to keep track of most recent directories on each title level @@ -418,12 +421,32 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, with open(file, 'r') as readfile: - for line in readfile: + # Create two independent iterators from the original file iterator (needed to check for lists) + current_line, next_line = tee(readfile) + + # Advance the next_line iterator by one step, so it is always one step ahead + next(next_line, None) + + # Process the lines + for line, nxt in zip_longest(current_line, next_line, fillvalue=""): # detect if-statements starting or ending on the current line in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len( re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) + # detect whether the current line is in a list + if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line): # beginning of a list entry + in_list = True + elif re.search(r'^\s{2,}.+$', line) and in_list: # middle of a list entry + pass + elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries + pass + else: + in_list = False + + if in_list: + print(line[:-1]) + # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: @@ -434,7 +457,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, in_code_block = not in_code_block # check whether a new paragraph should be started - if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block: + if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block and not in_list: # create a title for the previous 
paragraph if current_paragraph_number == -1: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt index 6ee6880838e6..7ecd78e5c9f6 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt @@ -1,6 +1,3 @@ -5. Take into account that it will take some time before the new SSH - public key is active in your account on the system; waiting for - 15-30 minutes should be sufficient. Computation Workflow on the HPC A typical Computation workflow will be: 1. Connect to the HPC diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt index 9632ef1f5afc..e49468692735 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt @@ -9,3 +9,9 @@ How do SSH keys work? VSC and they put it on the door that gives access to your account. - the SSH private key is like a physical key: you don't hand it out to other people. +- anyone who has the key (and the optional password) can unlock the + door and log in to the account. +- the door to your VSC account is special: it can have multiple + locks (SSH public keys) attached to it, and you only need to open + one lock with the corresponding key (SSH private key) to open + the door (log in to the account). 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt index b144712c9df1..bc5a1f80140f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt @@ -11,3 +11,8 @@ you need to do or know: 3. Optionally, if you wish to use programs with a **graphical user interface**, you will need an X-server on your client system and log in to the login nodes with X-forwarding enabled. +4. Often several versions of software packages and libraries are + installed, so you need to select the ones you need. To manage + different versions efficiently, the VSC clusters use so-called + modules, so you will need to select and load the modules that + you need. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt index 4c1d879b954a..b150c8fbb28f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt @@ -1,8 +1,3 @@ -4. Often several versions of software packages and libraries are - installed, so you need to select the ones you need. To manage - different versions efficiently, the VSC clusters use so-called - modules, so you will need to select and load the modules that - you need. Connection restrictions Since March 20th 2020, restrictions are in place that limit from where you can connect to the VSC HPC infrastructure, in response to security @@ -12,3 +7,12 @@ networks, and from (most) Belgian commercial internet providers. All other IP domains are blocked by default. 
If you are connecting from an IP address that is not allowed direct access, you have the following options to get access to VSC login nodes: +- Use an VPN connection to connect to UGent the network (recommended). +- Whitelist your IP address automatically by accessing + and log in with your UGent account. + - While this web connection is active new SSH sessions can be + started. + - Active SSH sessions will remain active even when this web page + is closed. +- Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your + IP range (e.g., for industry access, automated processes). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt index 668a1e6df572..31dd64632665 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt @@ -1,12 +1,10 @@ -- Use an VPN connection to connect to UGent the network (recommended). -- Whitelist your IP address automatically by accessing - and log in with your UGent account. - - While this web connection is active new SSH sessions can be - started. - - Active SSH sessions will remain active even when this web page - is closed. -- Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your - IP range (e.g., for industry access, automated processes). Trying to establish an SSH connection from an IP address that does not adhere to these restrictions will result in an immediate failure to connect, with an error message like: +ssh_exchange_identification: read: Connection reset by peer + +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. 
+If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json index 4dc75d7dcf33..471e6bfcbf2a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json @@ -1,11 +1,15 @@ { "main_title": "connecting", - "subtitle": "Connection-restrictions", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", "title_depth": 2, "directory": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + }, "parent_title": "", "previous_title": "connecting_paragraph_2", "next_title": "connecting_paragraph_4", "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions" + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt index 3a46897bdee9..b2734cc9f897 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt @@ -8,4 +8,7 @@ Adding multiple SSH public keys (optional) if it is too short, wrong type, 
or in a wrong format. 4. (optional) If you lost your key, you can delete the old key on the same page. You should keep at least one valid public SSH key in your - account. \ No newline at end of file + account. +5. Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt index 1395e2ee7bd5..3a282a73a15d 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt @@ -1,10 +1,4 @@ How do SSH keys work -- anyone who has the key (and the optional password) can unlock the - door and log in to the account. -- the door to your VSC account is special: it can have multiple - locks (SSH public keys) attached to it, and you only need to open - one lock with the corresponding key (SSH private key) to open - the door (log in to the account). Since all VSC clusters use Linux as their main operating system, you will need to get acquainted with using the command-line interface and using the terminal (see tutorial). \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt index a166dd145039..318f913fba34 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt @@ -11,3 +11,4 @@ following commands: 1. 
ssh-keygen: to generate the SSH key pair (public + private key); 2. ssh: to open a shell on a remote machine; 3. sftp: a secure equivalent of ftp; +4. scp: a secure equivalent of the remote copy command rcp. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt index 2e8fe9e3a247..9d84f459724c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt @@ -1,4 +1,3 @@ -4. scp: a secure equivalent of the remote copy command rcp. Generate a public/private key pair with OpenSSH A key pair might already be present in the default location inside your home directory. Therefore, we first check if a key is available with the diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt deleted file mode 100644 index 773d03f06893..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1.txt +++ /dev/null @@ -1,7 +0,0 @@ -First Time connection to the HPC infrastructure -ssh_exchange_identification: read: Connection reset by peer -First Time connection to the HPC infrastructure -The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. -If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. -If you have any issues connecting to the HPC after you've followed these -steps, see Issues connecting to login node to troubleshoot. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json deleted file mode 100644 index f6745fc31dc5..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" - }, - "previous_title": "connecting_paragraph_3", - "next_title": "connecting_paragraph_5", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt index 3a46897bdee9..b2734cc9f897 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt @@ -8,4 +8,7 @@ Adding multiple SSH public keys (optional) if it is too short, wrong type, or in a wrong format. 4. (optional) If you lost your key, you can delete the old key on the same page. You should keep at least one valid public SSH key in your - account. \ No newline at end of file + account. +5. 
Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt index 1395e2ee7bd5..3a282a73a15d 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt @@ -1,10 +1,4 @@ How do SSH keys work -- anyone who has the key (and the optional password) can unlock the - door and log in to the account. -- the door to your VSC account is special: it can have multiple - locks (SSH public keys) attached to it, and you only need to open - one lock with the corresponding key (SSH private key) to open - the door (log in to the account). Since all VSC clusters use Linux as their main operating system, you will need to get acquainted with using the command-line interface and using the terminal (see tutorial). \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt index c5ed84e9ea0b..20a4acb40a80 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt @@ -10,3 +10,6 @@ This is the one-time setup you will need to do before connecting: 3. In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in your VSC account id (this looks like vsc40000). 4. 
Select the location of your SSH private key in the "SSH Private Key" field. +5. Finally, type in a name for the bookmark in the "Nickname" field and + close the window by pressing on the red circle in the top left + corner of the window. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt index d48d0ce00a39..1d20edf411f8 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt @@ -1,6 +1,3 @@ -5. Finally, type in a name for the bookmark in the "Nickname" field and - close the window by pressing on the red circle in the top left - corner of the window. To open the connection, click on the "Bookmarks" icon (which resembles an open book) and double-click on the bookmark you just created. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt deleted file mode 100644 index 773d03f06893..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1.txt +++ /dev/null @@ -1,7 +0,0 @@ -First Time connection to the HPC infrastructure -ssh_exchange_identification: read: Connection reset by peer -First Time connection to the HPC infrastructure -The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. -If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. 
-If you have any issues connecting to the HPC after you've followed these -steps, see Issues connecting to login node to troubleshoot. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json deleted file mode 100644 index 89431f52435f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" - }, - "previous_title": "connecting_paragraph_3", - "next_title": "connecting_paragraph_5", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt index 062ea570d967..0863009f2906 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt @@ -8,4 +8,7 @@ Adding multiple SSH public keys (optional) if it is too short, wrong type, or in a wrong format. 4. (optional) If you lost your key, you can delete the old key on the same page. 
You should keep at least one valid public SSH key in your - account. \ No newline at end of file + account. +5. Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt index 2ff8ffc1a085..1e70493305f7 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt @@ -1,10 +1,4 @@ How do SSH keys work -- anyone who has the key (and the optional password) can unlock the - door and log in to the account. -- the door to your VSC account is special: it can have multiple - locks (SSH public keys) attached to it, and you only need to open - one lock with the corresponding key (SSH private key) to open - the door (log in to the account). Since all VSC clusters use Linux as their main operating system, you will need to get acquainted with using the command-line interface and using the terminal (see tutorial). @@ -12,3 +6,10 @@ A typical Windows environment does not come with pre-installed software to connect and run command-line executables on a HPC. Some tools need to be installed on your Windows machine first, before we can start the actual work. +Get PuTTY: A free telnet/SSH client +We recommend to use the PuTTY tools package, which is freely available. +You do not need to install PuTTY, you can download the PuTTY and +PuTTYgen executable and run it. This can be useful in situations where +you do not have the required permissions to install software on the +computer you are using. Alternatively, an installation package is also +available. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json index 08573d26bfe7..ce74735c538f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -1,6 +1,6 @@ { "main_title": "account", - "subtitle": "How-do-SSH-keys-work", + "subtitle": "Get-PuTTY-A-free-telnetSSH-client", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", @@ -10,5 +10,5 @@ "previous_title": "account_paragraph_3", "next_title": "account_windows_paragraph_4.2", "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#how-do-ssh-keys-work" + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt index c89b45d8f2bf..1a30a219fecb 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt @@ -1,13 +1,12 @@ -Get PuTTY: A free telnet/SSH client -We recommend to use the PuTTY tools package, which is freely available. -You do not need to install PuTTY, you can download the PuTTY and -PuTTYgen executable and run it. This can be useful in situations where -you do not have the required permissions to install software on the -computer you are using. 
Alternatively, an installation package is also -available. You can download PuTTY from the official address: . You probably want the 64-bits version. If you can install software on your computer, you can use the "Package files", if not, you can download and use putty.exe and puttygen.exe in the "Alternative binary files" section. +The PuTTY package consists of several components, but we'll only use +two: +1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) +2. PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, + see Generate a public/private key pair) +Generating a public/private key pair diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json index bedb3d332181..9616b41452a8 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json @@ -1,11 +1,15 @@ { "main_title": "account", - "subtitle": "Get-PuTTY-A-free-telnetSSH-client", + "subtitle": "Generating-a-publicprivate-key-pair", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal", + "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + }, "previous_title": "account_windows_paragraph_4.1", "next_title": "account_windows_paragraph_4.3", "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client" + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" } \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt index cebd1da3bafe..de5d164bb7a6 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt @@ -1,9 +1,3 @@ -The PuTTY package consists of several components, but we'll only use -two: -1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) -2. PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, - see Generate a public/private key pair) -Generating a public/private key pair Before requesting a VSC account, you need to generate a pair of ssh keys. You need 2 keys, a public and a private key. You can visualise the public key as a lock to which only you have the key (your private key). @@ -11,3 +5,25 @@ You can send a copy of your lock to anyone without any problems, because only you can open it, as long as you keep your private key secure. To generate a public/private key pair, you can use the PuTTYgen key generator. +Start PuTTYgen.exe it and follow these steps: +1. In "Parameters" (at the bottom of the window), choose "RSA" and set the number of + bits in the key to 4096. +2. Click on "Generate". To generate the key, you must move the mouse cursor over + the PuTTYgen window (this generates some random data that PuTTYgen + uses to generate the key pair). Once the key pair is generated, your + public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file". +3. Next, it is advised to fill in the "Key comment" field to make it easier + identifiable afterwards. +4. Next, you should specify a passphrase in the "Key passphrase" field and retype it in + the "Confirm passphrase" field. 
Remember, the passphrase protects the private key against + unauthorised use, so it is best to choose one that is not too easy + to guess but that you can still remember. Using a passphrase is not + required, but we recommend you to use a good passphrase unless you + are certain that your computer's hard disk is encrypted with a + decent password. (If you are not sure your disk is encrypted, it + probably isn't.) +5. Save both the public and private keys in a folder on your personal + computer (We recommend to create and put them in the folder + "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the + buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and + "id_rsa.ppk" for the private key. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json index a8fcacd08a0b..06b6e998c081 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json @@ -4,10 +4,6 @@ "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal", - "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" - }, "previous_title": "account_windows_paragraph_4.2", "next_title": "account_windows_paragraph_4.4", "OS": "windows", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt index b7743b0b9ae3..d0425d6738f4 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt @@ -1,17 +1,2 @@ -Start PuTTYgen.exe it and follow these steps: -1. In "Parameters" (at the bottom of the window), choose "RSA" and set the number of - bits in the key to 4096. -2. Click on "Generate". To generate the key, you must move the mouse cursor over - the PuTTYgen window (this generates some random data that PuTTYgen - uses to generate the key pair). Once the key pair is generated, your - public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file". -3. Next, it is advised to fill in the "Key comment" field to make it easier - identifiable afterwards. -4. Next, you should specify a passphrase in the "Key passphrase" field and retype it in - the "Confirm passphrase" field. Remember, the passphrase protects the private key against - unauthorised use, so it is best to choose one that is not too easy - to guess but that you can still remember. Using a passphrase is not - required, but we recommend you to use a good passphrase unless you - are certain that your computer's hard disk is encrypted with a - decent password. (If you are not sure your disk is encrypted, it - probably isn't.) +If you use another program to generate a key pair, please remember that +they need to be in the OpenSSH format to access the HPC clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json index 5fe2e81aa3d0..fba810e72990 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json @@ -5,7 +5,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "previous_title": "account_windows_paragraph_4.3", - "next_title": "account_windows_paragraph_4.5", + "next_title": "account_paragraph_5", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt deleted file mode 100644 index 2326d87b6d74..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5.txt +++ /dev/null @@ -1,7 +0,0 @@ -5. Save both the public and private keys in a folder on your personal - computer (We recommend to create and put them in the folder - "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the - buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and - "id_rsa.ppk" for the private key. -If you use another program to generate a key pair, please remember that -they need to be in the OpenSSH format to access the HPC clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json deleted file mode 100644 index 79c584a8f411..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.5_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generating-a-publicprivate-key-pair", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_windows_paragraph_4.4", - "next_title": "account_paragraph_5", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt index b43909c15c43..a4f00ba7a5ff 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt @@ -9,3 +9,12 @@ To transfer your files using WinSCP, 1. Open the program 2. The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" 1. Click "New Site". + 2. Enter "login.hpc.ugent.be" in the "Host name" field. + 3. Enter your "vsc-account" in the "User name" field. + 4. Select "SCP" as the "file" protocol. + 5. Note that the password field remains empty. + 6. Click "Advanced...". + 7. Click "SSH > Authentication". + 8. Select your private key in the field "Private key file". +3. 
Press the "Save" button, to save the session under "Session > Sites" for future access. +4. Finally, when clicking on "Login", you will be asked for your key passphrase. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt index 642bb4e34b66..82c71ac41299 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt @@ -1,12 +1,3 @@ - 2. Enter "login.hpc.ugent.be" in the "Host name" field. - 3. Enter your "vsc-account" in the "User name" field. - 4. Select "SCP" as the "file" protocol. - 5. Note that the password field remains empty. - 6. Click "Advanced...". - 7. Click "SSH > Authentication". - 8. Select your private key in the field "Private key file". -3. Press the "Save" button, to save the session under "Session > Sites" for future access. -4. Finally, when clicking on "Login", you will be asked for your key passphrase. The first time you make a connection to the login node, a Security Alert will appear and you will be asked to verify the authenticity of the login node. @@ -17,3 +8,4 @@ Make sure the fingerprint in the alert matches one of the following: - ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA - ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f - ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ +If it does, press Yes, if it doesn't, please contact hpc@ugent.be. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt index b52c614f263e..c0ffe6b46021 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt @@ -1,4 +1,3 @@ -If it does, press Yes, if it doesn't, please contact hpc@ugent.be. Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255 rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using. It is safe to ignore this 255 versus 256 difference, but the part after should be diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt index 14f191fe61a7..b5ecfb93e889 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt @@ -1,11 +1,42 @@ First Time connection to the HPC infrastructure -ssh_exchange_identification: read: Connection reset by peer -First Time connection to the HPC infrastructure -The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. -If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. -If you have any issues connecting to the HPC after you've followed these -steps, see Issues connecting to login node to troubleshoot. 
Open a Terminal You've generated a public/private key pair with PuTTYgen and have an approved account on the VSC clusters. The next step is to setup the connection to (one of) the HPC. +In the screenshots, we show the setup for user +to the HPC cluster via the login node "login.hpc.ugent.be". +1. Start the PuTTY executable putty.exe in your directory + C:\Program Files (x86)\PuTTY and the configuration screen will pop + up. As you will often use the PuTTY tool, we recommend adding a + shortcut on your desktop. +2. Within the category , in the field , enter the name of the + login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to. + +3. In the category "Connection > Data", in the field "Auto-login username", put in , which is your VSC + username that you have received by e-mail after your request was + approved. + +4. In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key + (i.e., "id_rsa.ppk") that you generated and saved above. +5. In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox. +6. Now go back to , and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to + store the session information. + +7. Now pressing "Open", will open a terminal window and asks for you + passphrase. + +8. If this is your first time connecting, you will be asked to verify + the authenticity of the login node. Please see + section Warning message when first connecting to new host + on how to do this. +9. After entering your correct passphrase, you will be connected to the + login-node of the HPC. +10. To check you can now "Print the Working Directory" (pwd) and check + the name of the computer, where you have logged in (hostname): + $ pwd + /user/home/gent/vsc400/vsc40000 + $ hostname -f + gligar07.gastly.os + +11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") + from the list, "Load" it and press "Open". 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json index 24d4df9e248d..ef4de8bd8e47 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json @@ -3,13 +3,12 @@ "subtitle": "Open-a-Terminal", "title_depth": 3, "directory": "connecting", - "parent_title": "Connection-restrictions", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" }, "previous_title": "connecting_paragraph_3", - "next_title": "connecting_windows_paragraph_4.2", + "next_title": "connecting_paragraph_5", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt deleted file mode 100644 index e481b47bc2b2..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2.txt +++ /dev/null @@ -1,13 +0,0 @@ -In the screenshots, we show the setup for user -to the HPC cluster via the login node "login.hpc.ugent.be". -1. Start the PuTTY executable putty.exe in your directory - C:\Program Files (x86)\PuTTY and the configuration screen will pop - up. 
As you will often use the PuTTY tool, we recommend adding a - shortcut on your desktop. -2. Within the category , in the field , enter the name of the - login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to. - -3. In the category "Connection > Data", in the field "Auto-login username", put in , which is your VSC - username that you have received by e-mail after your request was - approved. - diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json deleted file mode 100644 index a783f797fdbb..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.2_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Open-a-Terminal", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Connection-restrictions", - "previous_title": "connecting_windows_paragraph_4.1", - "next_title": "connecting_windows_paragraph_4.3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt deleted file mode 100644 index fbd5e76f2788..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3.txt +++ /dev/null @@ -1,13 +0,0 @@ -4. In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key - (i.e., "id_rsa.ppk") that you generated and saved above. -5. 
In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox. -6. Now go back to , and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to - store the session information. - -7. Now pressing "Open", will open a terminal window and asks for you - passphrase. - -8. If this is your first time connecting, you will be asked to verify - the authenticity of the login node. Please see - section Warning message when first connecting to new host - on how to do this. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json deleted file mode 100644 index 9da459060afd..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.3_metadata.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Open-a-Terminal", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Connection-restrictions", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" - }, - "previous_title": "connecting_windows_paragraph_4.2", - "next_title": "connecting_windows_paragraph_4.4", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt deleted file mode 100644 index f4a1302750b9..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4.txt +++ /dev/null @@ -1,11 +0,0 @@ -9. 
After entering your correct passphrase, you will be connected to the - login-node of the HPC. -10. To check you can now "Print the Working Directory" (pwd) and check - the name of the computer, where you have logged in (hostname): - $ pwd - /user/home/gent/vsc400/vsc40000 - $ hostname -f - gligar07.gastly.os - -11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") - from the list, "Load" it and press "Open". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json deleted file mode 100644 index 83127a292f84..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.4_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Open-a-Terminal", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Connection-restrictions", - "previous_title": "connecting_windows_paragraph_4.3", - "next_title": "connecting_paragraph_5", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" -} \ No newline at end of file From 3407be3ea8b45de9d43e91fda8c4730ab0ae34e2 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 28 Aug 2024 13:43:49 +0200 Subject: [PATCH 131/145] adapted to the new source files --- .../chatbot_parser.py | 11 ++----- .../generic/account/account_paragraph_10.txt | 1 - .../compiling_your_software_paragraph_1.txt | 10 ++++++ .../compiling_your_software_paragraph_10.txt | 19 +++++++++++ ...g_your_software_paragraph_10_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_11.txt | 20 ++++++++++++ ...g_your_software_paragraph_11_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_12.txt | 9 ++++++ ...g_your_software_paragraph_12_metadata.json | 11 +++++++ 
...ng_your_software_paragraph_1_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_2.txt | 13 ++++++++ ...ng_your_software_paragraph_2_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_3.txt | 13 ++++++++ ...ng_your_software_paragraph_3_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_4.txt | 15 +++++++++ ...ng_your_software_paragraph_4_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_5.txt | 16 ++++++++++ ...ng_your_software_paragraph_5_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_6.txt | 30 +++++++++++++++++ ...ng_your_software_paragraph_6_metadata.json | 14 ++++++++ .../compiling_your_software_paragraph_7.txt | 15 +++++++++ ...ng_your_software_paragraph_7_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_8.txt | 19 +++++++++++ ...ng_your_software_paragraph_8_metadata.json | 11 +++++++ .../compiling_your_software_paragraph_9.txt | 32 +++++++++++++++++++ ...ng_your_software_paragraph_9_metadata.json | 11 +++++++ .../account/account_linux_paragraph_5.3.txt | 2 +- .../account/account_linux_paragraph_5.4.txt | 7 +--- .../account/account_linux_paragraph_7.1.txt | 2 +- .../account/account_linux_paragraph_7.2.txt | 2 +- .../connecting_linux_paragraph_10.1.txt | 10 +++--- .../connecting_linux_paragraph_12.4.txt | 2 +- .../connecting_linux_paragraph_12.5.txt | 2 +- .../connecting_linux_paragraph_12.6.txt | 2 +- .../connecting_linux_paragraph_5.1.txt | 2 +- .../connecting_linux_paragraph_5.2.txt | 2 +- .../account/account_macos_paragraph_5.3.txt | 2 +- .../account/account_macos_paragraph_5.4.txt | 7 +--- .../account/account_macos_paragraph_7.1.txt | 2 +- .../account/account_macos_paragraph_7.2.txt | 2 +- .../connecting_macos_paragraph_10.1.txt | 10 +++--- .../connecting_macos_paragraph_12.4.txt | 2 +- .../connecting_macos_paragraph_12.5.txt | 2 +- .../connecting_macos_paragraph_12.6.txt | 2 +- .../connecting_macos_paragraph_5.1.txt | 2 +- .../connecting_macos_paragraph_5.2.txt | 
2 +- .../connecting_windows_paragraph_4.1.txt | 3 +- 47 files changed, 376 insertions(+), 51 deletions(-) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 6ec1aa0e9b23..2b23fb4e962c 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -195,10 +195,6 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): elif re.fullmatch(r'!--.*?--', content): curr_line = re.sub(r'<.*?>', "", curr_line) - # special case 
(ugly fix) - elif ' files', "", curr_line) - # keep the rest else: pass @@ -224,7 +220,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): for i, content in enumerate(asterisks): curr_line = re.sub(r"(\*+)" + content[1] + r"\1", content[1], curr_line) - pluses = re.findall(r'\+\+(.+?)\+\+', curr_line) + pluses = list(set(re.findall(r'\+\+([^ ]+?)\+\+', curr_line) + re.findall(r'\+\+(".+?")\+\+', curr_line))) if pluses: for i, content in enumerate(pluses): curr_line = re.sub(r"\+\+" + content + r"\+\+", content, curr_line) @@ -437,6 +433,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # detect whether the current line is in a list if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line): # beginning of a list entry in_list = True + # print("List entry found") elif re.search(r'^\s{2,}.+$', line) and in_list: # middle of a list entry pass elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries @@ -444,9 +441,6 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, else: in_list = False - if in_list: - print(line[:-1]) - # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: @@ -969,6 +963,7 @@ def main(options, verbose=True): # for loops over all files for filename in filenames.keys(): + print("Processing " + filename) ################### define/reset loop specific variables ################### # boolean indicating whether the current file is part of the linux tutorial diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt index 371dd9db52b4..f486b9b13489 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt 
@@ -10,7 +10,6 @@ https://account.vscentrum.be/ For further info please visit https://www.vscentrum.be/user-portal Kind regards, -- The VSC administrators - Now, you can start using the HPC. You can always look up your VSC id later by visiting . Adding multiple SSH public keys (optional) diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt new file mode 100644 index 000000000000..db1afd43e680 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt @@ -0,0 +1,10 @@ +Compiling and testing your software on the HPC +All nodes in the HPC cluster are running the "RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty)" +Operating system, which is a specific version of Red Hat Enterprise Linux. This means that all the +software programs +(executable) that the end-user wants to run on the HPC first must be +compiled for RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). It also means that you first have to install all the +required external software packages on the HPC. +Most commonly used compilers are already pre-installed on the HPC and can be +used straight away. Also, many popular external software packages, which +are regularly used in the scientific community, are also pre-installed. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt new file mode 100644 index 000000000000..d49ba76b01aa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt @@ -0,0 +1,19 @@ +The "mpi_hello.c" program is a simple source file, written in C with MPI +library calls. +Then, check the command line options for *"mpicc" (GNU C-Compiler with +MPI extensions)*, then we compile and list the contents of the directory +again: +mpicc --help +mpicc -o mpihello mpihello.c +ls -l +A new file "hello" has been created. Note that this program has +"execute" rights. +Let's test this program on the "login" node first: +$ ./mpihello +Hello World from Node 0. +It seems to work, now run it on the HPC. +qsub mpihello.pbs +Compiling a parallel program in Intel Parallel Studio Cluster Edition +We will now compile the same program, but using the Intel Parallel +Studio Cluster Edition compilers. 
We stay in the examples directory for +this chapter: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json new file mode 100644 index 000000000000..ca0d7d806690 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_9", + "next_title": "compiling_your_software_paragraph_11", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt new file mode 100644 index 000000000000..be02d069ac7f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt @@ -0,0 +1,20 @@ +cd ~/examples/Compiling-and-testing-your-software-on-the-HPC +We will compile this C/MPI -file into an executable with the Intel +Parallel Studio Cluster Edition. First, clear the modules (purge) and +then load the latest "intel" module: +module purge +module load intel +Then, compile and list the contents of the directory again. The Intel +equivalent of mpicc is mpiicc. +mpiicc -o mpihello mpihello.c +ls -l +Note that the old "mpihello" file has been overwritten. 
Let's test this +program on the "login" node first: +$ ./mpihello +Hello World from Node 0. +It seems to work, now run it on the HPC. +qsub mpihello.pbs +Note: The AUGent only has a license for the Intel Parallel Studio Cluster +Edition for a fixed number of users. As such, it might happen that you +have to wait a few minutes before a floating license becomes available +for your use. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json new file mode 100644 index 000000000000..808331a3f9d7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_10", + "next_title": "compiling_your_software_paragraph_12", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt new file mode 100644 index 000000000000..1d37014a4263 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt @@ -0,0 +1,9 @@ +Note: The Intel Parallel Studio Cluster Edition contains equivalent +compilers for all GNU compilers. 
Hereafter the overview for C, C++ and +Fortran compilers. +| | Sequential Program | | **Parallel Program (with MPI)** | | +|-------------|------------------------|-----------|---------------------------------|-----------| +| | GNU | Intel | GNU | Intel | +| C | gcc | icc | mpicc | mpiicc | +| **C++** | g++ | icpc | mpicxx | mpiicpc | +| Fortran | gfortran | ifort | mpif90 | mpiifort | \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json new file mode 100644 index 000000000000..d032428daf16 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_11", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json new file mode 100644 index 000000000000..ec4b55c9a4df --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-and-testing-your-software-on-the-HPC", + 
"title_depth": 1, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": null, + "next_title": "compiling_your_software_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-testing-your-software-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt new file mode 100644 index 000000000000..b52639b649d9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt @@ -0,0 +1,13 @@ +Check the pre-installed software on the HPC +In order to check all the available modules and their version numbers, +which are pre-installed on the HPC enter: +When your required application is not available on the HPC please contact +any HPC member. Be aware of potential "License Costs". "Open Source" +software is often preferred. +Porting your code +To port a software-program is to translate it from the operating system in +which it was developed (e.g., Windows 7) to another operating system +(e.g., Red Hat Enterprise Linux on our HPC) so that it can be used there. Porting implies some +degree of effort, but not nearly as much as redeveloping the program in +the new environment. It all depends on how "portable" you wrote your +code. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json new file mode 100644 index 000000000000..00750c81d976 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Porting-your-code", + "title_depth": 2, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_1", + "next_title": "compiling_your_software_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt new file mode 100644 index 000000000000..f994f0bc1482 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt @@ -0,0 +1,13 @@ +In the simplest case the file or files may simply be copied from one +machine to the other. However, in many cases the software is installed +on a computer in a way, which depends upon its detailed hardware, +software, and setup, with device drivers for particular devices, using +installed operating system and supporting software components, and using +different directories. 
+In some cases software, usually described as "portable software" is +specifically designed to run on different computers with compatible +operating systems and processors without any machine-dependent +installation; it is sufficient to transfer specified directories and +their contents. Hardware- and software-specific information is often +stored in configuration files in specified locations (e.g., the registry +on machines running MS Windows). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json new file mode 100644 index 000000000000..90e7d236beb6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Porting-your-code", + "title_depth": 2, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_2", + "next_title": "compiling_your_software_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt new file mode 100644 index 000000000000..f7bf4172b71d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt @@ -0,0 +1,15 @@ +Software, which is not portable in this sense, will have to be +transferred with modifications to support the environment on the +destination machine. 
+Whilst programming, it would be wise to stick to certain standards +(e.g., ISO/ANSI/POSIX). This will ease the porting of your code to other +platforms. +Porting your code to the RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty) platform is the responsibility of the end-user. +Compiling and building on the HPC +Compiling refers to the process of translating code written in some +programming language, e.g., Fortran, C, or C++, to machine code. +Building is similar, but includes gluing together the machine code +resulting from different source files into an executable (or library). +The text below guides you through some basic problems typical for small +software projects. For larger projects it is more appropriate to use +makefiles or even an advanced build system like CMake. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json new file mode 100644 index 000000000000..b7c9ef0f71b9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-and-building-on-the-HPC", + "title_depth": 2, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_3", + "next_title": "compiling_your_software_paragraph_5", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt new file mode 
100644 index 000000000000..342262b92640 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt @@ -0,0 +1,16 @@ +All the HPC nodes run the same version of the Operating System, i.e. RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). So, +it is sufficient to compile your program on any compute node. Once you +have generated an executable with your compiler, this executable should +be able to run on any other compute-node. +A typical process looks like: +1. Copy your software to the login-node of the HPC +2. Start an interactive session on a compute node; +3. Compile it; +4. Test it locally; +5. Generate your job scripts; +6. Test it on the HPC +7. Run it (in parallel); +We assume you've copied your software to the HPC. The next step is to request +your private compute node. +$ qsub -I +qsub: waiting for job 123456 to start diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json new file mode 100644 index 000000000000..02a8fad0ae2b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-and-building-on-the-HPC", + "title_depth": 2, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_4", + "next_title": "compiling_your_software_paragraph_6", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt new file mode 100644 index 000000000000..7ebde6648789 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt @@ -0,0 +1,30 @@ +Compiling a sequential program in C +Go to the examples for chapter +Compiling and testing your software on the HPC and load the +foss module: +cd ~/examples/Compiling-and-testing-your-software-on-the-HPC +module load foss +We now list the directory and explore the contents of the "hello.c" +program: +$ ls -l +total 512 +-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c +-rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* +-rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c +-rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs +/* + * VSC : Flemish Supercomputing Centre + * Tutorial : Introduction to HPC + * Description: Print 500 numbers, whilst waiting 1 second in between + */ +#include "stdio.h" +int main( int argc, char *argv[] ) +{ + int i; + for (i=0; i<500; i++) + { + printf("Hello #%d\n", i); + fflush(stdout); + sleep(1); + } +} diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json new file mode 100644 index 000000000000..16942249583b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json @@ -0,0 +1,14 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-sequential-program-in-C", + "title_depth": 3, + "directory": "compiling_your_software", + "links": { + "0": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" + }, + "parent_title": "", + "previous_title": 
"compiling_your_software_paragraph_5", + "next_title": "compiling_your_software_paragraph_7", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt new file mode 100644 index 000000000000..1d58d0d6ae4e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt @@ -0,0 +1,15 @@ +The "hello.c" program is a simple source file, written in C. It'll print +500 times "Hello #<num>", and waits one second between 2 printouts. +We first need to compile this C-file into an executable with the +gcc-compiler. +First, check the command line options for *"gcc" (GNU C-Compiler)*, then +we compile. the O2 option enables a moderate level of optimization when compiling the code. +It instructs the compiler to optimize the code for better performance without significantly increasing compilation time. 
+Finally, list the contents of the directory again: +$ gcc -help +$ gcc -O2 -o hello hello.c +$ ls -l +total 512 +-rwxrwxr-x 1 vsc40000 7116 Sep 16 11:43 hello* +-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c +-rwxr-xr-x 1 vsc40000 130 Sep 16 11:39 hello.pbs* diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json new file mode 100644 index 000000000000..e5f3161c3f28 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-sequential-program-in-C", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_6", + "next_title": "compiling_your_software_paragraph_8", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt new file mode 100644 index 000000000000..5ca5de1e6d44 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt @@ -0,0 +1,19 @@ +A new file "hello" has been created. Note that this file has "execute" +rights, i.e., it is an executable. More often than not, calling gcc -- +or any other compiler for that matter -- will provide you with a list of +errors and warnings referring to mistakes the programmer made, such as +typos, syntax errors. 
You will have to correct them first in order to +make the code compile. Warnings pinpoint less crucial issues that may +relate to performance problems, using unsafe or obsolete language +features, etc. It is good practice to remove all warnings from a +compilation process, even if they seem unimportant so that a code change +that produces a warning does not go unnoticed. +Let's test this program on the local compute node, which is at your +disposal after the qsub --I command: +$ ./hello +Hello #0 +Hello #1 +Hello #2 +Hello #3 +Hello #4 +... diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json new file mode 100644 index 000000000000..942949951d1c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-sequential-program-in-C", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_7", + "next_title": "compiling_your_software_paragraph_9", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt new file mode 100644 index 000000000000..28982d2bd95a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt @@ -0,0 +1,32 @@ +It seems to work, now run it on the HPC +qsub hello.pbs 
+Compiling a parallel program in C/MPI +cd ~/examples/Compiling-and-testing-your-software-on-the-HPC +List the directory and explore the contents of the "mpihello.c" +program: +$ ls -l +total 512 +total 512 +-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c +-rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* +-rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c +-rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs +/* + * VSC : Flemish Supercomputing Centre + * Tutorial : Introduction to HPC + * Description: Example program, to compile with MPI + */ +#include +#include +main(int argc, char **argv) +{ + int node, i, j; + float f; + MPI_Init(&argc,&argv); + MPI_Comm_rank(MPI_COMM_WORLD, &node); + + printf("Hello World from Node %d.\n", node); + for (i=0; i<=100000; i++) + f=i*2.718281828*i+i+i*3.141592654; + MPI_Finalize(); +} diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json new file mode 100644 index 000000000000..fe51e423a96c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json @@ -0,0 +1,11 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-CMPI", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_8", + "next_title": "compiling_your_software_paragraph_10", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-cmpi" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt index 
9d84f459724c..5df90a3dd7c0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt @@ -2,7 +2,7 @@ Generate a public/private key pair with OpenSSH A key pair might already be present in the default location inside your home directory. Therefore, we first check if a key is available with the "list short" ("ls") command: -$ ls ~/.ssh +ls ~/.ssh If a key-pair is already available, you would normally get: authorized_keys id_rsa id_rsa.pub known_hosts Otherwise, the command will show: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt index 3cde4395d81f..d29d61d27d98 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt @@ -10,9 +10,4 @@ the passphrase. Be sure to never give away your private key, it is private and should stay private. You should not even copy it to one of your other machines, instead, you should create a new public/private key pair for each machine. -$ ssh-keygen -t rsa -b 4096 -Generating public/private rsa key pair. Enter file in which to save the -key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no -passphrase): Enter same passphrase again: Your identification has been -saved in /home/user/.ssh/id_rsa. Your public key has been saved in -/home/user/.ssh/id_rsa.pub. 
+ssh-keygen -t rsa -b 4096 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt index e3ef2176f09e..8e8429c16422 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt @@ -7,7 +7,7 @@ Agent admitted failure to sign using the key. Permission denied (publickey,gssapi-keyex,gssapi-with-mic). This could be fixed using the ssh-add command. You can include the new private keys' identities in your keyring with: -$ ssh-add +ssh-add tip Without extra options ssh-add adds any key located at $HOME/.ssh directory, but you can specify the private key location path as diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt index 93019fa1a6aa..c227dbbb6e2f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt @@ -1,5 +1,5 @@ Check that your key is available from the keyring with: -$ ssh-add -l +ssh-add -l After these changes the key agent will keep your SSH key to connect to the clusters as usual. 
tip diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt index 3e588c709d44..4c8894438c9f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt @@ -14,7 +14,6 @@ First Time connection to the HPC infrastructure export LANG="en_US.UTF-8" ... - tip "tip: vi" To start entering text in vi: move to the place you want to start entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" @@ -24,11 +23,10 @@ First Time connection to the HPC infrastructure or alternatively (if you are not comfortable with the Linux editors), again on your local machine: - $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile You can now log out, open a new terminal/shell on your local machine and reconnect to the login node, and you should not get these warnings anymore. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt index 2664953ed0ce..d09b69552ef7 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt @@ -11,4 +11,4 @@ It's also possible to copy entire directories (and their contents) with the -r flag. For example, if we want to copy the local directory dataset to $VSC_SCRATCH, we can use the following command (assuming you've created the scratch symlink): -$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt index 51d39b548c3a..532d57bb4a58 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt @@ -1,6 +1,6 @@ If you don't use the -r option to copy a directory, you will run into the following error: -$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +$ scp dataset vsc40000@login.hpc.ugent.be:scratch dataset: not a regular file Using sftp The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt index 
4ae257101f16..1ef13b80c6f0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt @@ -1,5 +1,5 @@ One easy way of starting a sftp session is -$ sftp vsc40000@login.hpc.ugent.be +sftp vsc40000@login.hpc.ugent.be Typical and popular commands inside an sftp session are: | | | |:--------------------------|:-------------------------------------------------------------------------------------| diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt index 94d5d9500a3c..27ae3fb7bd45 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt @@ -1,7 +1,7 @@ First Time connection to the HPC infrastructure Connect Open up a terminal and enter the following command to connect to the HPC. -$ ssh vsc40000@login.hpc.ugent.be +ssh vsc40000@login.hpc.ugent.be Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. 
The first time you make a connection to the login node, you will be diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt index 312fe885cb0c..be01e09bba0f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt @@ -1,4 +1,4 @@ Permission denied (publickey,gssapi-keyex,gssapi-with-mic). In this case, use the -i option for the ssh command to specify the location of your private key. For example: -$ ssh -i /home/example/my_keys +ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt index 2c97d5974257..a8c087f818b3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt @@ -6,7 +6,7 @@ Generate a public/private key pair with OpenSSH A key pair might already be present in the default location inside your home directory. 
Therefore, we first check if a key is available with the "list short" ("ls") command: -$ ls ~/.ssh +ls ~/.ssh If a key-pair is already available, you would normally get: authorized_keys id_rsa id_rsa.pub known_hosts Otherwise, the command will show: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt index 3cde4395d81f..d29d61d27d98 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt @@ -10,9 +10,4 @@ the passphrase. Be sure to never give away your private key, it is private and should stay private. You should not even copy it to one of your other machines, instead, you should create a new public/private key pair for each machine. -$ ssh-keygen -t rsa -b 4096 -Generating public/private rsa key pair. Enter file in which to save the -key (/home/user/.ssh/id_rsa): Enter passphrase (empty for no -passphrase): Enter same passphrase again: Your identification has been -saved in /home/user/.ssh/id_rsa. Your public key has been saved in -/home/user/.ssh/id_rsa.pub. +ssh-keygen -t rsa -b 4096 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt index d204f4e43920..1069ebd9fbd3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt @@ -7,7 +7,7 @@ Agent admitted failure to sign using the key. Permission denied (publickey,gssapi-keyex,gssapi-with-mic). This could be fixed using the ssh-add command. 
You can include the new private keys' identities in your keyring with: -$ ssh-add +ssh-add tip Without extra options ssh-add adds any key located at $HOME/.ssh directory, but you can specify the private key location path as diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt index 8fd93f6b4f60..c880ee4a228d 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt @@ -1,5 +1,5 @@ Check that your key is available from the keyring with: -$ ssh-add -l +ssh-add -l After these changes the key agent will keep your SSH key to connect to the clusters as usual. tip diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt index 3e588c709d44..4c8894438c9f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt @@ -14,7 +14,6 @@ First Time connection to the HPC infrastructure export LANG="en_US.UTF-8" ... - tip "tip: vi" To start entering text in vi: move to the place you want to start entering text with the arrow keys and type "i" to switch to insert mode. 
You can easily exit vi by entering: ""ESC" :wq" @@ -24,11 +23,10 @@ First Time connection to the HPC infrastructure or alternatively (if you are not comfortable with the Linux editors), again on your local machine: - $ echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - $ echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile You can now log out, open a new terminal/shell on your local machine and reconnect to the login node, and you should not get these warnings anymore. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt index 2664953ed0ce..d09b69552ef7 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt @@ -11,4 +11,4 @@ It's also possible to copy entire directories (and their contents) with the -r flag. 
For example, if we want to copy the local directory dataset to $VSC_SCRATCH, we can use the following command (assuming you've created the scratch symlink): -$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt index 51d39b548c3a..532d57bb4a58 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt @@ -1,6 +1,6 @@ If you don't use the -r option to copy a directory, you will run into the following error: -$ scp -r dataset vsc40000@login.hpc.ugent.be:scratch +$ scp dataset vsc40000@login.hpc.ugent.be:scratch dataset: not a regular file Using sftp The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt index 4ae257101f16..1ef13b80c6f0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt @@ -1,5 +1,5 @@ One easy way of starting a sftp session is -$ sftp vsc40000@login.hpc.ugent.be +sftp vsc40000@login.hpc.ugent.be Typical and popular commands inside an sftp session are: | | | |:--------------------------|:-------------------------------------------------------------------------------------| diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt index d4c89b7e1c72..1e22cfc8b1f5 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt @@ -2,7 +2,7 @@ First Time connection to the HPC infrastructure Connect Open up a terminal and enter the following command to connect to the HPC. You can open a terminal by navigation to Applications and then Utilities in the finder and open Terminal.app, or enter Terminal in Spotlight Search. -$ ssh vsc40000@login.hpc.ugent.be +ssh vsc40000@login.hpc.ugent.be Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. The first time you make a connection to the login node, you will be diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt index 6fa418464dd6..f3f5ac6e7754 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt @@ -4,4 +4,4 @@ private key somewhere else than the default location Permission denied (publickey,gssapi-keyex,gssapi-with-mic). In this case, use the -i option for the ssh command to specify the location of your private key. 
For example: -$ ssh -i /home/example/my_keys +ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt index b5ecfb93e889..69db57957dce 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt @@ -33,10 +33,9 @@ to the HPC cluster via the login node "login.hpc.ugent.be". login-node of the HPC. 10. To check you can now "Print the Working Directory" (pwd) and check the name of the computer, where you have logged in (hostname): - $ pwd + $ pwd /user/home/gent/vsc400/vsc40000 $ hostname -f gligar07.gastly.os - 11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") from the list, "Load" it and press "Open". 
From 6d04bbc7656406eaa1d00e0386ecfea76848b5e7 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 28 Aug 2024 15:45:41 +0200 Subject: [PATCH 132/145] add source-directory to metadata and verbose mode --- .../chatbot_parser.py | 34 +++++++++++-------- .../tps1/tps1_paragraph_1_metadata.json | 1 + .../tps1/tps1_paragraph_3_metadata.json | 1 + .../tps1_linux_paragraph_2.1_metadata.json | 1 + .../tps1_linux_paragraph_2.2_metadata.json | 1 + .../tps1_macos_paragraph_2.1_metadata.json | 1 + .../tps1_macos_paragraph_2.2_metadata.json | 1 + .../tps1_windows_paragraph_2.1_metadata.json | 1 + .../tps1_windows_paragraph_2.2_metadata.json | 1 + .../Subtitle-1/Subtitle-1_metadata.json | 1 + .../Subtitle-5-g/Subtitle-5-g_metadata.json | 1 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 1 + .../Subtitle-4-l&m_metadata.json | 1 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 1 + .../Subtitle-4-l&m_metadata.json | 1 + .../Subtitle-2-g/Subtitle-2-g_metadata.json | 1 + .../Subtitle-3-w/Subtitle-3-w_metadata.json | 1 + .../tests/test_full_script.py | 8 +++-- .../tests/test_write_metadata.py | 12 +++---- 19 files changed, 47 insertions(+), 23 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 2b23fb4e962c..e4ed00096549 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -21,6 +21,7 @@ INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT" SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS" DEEP_DIRECTORIES = "DEEP_DIRECTORIES" +VERBOSE = "VERBOSE" # directories PARSED_MDS = "parsed_mds" @@ -67,6 +68,7 @@ WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message" # Metadata attributes +SOURCE_FILE = "source_file" MAIN_TITLE = "main_title" SUBTITLE = "subtitle" TITLE_DEPTH = "title_depth" @@ -207,7 +209,7 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): if '???' 
in curr_line: curr_line = re.sub(r'\?\?\?', "", curr_line) - # get rid of other markdown indicators (`, *, +, _) + # get rid of other indicators (`, *, +, _) if not in_code_block: backquotes = re.findall(r'`(.*?)`', curr_line) @@ -320,7 +322,7 @@ def split_on_titles(file, main_title, options): paragraphs_os_free_text[title] = current_paragraph # write metadata of previous file - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir) + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md') # make a new title title = make_valid_title(line[title_level + 1:-1]) @@ -357,7 +359,7 @@ def split_on_titles(file, main_title, options): paragraphs_os_text[title] = current_paragraph else: paragraphs_os_free_text[title] = current_paragraph - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level]) + paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md') return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order @@ -407,7 +409,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # metadata title metadata_title = main_title - # TODO: define metadata data if split occurs on paragraphs and last_title and title_level are known (placeholder in place right now) + # define metadata data if split occurs on paragraphs and last_title and title_level are known (will be replaced later on in the process) if current_paragraph_number != -1: last_title_level = 4 last_dir = "PLACEHOLDER" @@ -467,7 +469,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, paragraphs_os_free_text[paragraph_title] = current_paragraph # write metadata of previous file - 
paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir) + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') subtitle_order.append(paragraph_title) # reset the current paragraph @@ -512,13 +514,13 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, paragraphs_os_text[paragraph_title] = current_paragraph else: paragraphs_os_free_text[paragraph_title] = current_paragraph - paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level]) + paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') subtitle_order.append(paragraph_title) return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order -def write_metadata(main_title, subtitle, links, title_level, directory): +def write_metadata(main_title, subtitle, links, title_level, directory, source_file): """ Function that writes metadata about a text section to a dictionary @@ -527,10 +529,11 @@ def write_metadata(main_title, subtitle, links, title_level, directory): :param links: a list of links contained within the section :param title_level: the depth of the title of the section :param directory: the directory where the section will eventually be written (can either be generic or os-specific) + :param source_file: the source file that the section originates from :return paragraph_metadata: dictionary containing the metadata about the section """ - paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, TITLE_DEPTH: title_level, DIRECTORY: directory} + paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: 
source_file, TITLE_DEPTH: title_level, DIRECTORY: directory} if len(links) > 0: paragraph_metadata[LINKS] = {} @@ -918,7 +921,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or pass -def main(options, verbose=True): +def main(options): """ main function @@ -931,11 +934,10 @@ def main(options, verbose=True): MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not} - :param verbose: boolean indicating whether print statements from the main function should be print, only used when for testing :return: """ - if options[DEEP_DIRECTORIES] and verbose: + if options[DEEP_DIRECTORIES] and options[VERBOSE]: print("WARNING: This script generates a file structure that contains rather long filepaths. Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason @@ -963,7 +965,6 @@ def main(options, verbose=True): # for loops over all files for filename in filenames.keys(): - print("Processing " + filename) ################### define/reset loop specific variables ################### # boolean indicating whether the current file is part of the linux tutorial @@ -987,6 +988,9 @@ def main(options, verbose=True): ################### actually parse the md file ################### + if options[VERBOSE]: + print("Processing " + filename) + # create directories for the source markdown file for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, 
curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: os.makedirs(directory, exist_ok=True) @@ -1015,7 +1019,7 @@ def main(options, verbose=True): if os.path.exists(TEMP_JINJA_FILE): os.remove(TEMP_JINJA_FILE) - if verbose: + if options[VERBOSE]: print("Parsing finished successfully") @@ -1031,6 +1035,7 @@ def main(options, verbose=True): parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled") + parser.add_argument("-v", "--verbose", action="store_true", help="Run the script with verbose output") args = parser.parse_args() @@ -1041,6 +1046,7 @@ def main(options, verbose=True): MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, MAX_TITLE_DEPTH: args.max_title_depth, INCLUDE_LINKS_IN_PLAINTEXT: args.links, - DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles} + DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles, + VERBOSE: args.verbose} main(options_dict) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json index 19e44fad91d6..08c0b4e49731 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json @@ 
-1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Main-title", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 1, "directory": "tps1", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json index b4c98ff64658..2f1ea4dcd1fe 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Conclusion", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 2, "directory": "tps1", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json index bac81ed87e3a..208cb3472f40 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "OS-specific-sections", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 2, "directory": "tps1", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json index 522265436ab3..b975dfe4e039 100644 
--- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Non-Windows-section", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 3, "directory": "tps1", "parent_title": "OS-specific-sections", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json index 5d9ec163f99d..9c605eb9004e 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "OS-specific-sections", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 2, "directory": "tps1", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json index 7b06f06efddb..e3ca81d7cc5e 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Non-Windows-section", + "source_file": 
"tests/test_files/ftps/tps1.md", "title_depth": 3, "directory": "tps1", "parent_title": "OS-specific-sections", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json index e8e50aa6c322..ab58c622b8c5 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "OS-specific-sections", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 2, "directory": "tps1", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json index 84ea6ad53f9f..435c9e9c484e 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tps1", "subtitle": "Windows-specific-section", + "source_file": "tests/test_files/ftps/tps1.md", "title_depth": 3, "directory": "tps1", "parent_title": "OS-specific-sections", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json index 9fdbce652bf1..b7786c066a7f 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-1", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-1", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json index b48bcaaa08c0..eb5403804e24 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-5-g", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-5-g", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json index a2b68c8865e2..f7330bec86d8 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-2-g", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-2-g", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json index 537541e2cb0f..a76f852c8749 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-4-l&m", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 3, "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", "parent_title": "Subtitle-2-g", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json index 6846da26b728..8b234c92fa6d 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -1,6 +1,7 @@ { 
"main_title": "tts1", "subtitle": "Subtitle-2-g", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-2-g", "parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json index 4e167b116d2a..732d309da81a 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-4-l&m", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 3, "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", "parent_title": "Subtitle-2-g", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json index c4620a940808..7a43426a85f3 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-2-g", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 2, "directory": "tts1\\Main-title\\Subtitle-2-g", 
"parent_title": "Main-title", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json index aa4b6317ce62..4d7f494320d7 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json @@ -1,6 +1,7 @@ { "main_title": "tts1", "subtitle": "Subtitle-3-w", + "source_file": "tests/test_files/ftts/tts1.md", "title_depth": 3, "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w", "parent_title": "Subtitle-2-g", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py index 61a6f3f1bdf6..91605dec651f 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py @@ -14,7 +14,8 @@ "MIN_PARAGRAPH_LENGTH": 160, "MAX_TITLE_DEPTH": 4, "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False} + "DEEP_DIRECTORIES": False, + "VERBOSE": False} ), ("tests/test_files/ftts", "tests/test_files/ftts/actual", "tests/test_files/ftts/output", @@ -25,12 +26,13 @@ "MIN_PARAGRAPH_LENGTH": 160, "MAX_TITLE_DEPTH": 4, "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": True} + "DEEP_DIRECTORIES": True, + "VERBOSE": False} ) ]) def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options): # run the script - main(options, verbose=False) + main(options) # Compare directories and files for dirpath, dirnames, filenames in 
os.walk(expected_output_directory): diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py index 68f1772cb242..6c30fef7985d 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py @@ -3,13 +3,13 @@ from chatbot_parser import write_metadata -@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,output", [ - ("", "", [], 1, "", {"main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), +@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [ + ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2, - os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), - {"main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2, + os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "source", + {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", "title_depth": 2, "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}}) ]) -def test_write_metadata(main_title, subtitle, links, title_level, directory, output): - assert write_metadata(main_title, subtitle, links, title_level, directory) == output +def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output): + assert write_metadata(main_title, subtitle, links, title_level, directory, source_file) == output From 
f33cfb3b22feacf540944dc8812d5a55c59763d4 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Wed, 28 Aug 2024 16:57:24 +0200 Subject: [PATCH 133/145] added verbose mode --- .../chatbot_parser.py | 124 +++++++++++++++--- 1 file changed, 107 insertions(+), 17 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index e4ed00096549..60776fcb379e 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -106,6 +106,9 @@ # Marker for comments for the bot INPUT_FOR_BOT = "INPUT_FOR_BOT" +# Standard strings for verbose output +LINE = "------------------------------------------------------------------------------------------------------\n" + ################### define functions ################### @@ -266,6 +269,10 @@ def split_on_titles(file, main_title, options): :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance """ + + if options[VERBOSE]: + print("Splitting on titles\n") + # start of assuming we haven't encountered a title after_first_title = False @@ -302,15 +309,20 @@ def split_on_titles(file, main_title, options): # detect if-statements starting or ending on the current line in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len(re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block + if options[VERBOSE]: + if in_code_block: + print("Detected start of a codeblock, not registering titles") + else: + print("Detected end of codeblock, registering titles again") + # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: title_level = check_for_title(line, in_code_block, curr_dirs, options) - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block - # line is a title with a maximum depth of 4 if title_level > 0: if after_first_title: @@ -318,8 +330,12 @@ def split_on_titles(file, main_title, options): # write text of previous file if previous_contained_if: paragraphs_os_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + title + "\n") else: paragraphs_os_free_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + title + "\n") # write metadata of previous file paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md') @@ -357,8 +373,12 @@ def split_on_titles(file, main_title, options): # write dictionaries for the last file if previous_contained_if: paragraphs_os_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + title + "\n") else: paragraphs_os_free_text[title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + title + "\n") paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md') return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order @@ -377,6 +397,10 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance """ + + if options[VERBOSE]: + print("Splitting on paragraphs\n") + # start of assuming we are not in a code_block in_code_block = False @@ -435,23 +459,33 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # detect whether the current line is in a list if 
re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line): # beginning of a list entry in_list = True - # print("List entry found") + if options[VERBOSE]: + print("First line of new list entry found, not starting new paragraphs: " + line[:-1]) elif re.search(r'^\s{2,}.+$', line) and in_list: # middle of a list entry pass elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries pass + elif in_list: + if options[VERBOSE]: + print("List ended, starting new paragraphs again") + in_list = False else: in_list = False + # detect codeblocks to make sure titles aren't detected in them + if '```' in line or (('
' in line) ^ ('
' in line)): + in_code_block = not in_code_block + if options[VERBOSE]: + if in_code_block: + print("Detected start of a codeblock, not starting new paragraphs") + else: + print("Detected end of codeblock, starting new paragraphs again") + # only split up if current line is in a fully non-os-specific section if in_if_statement == 0: title_level = check_for_title(line, in_code_block, curr_dirs, options) - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block - # check whether a new paragraph should be started if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block and not in_list: @@ -465,8 +499,12 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # write text of previous file if previous_contained_if: paragraphs_os_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") else: paragraphs_os_free_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + paragraph_title + "\n") # write metadata of previous file paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') @@ -512,8 +550,12 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # write dictionaries for the last file if previous_contained_if: paragraphs_os_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") else: paragraphs_os_free_text[paragraph_title] = current_paragraph + if options[VERBOSE]: + print("Saved generic chunk with title: " + paragraph_title + "\n") paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') subtitle_order.append(paragraph_title) @@ -557,6 +599,9 @@ def jinja_parser(filename, copy_location, options): # YAML file location yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml') + if options[VERBOSE]: + print("Reading YAML file from location: " + yml_file_path) + # Read the 
YAML file with open(yml_file_path, 'r') as yml_file: words_dict = yaml.safe_load(yml_file) @@ -569,6 +614,9 @@ def jinja_parser(filename, copy_location, options): } combined_context = {**words_dict, **additional_context} + if options[VERBOSE]: + print("Mangling OS-specific if-statements") + # Mangle the OS-related if-statements mangle_ifs(copy_location, filename, options) @@ -578,6 +626,9 @@ def jinja_parser(filename, copy_location, options): template = templateEnv.get_template(filename) rendered_content = template.render(combined_context) + if options[VERBOSE]: + print("jinja parsing finished\nWriting to location: " + copy_location) + # Save the rendered content to a new file with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: output_file.write(rendered_content) @@ -601,7 +652,7 @@ def load_macros(name): return readfile.read() -def mangle_os_ifs(line, is_os): +def mangle_os_ifs(line, is_os, options): """ function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. 
@@ -612,6 +663,7 @@ def mangle_os_ifs(line, is_os): NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if OS_IF: in an os-if OS_IF_IN_OS_IF: in an os-if nested in an os-if + :param options: dictionary containing the options given by the user :return line: the modified line with mangled os-related if-statements """ @@ -640,6 +692,8 @@ def mangle_os_ifs(line, is_os): # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these if endif_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): + if options[VERBOSE]: + print("OS-specific endif statement found in line: " + line[:-1]) line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: @@ -651,6 +705,8 @@ def mangle_os_ifs(line, is_os): elif if_match: if if_os_match: + if options[VERBOSE]: + print("OS-specific if statement found in line: " + line[:-1]) line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) if is_os == OS_IF: @@ -665,6 +721,8 @@ def mangle_os_ifs(line, is_os): elif else_match: if is_os in (OS_IF, OS_IF_IN_OS_IF): + if options[VERBOSE]: + print("OS-specific else statement found in line: " + line[:-1]) line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling added_length += 2 * len(IF_MANGLED_PART) @@ -688,7 +746,7 @@ def mangle_ifs(directory, filename, options): with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'w') as write_file: with open(directory, 'r') as read_file: for line in read_file: - new_line, is_os = mangle_os_ifs(line, is_os) + new_line, is_os = mangle_os_ifs(line, is_os, options) write_file.write(new_line) @@ -733,6 +791,9 @@ def write_generic_file(title, 
paragraphs_text, paragraphs_metadata, title_order, filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) os.makedirs(filepath, exist_ok=True) + if options[VERBOSE]: + print("Writing generic section " + title + " to filepath: " + str(filepath)) + write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial) else: # don't write empty files @@ -911,6 +972,9 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) os.makedirs(filepath, exist_ok=True) + if options[VERBOSE]: + print("Writing os-specific section " + os_subtitle + " to filepath: " + str(filepath)) + # write to files write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial) else: @@ -933,10 +997,14 @@ def main(options): MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph, MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, - DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not} + DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not, + VERBOSE: enable or disable verbose mode} :return: """ + if options[VERBOSE]: + print("Running chatbot parser with options: " + str(options)) + if options[DEEP_DIRECTORIES] and options[VERBOSE]: print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") @@ -976,9 +1044,10 @@ def main(options): # variable that keeps track of the directories that are used to write in at different levels root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR) - root_dir_os_specific_linux = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, LINUX) - root_dir_os_specific_windows = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, WINDOWS) - root_dir_os_specific_macos = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, MACOS) + root_dir_os_specific = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR) + root_dir_os_specific_linux = os.path.join(root_dir_os_specific, LINUX) + root_dir_os_specific_windows = os.path.join(root_dir_os_specific, WINDOWS) + root_dir_os_specific_macos = os.path.join(root_dir_os_specific, MACOS) # variable for the main title (needed for reference links) main_title = filename[:-3] @@ -989,18 +1058,31 @@ def main(options): ################### actually parse the md file ################### if options[VERBOSE]: - print("Processing " + filename) + print(LINE + "Processing " + filename) + print("Location: " + filenames[filename]) + print("\nMaking directories:") # create directories for the source markdown file - for directory in [root_dir_generic, os.path.join(PARSED_MDS, OS_SPECIFIC_DIR), root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: + for directory in [root_dir_generic, root_dir_os_specific, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, 
os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: + if options[VERBOSE]: + print(directory) os.makedirs(directory, exist_ok=True) + if options[VERBOSE]: + print("\nParsing the sourcefile with jinja") + # process the jinja macros jinja_parser(filename, copy_file, options) + if options[VERBOSE]: + print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)") + # split the text in paragraphs paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) + if options[VERBOSE]: + print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files with metadata") + # for every section, either make the whole section generic, or create an os-specific file for each OS for i, subtitle in enumerate(subtitle_order): @@ -1012,6 +1094,14 @@ def main(options): else: split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial) + if options[VERBOSE]: + print("\nFinished processing " + filename) + + if options[VERBOSE]: + print(LINE + "Cleaning up directories:") + print(os.path.join(options[DESTINATION_DIRECTORY], COPIES)) + print(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES)) + print(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL)) # clean up temporary directories and files shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) From 3227f1939ef3933d0a8fcc22835239021abae0aa Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 09:17:04 +0200 Subject: [PATCH 134/145] Added limitation on lists --- 
scripts/HPC_chatbot_preprocessor/README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index b3bce665973d..1795ee71554e 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -164,6 +164,18 @@ Comments can be written in such a way that the script will keep them as input fo ``` +This will be reworked to + +``` +your comment for the bot +``` + +in the final output. + ### Long filepaths Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being to long, resulting in files not being able to open. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`. + +### Markdown lists + +The parser is made in a way to detect lists and not split them in multiple paragraphs. The kinds of lists it can detect is all lists with denominators `-`, `+`, `*` and list indexed with numbers or letters (one letter per list entry). It can handle list entries being spread out over multiple lines if there is an indentation of at least two spaces. It can also handle multiple paragraph list entries in this way, as long as the indentation stays. 
From 67aed53662656f95c7a9b718cf372d1ca5349283 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 10:46:56 +0200 Subject: [PATCH 135/145] fix for non os-specific if-statement not being recognised --- scripts/HPC_chatbot_preprocessor/README.md | 11 +++++ .../chatbot_parser.py | 34 +++++++++++++- .../generic/account/account_paragraph_10.txt | 8 ++-- .../account_paragraph_10_metadata.json | 5 +- .../account_paragraph_12_metadata.json | 1 + .../account/account_paragraph_1_metadata.json | 1 + .../account/account_paragraph_2_metadata.json | 1 + .../account/account_paragraph_3_metadata.json | 1 + .../generic/account/account_paragraph_8.txt | 11 +++-- .../account/account_paragraph_8_metadata.json | 1 + .../connecting/connecting_paragraph_10.txt | 24 ++++++++++ .../connecting_paragraph_10_metadata.json} | 7 +-- .../connecting/connecting_paragraph_14.txt | 7 --- .../connecting_paragraph_14_metadata.json | 14 ------ .../connecting/connecting_paragraph_15.txt | 19 +++----- .../connecting_paragraph_15_metadata.json | 6 +-- .../connecting/connecting_paragraph_16.txt | 11 +++++ .../connecting_paragraph_16_metadata.json | 16 +++++++ .../connecting_paragraph_1_metadata.json | 1 + .../connecting/connecting_paragraph_2.txt | 2 +- .../connecting_paragraph_2_metadata.json | 1 + .../connecting/connecting_paragraph_3.txt | 1 - .../connecting_paragraph_3_metadata.json | 1 + .../connecting/connecting_paragraph_6.txt | 2 - .../connecting_paragraph_6_metadata.json | 1 + .../connecting/connecting_paragraph_7.txt | 1 - .../connecting_paragraph_7_metadata.json | 1 + .../connecting/connecting_paragraph_8.txt | 3 +- .../connecting_paragraph_8_metadata.json | 1 + .../connecting/connecting_paragraph_9.txt | 46 ++++++++----------- .../connecting_paragraph_9_metadata.json | 1 + .../account/account_linux_paragraph_11.1.txt | 3 ++ ...account_linux_paragraph_11.1_metadata.json | 1 + .../account_linux_paragraph_4.1_metadata.json | 1 + .../account_linux_paragraph_5.1_metadata.json | 1 + 
.../account_linux_paragraph_5.2_metadata.json | 1 + .../account_linux_paragraph_5.3_metadata.json | 1 + .../account_linux_paragraph_5.4_metadata.json | 1 + .../account_linux_paragraph_5.5_metadata.json | 1 + .../account_linux_paragraph_6.1_metadata.json | 1 + .../account_linux_paragraph_7.1_metadata.json | 1 + .../account_linux_paragraph_7.2_metadata.json | 1 + .../account/account_linux_paragraph_9.1.txt | 9 ++-- .../account_linux_paragraph_9.1_metadata.json | 1 + .../connecting_linux_paragraph_10.1.txt | 33 ------------- ...necting_linux_paragraph_10.1_metadata.json | 11 ----- .../connecting_linux_paragraph_11.1.txt | 43 ++++++++++++++--- ...necting_linux_paragraph_11.1_metadata.json | 5 +- .../connecting_linux_paragraph_12.1.txt | 18 +++----- ...necting_linux_paragraph_12.1_metadata.json | 14 +++--- .../connecting_linux_paragraph_13.1.txt | 20 ++++---- ...necting_linux_paragraph_13.1_metadata.json | 10 ++-- ...xt => connecting_linux_paragraph_13.2.txt} | 0 ...ecting_linux_paragraph_13.2_metadata.json} | 5 +- ...xt => connecting_linux_paragraph_13.3.txt} | 0 ...ecting_linux_paragraph_13.3_metadata.json} | 5 +- ...xt => connecting_linux_paragraph_13.4.txt} | 0 ...ecting_linux_paragraph_13.4_metadata.json} | 5 +- ...xt => connecting_linux_paragraph_13.5.txt} | 0 ...ecting_linux_paragraph_13.5_metadata.json} | 5 +- ...xt => connecting_linux_paragraph_13.6.txt} | 0 ...ecting_linux_paragraph_13.6_metadata.json} | 5 +- .../connecting_linux_paragraph_14.1.txt | 10 ++++ ...necting_linux_paragraph_14.1_metadata.json | 12 +++++ ...nnecting_linux_paragraph_5.1_metadata.json | 1 + ...nnecting_linux_paragraph_5.2_metadata.json | 1 + .../account/account_macos_paragraph_11.1.txt | 3 ++ ...account_macos_paragraph_11.1_metadata.json | 1 + .../account_macos_paragraph_4.1_metadata.json | 1 + .../account_macos_paragraph_5.1_metadata.json | 1 + .../account_macos_paragraph_5.2_metadata.json | 1 + .../account_macos_paragraph_5.3_metadata.json | 1 + 
.../account_macos_paragraph_5.4_metadata.json | 1 + .../account_macos_paragraph_5.5_metadata.json | 1 + .../account_macos_paragraph_6.1_metadata.json | 1 + .../account_macos_paragraph_7.1_metadata.json | 1 + .../account_macos_paragraph_7.2_metadata.json | 1 + .../account/account_macos_paragraph_9.1.txt | 9 ++-- .../account_macos_paragraph_9.1_metadata.json | 1 + .../connecting_macos_paragraph_10.1.txt | 33 ------------- .../connecting_macos_paragraph_11.1.txt | 43 ++++++++++++++--- ...necting_macos_paragraph_11.1_metadata.json | 5 +- .../connecting_macos_paragraph_12.1.txt | 18 +++----- ...necting_macos_paragraph_12.1_metadata.json | 14 +++--- .../connecting_macos_paragraph_12.2.txt | 17 ------- ...necting_macos_paragraph_12.2_metadata.json | 11 ----- .../connecting_macos_paragraph_13.1.txt | 25 +++++----- ...necting_macos_paragraph_13.1_metadata.json | 8 +++- .../connecting_macos_paragraph_13.2.txt | 20 ++++++-- ...necting_macos_paragraph_13.2_metadata.json | 7 +-- ...xt => connecting_macos_paragraph_13.3.txt} | 0 ...ecting_macos_paragraph_13.3_metadata.json} | 5 +- ...xt => connecting_macos_paragraph_13.4.txt} | 0 ...ecting_macos_paragraph_13.4_metadata.json} | 5 +- ...xt => connecting_macos_paragraph_13.5.txt} | 0 ...ecting_macos_paragraph_13.5_metadata.json} | 5 +- ...xt => connecting_macos_paragraph_13.6.txt} | 0 ...ecting_macos_paragraph_13.6_metadata.json} | 5 +- .../connecting_macos_paragraph_14.1.txt | 15 ++++++ ...necting_macos_paragraph_14.1_metadata.json | 12 +++++ .../connecting_macos_paragraph_14.2.txt | 3 ++ ...necting_macos_paragraph_14.2_metadata.json | 12 +++++ ...nnecting_macos_paragraph_5.1_metadata.json | 1 + ...nnecting_macos_paragraph_5.2_metadata.json | 1 + .../account_windows_paragraph_11.1.txt | 3 ++ ...count_windows_paragraph_11.1_metadata.json | 1 + ...ccount_windows_paragraph_4.1_metadata.json | 1 + ...ccount_windows_paragraph_4.2_metadata.json | 1 + .../account/account_windows_paragraph_4.3.txt | 8 ++++ 
...ccount_windows_paragraph_4.3_metadata.json | 4 ++ ...ccount_windows_paragraph_4.4_metadata.json | 1 + ...ccount_windows_paragraph_6.1_metadata.json | 1 + ...ccount_windows_paragraph_6.2_metadata.json | 1 + ...ccount_windows_paragraph_6.3_metadata.json | 1 + .../account/account_windows_paragraph_9.1.txt | 9 ++-- ...ccount_windows_paragraph_9.1_metadata.json | 1 + .../connecting_windows_paragraph_10.1.txt | 5 -- ...cting_windows_paragraph_10.1_metadata.json | 11 ----- .../connecting_windows_paragraph_11.1.txt | 29 ++++-------- ...cting_windows_paragraph_11.1_metadata.json | 11 +++-- .../connecting_windows_paragraph_12.1.txt | 22 +++++++++ ...ting_windows_paragraph_12.1_metadata.json} | 5 +- ... => connecting_windows_paragraph_12.2.txt} | 0 ...cting_windows_paragraph_12.2_metadata.json | 12 +++++ ... => connecting_windows_paragraph_12.3.txt} | 0 ...ting_windows_paragraph_12.3_metadata.json} | 5 +- .../connecting_windows_paragraph_4.1.txt | 1 + ...ecting_windows_paragraph_4.1_metadata.json | 1 + 128 files changed, 533 insertions(+), 355 deletions(-) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt rename scripts/HPC_chatbot_preprocessor/parsed_mds/{os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json => generic/connecting/connecting_paragraph_10_metadata.json} (53%) delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.2.txt => connecting_linux_paragraph_13.2.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.2_metadata.json => connecting_linux_paragraph_13.2_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.3.txt => connecting_linux_paragraph_13.3.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.4_metadata.json => connecting_linux_paragraph_13.3_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.4.txt => connecting_linux_paragraph_13.4.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.3_metadata.json => connecting_linux_paragraph_13.4_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.5.txt => connecting_linux_paragraph_13.5.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.5_metadata.json => connecting_linux_paragraph_13.5_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.6.txt => connecting_linux_paragraph_13.6.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/{connecting_linux_paragraph_12.6_metadata.json => connecting_linux_paragraph_13.6_metadata.json} (65%) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt create mode 
100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.3.txt => connecting_macos_paragraph_13.3.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.3_metadata.json => connecting_macos_paragraph_13.3_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.4.txt => connecting_macos_paragraph_13.4.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.4_metadata.json => connecting_macos_paragraph_13.4_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.5.txt => connecting_macos_paragraph_13.5.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.5_metadata.json => connecting_macos_paragraph_13.5_metadata.json} (61%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.6.txt => connecting_macos_paragraph_13.6.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/{connecting_macos_paragraph_12.6_metadata.json => connecting_macos_paragraph_13.6_metadata.json} (65%) create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.2_metadata.json => connecting_windows_paragraph_12.1_metadata.json} (63%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.2.txt => connecting_windows_paragraph_12.2.txt} (100%) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.3.txt => connecting_windows_paragraph_12.3.txt} (100%) rename scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/{connecting_windows_paragraph_11.3_metadata.json => connecting_windows_paragraph_12.3_metadata.json} (63%) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 1795ee71554e..27c1bf3fea6a 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ 
b/scripts/HPC_chatbot_preprocessor/README.md @@ -145,6 +145,17 @@ endif This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. +### Non OS-related if-statements + +Due to the way jinja parses the sourcefiles, the script slightly alters non os-specific if-statements as well. It expects if-statements of the following form: + +``` +{%- if site == gent %} +{% if site != (gent or brussel) %} +``` + +All spaces and the dash are optional. City names don't need to be fully lowercase since the parser will capitalize them properly anyway. + ### html syntax The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 60776fcb379e..3129ccaf5664 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -620,6 +620,12 @@ def jinja_parser(filename, copy_location, options): # Mangle the OS-related if-statements mangle_ifs(copy_location, filename, options) + if options[VERBOSE]: + print("Altering other if-statements to parse properly") + + # Alter the other if-statements + alter_ifs(filename, options) + # Use Jinja2 to replace the macros template_loader = ChoiceLoader([FileSystemLoader(searchpath=[os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]), FunctionLoader(load_macros)]) templateEnv = Environment(loader=template_loader) @@ -627,7 +633,7 @@ def jinja_parser(filename, copy_location, options): rendered_content = template.render(combined_context) if options[VERBOSE]: - print("jinja parsing finished\nWriting to location: " + copy_location) + print("jinja parsing finished\nWriting jinja-parsed file to location: " + copy_location) # 
Save the rendered content to a new file with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: @@ -750,6 +756,32 @@ def mangle_ifs(directory, filename, options): write_file.write(new_line)
+def alter_ifs(filename, options):
+    """
+    Function that quotes and capitalises the compared values of the non OS-related if-statements in an if-mangled file, in place.
+    This is needed because jinja treats the bare word in {% if site == gent %} as a variable, not as the string 'Gent' from context {'site': 'Gent'}.
+    These statements get changed to {% if site == 'Gent' %} in this function (a closing -%} is accepted as well).
+    NOTE(review): (gent or brussel) becomes ('Gent' or 'Brussel'), which presumably only compares against 'Gent' - confirm this is intended.
+
+    :param filename: the filename of the file to be transformed
+    :param options: dictionary containing the options given by the user
+    :return:
+    """
+
+    file_path = os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename)
+    # explicit utf-8: the jinja FileSystemLoader (default encoding utf-8) loads its templates from this directory
+    with open(file_path, 'r', encoding='utf-8') as read_file:
+        content = read_file.read()
+
+    # group 1: opening part up to and including == or != (and an optional bracket), group 2: the ' or '-separated values, group 3: closing part
+    pattern = r'(\{%-?\s?[a-zA-Z\s]*?[!=]=\s?\(?)([a-zA-Z\s]+(?:\sor\s[a-zA-Z\s]+)*)(\)?\s?-?%})'
+    content = re.sub(pattern, lambda match: (match.group(1) + " or ".join(
+        [f"'{city.strip().capitalize()}'" for city in match.group(2).split(" or ")]) + match.group(3)), content)
+
+    with open(file_path, 'w', encoding='utf-8') as write_file:
+        write_file.write(content)
+
+
 def make_valid_title(title): """ function that makes sure all titles can be used as valid filenames diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt index f486b9b13489..7b0a39279e46 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt @@ -1,3 +1,7 @@ +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address.
After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. Welcome e-mail Within one day, you should receive a Welcome e-mail with your VSC account details. @@ -12,7 +16,3 @@ Kind regards, -- The VSC administrators Now, you can start using the HPC. You can always look up your VSC id later by visiting . -Adding multiple SSH public keys (optional) -In case you are connecting from different computers to the login nodes, -it is advised to use separate SSH public keys to do so. You should -follow these steps. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json index 4b5b5202d1ca..e417029c16f3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json @@ -1,11 +1,12 @@ { "main_title": "account", - "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "subtitle": "Welcome-e-mail", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "", "previous_title": "account_paragraph_9", "next_title": "account_paragraph_11", "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#adding-multiple-ssh-public-keys-optional" + "reference_link": "https://docs.hpc.ugent.be/account/#welcome-e-mail" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json index a5df035df493..e43e729aa744 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json 
@@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Computation-Workflow-on-the-HPC", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json index 726ce9f94fa1..cdba091d7dfe 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Getting-ready-to-request-an-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json index 257f886c6e01..0b22e2986a00 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Getting-ready-to-request-an-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json index b94f233779b3..bd2f73195a6b 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": 
"How-do-SSH-keys-work", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt index 125b566419a1..6c5695dfff31 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt @@ -6,8 +6,9 @@ Select "UGent" in the dropdown box and optionally select "Save my preference" and "permanently". Click "Confirm" You will now be taken to the authentication page of your institute. -After you log in using your UGent login and password, you will be asked to -upload the file that contains your public key, i.e., the file -"id_rsa.pub" which you have generated earlier. Make sure that your -public key is actually accepted for upload, because if it is in a wrong -format, wrong type or too short, then it will be refused. +You will now have to log in with CAS using your UGent account. +You either have a login name of maximum 8 characters, or a (non-UGent) +email address if you are an external user. In case of problems with your +UGent password, please visit: . After +logging in, you may be requested to share your information. Click "Yes, +continue". 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json index 6d186b6ff463..6a77c48dbd1f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Applying-for-the-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt new file mode 100644 index 000000000000..5c715d218a19 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt @@ -0,0 +1,24 @@ +You can exit the connection at anytime by entering: +$ exit +logout +Connection to login.hpc.ugent.be closed. + tip "tip: Setting your Language right" + You may encounter a warning message similar to the following one during connecting: + perl: warning: Setting locale failed. + perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_CTYPE = "UTF-8", + LANG = (unset) + are supported and installed on your system. + perl: warning: Falling back to the standard locale ("C"). + or any other error message complaining about the locale. + This means that the correct "locale" has not yet been properly specified on your local machine. 
Try: + LANG= + LC_COLLATE="C" + LC_CTYPE="UTF-8" + LC_MESSAGES="C" + LC_MONETARY="C" + LC_NUMERIC="C" + LC_TIME="C" + LC_ALL= diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json similarity index 53% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json index 4c6e54771190..96a1f9cee80c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", + "parent_title": "", "previous_title": "connecting_paragraph_9", "next_title": "connecting_paragraph_11", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt deleted file mode 100644 index df00d4ed2a4a..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14.txt +++ /dev/null @@ -1,7 +0,0 @@ -Fast file transfer for large datasets -See the section 
on rsync in chapter 5 of the Linux intro manual. -Changing login nodes -It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. -For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: -ssh gligar07.gastly.os -This is also possible the other way around. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json deleted file mode 100644 index 0543efa40833..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_14_metadata.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Changing-login-nodes", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_13", - "next_title": "connecting_paragraph_15", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt index b21976186473..df00d4ed2a4a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt @@ -1,12 
+1,7 @@ -If you want to find out which login host you are connected to, you can use the hostname command. -$ hostname -gligar07.gastly.os -$ ssh gligar08.gastly.os -$ hostname -gligar08.gastly.os - -Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux. -These can make sessions that 'survives' across disconnects. -You can find more information on how to use these tools here (or on other online sources): -- screen -- tmux \ No newline at end of file +Fast file transfer for large datasets +See the section on rsync in chapter 5 of the Linux intro manual. +Changing login nodes +It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. +For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: +ssh gligar07.gastly.os +This is also possible the other way around. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json index d23146ed79f0..ff9c22397d1c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json @@ -1,15 +1,15 @@ { "main_title": "connecting", "subtitle": "Changing-login-nodes", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "links": { - "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/", - "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/" + "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync" }, "parent_title": "", "previous_title": "connecting_paragraph_14", - "next_title": null, + "next_title": "connecting_paragraph_16", "OS": "generic", "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt new file mode 100644 index 000000000000..dd4f3269fb56 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt @@ -0,0 +1,11 @@ +If you want to find out which login host you are connected to, you can use the hostname command. +$ hostname +gligar07.gastly.os +$ ssh gligar08.gastly.os +$ hostname +gligar08.gastly.os +Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux. +These can make sessions that 'survives' across disconnects. 
+You can find more information on how to use these tools here (or on other online sources): +- screen +- tmux \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json new file mode 100644 index 000000000000..623be877f5bb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "connecting", + "subtitle": "Changing-login-nodes", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/", + "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_15", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json index ef0bc5473b0d..783e60c1ab5f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connecting-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 1, "directory": "connecting", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt index b150c8fbb28f..49c4572f3b24 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt @@ -7,7 +7,7 @@ networks, and from (most) Belgian commercial internet providers. All other IP domains are blocked by default. If you are connecting from an IP address that is not allowed direct access, you have the following options to get access to VSC login nodes: -- Use an VPN connection to connect to UGent the network (recommended). +- Use an VPN connection to connect to UGent the network (recommended). See for more information. - Whitelist your IP address automatically by accessing and log in with your UGent account. - While this web connection is active new SSH sessions can be diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json index 39ee53fcf0b8..10f3e042d9ae 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connection-restrictions", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt index 31dd64632665..db490973b7fe 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt @@ -2,7 +2,6 @@ Trying to establish an SSH connection from an IP address that does not adhere to these restrictions will result in an immediate failure to connect, with an error message like: ssh_exchange_identification: read: Connection reset by peer - First Time connection to the HPC infrastructure The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json index 471e6bfcbf2a..e30467d07990 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt index 472991adada3..862e6952252f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt @@ -2,14 +2,12 @@ Congratulations, you're on the HPC infrastructure now! To find out where you have landed you can print the current working directory: $ pwd /user/home/gent/vsc400/vsc40000 - Your new private home directory is "/user/home/gent/vsc400/vsc40000". 
Here you can create your own subdirectory structure, copy and prepare your applications, compile and test them and submit your jobs on the HPC. $ cd /apps/gent/tutorials $ ls Intro-HPC/ - This directory currently contains all training material for the Introduction to the HPC. More relevant training material to work with the HPC can always be added later in this directory. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json index 1c7ae8ed2678..66b2a89fbb1f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt index 35996afe4da5..aa590b9b2691 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt @@ -15,7 +15,6 @@ $ tree -L 2 |-- example.pbs '-- example.sh 9 directories, 5 files - This directory contains: 1. This HPC Tutorial (in either a Mac, Linux or Windows version). 2. 
An examples subdirectory, containing all the examples that you need in this diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json index 709753e4dc46..6e3f90fbe8af 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt index 096c74c1372c..634df6034b10 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt @@ -1,5 +1,4 @@ -$ cd examples - +cd examples tip Typing cd ex followed by tab (the Tab-key) will generate the cd examples command. 
Command-line completion (also tab completion) is a common feature of the bash command diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json index 0241e0bd6b9b..074e7e891ce0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "links": { diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt index 5a634e6bddc6..ad2fee7457f5 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt @@ -1,27 +1,19 @@ -$ cp -r /apps/gent/tutorials/Intro-HPC/examples ~/ - -You can exit the connection at anytime by entering: -$ exit -logout -Connection to login.hpc.ugent.be closed. - - tip "tip: Setting your Language right" - You may encounter a warning message similar to the following one during connecting: - perl: warning: Setting locale failed. - perl: warning: Please check that your locale settings: - LANGUAGE = (unset), - LC_ALL = (unset), - LC_CTYPE = "UTF-8", - LANG = (unset) - are supported and installed on your system. - perl: warning: Falling back to the standard locale ("C"). - or any other error message complaining about the locale. - This means that the correct "locale" has not yet been properly specified on your local machine. 
Try: - LANG= - LC_COLLATE="C" - LC_CTYPE="UTF-8" - LC_MESSAGES="C" - LC_MONETARY="C" - LC_NUMERIC="C" - LC_TIME="C" - LC_ALL= +cp -r /apps/gent/tutorials/Intro-HPC/examples ~/ +Go to your home directory, check your own private examples directory, ...Ā and start working. +cd +ls -l +Upon connecting you will see a login message containing your last login time stamp and a basic overview of the current cluster utilisation. +Last login: Thu Mar 18 13:15:09 2021 from gligarha02.gastly.os + STEVIN HPC-UGent infrastructure status on Mon, 19 Feb 2024 10:00:01 + cluster - full - free - part - total - running - queued + nodes nodes free nodes jobs jobs + ------------------------------------------------------------------------- + skitty 39 0 26 68 1839 5588 + joltik 6 0 1 10 29 18 + doduo 22 0 75 128 1397 11933 + accelgor 4 3 2 9 18 1 + donphan 0 0 16 16 16 13 + gallade 2 0 5 16 19 136 +For a full view of the current loads and queues see: +https://hpc.ugent.be/clusterstate/ +Updates on current system status and planned maintenance can be found on https://www.ugent.be/hpc/en/infrastructure/status diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json index 40b04f24e9f1..bd1d462e614e 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt index b2734cc9f897..dfc592117923 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt @@ -1,4 +1,7 @@ Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. 1. Create a new public/private SSH key pair from the new computer. Repeat the process described in sectionĀ Generate a public/private key pair with OpenSSH. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json index 72b9f92061c2..ffdeaf550e00 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Applying-for-the-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json index 52e1569a8a7a..bcc0552177dd 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "How-do-SSH-keys-work", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json index 4636f13a4b42..7654a65253ab 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Test-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json index ca9c4c7dc1da..32f1120307fa 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Test-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json index d902f6a0838e..722ba1a2ad49 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json index 1edae26d97b2..4f65f6ebf365 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json index 29affc0335eb..468fb5d09381 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": 
"Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json index acf12bc0a7d0..fb82c40a7d76 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json index b6b1e052345e..4214d6cb321f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json index 35466be5b567..de9700c7a5b8 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt index a9059b224bf2..815c414e059b 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt @@ -1,6 +1,7 @@ Applying for the account +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. This file has been stored in the directory "~/.ssh/". -After you have uploaded your public key you will receive an e-mail with -a link to confirm your e-mail address. After confirming your e-mail -address the VSC staff will review and if applicable approve your -account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json index 219883887235..31c14d853b39 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Applying-for-the-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt deleted file mode 100644 index 4c8894438c9f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1.txt +++ /dev/null @@ -1,33 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Open the .bashrc on your local machine with your favourite editor and - add the following lines: - - $ nano ~/.bashrc - ... - export LANGUAGE="en_US.UTF-8" - export LC_ALL="en_US.UTF-8" - export LC_CTYPE="en_US.UTF-8" - export LANG="en_US.UTF-8" - ... - - tip "tip: vi" - To start entering text in vi: move to the place you want to start - entering text with the arrow keys and type "i" to switch to insert mode. 
You can easily exit vi by entering: ""ESC" :wq" - To exit vi without saving your changes, enter ""ESC":q!" - - - or alternatively (if you are not comfortable with the Linux editors), - again on your local machine: - - echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - - You can now log out, open a new terminal/shell on your local machine and - reconnect to the login node, and you should not get these warnings anymore. - \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json deleted file mode 100644 index 364c81834cf8..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_10.1_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_9", - "next_title": "connecting_paragraph_11", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt index d872c89a0f83..1d9129245359 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt @@ -1,6 +1,37 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -The preferred way to transfer files is by using an scp or sftp via the -secure OpenSSH protocol. ships with an implementation of OpenSSH, so you -don't need to install any third-party software to use it. Just open a -terminal window and jump in! +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Note + If you try to set a non-supported locale, then it will be automatically + set to the default. Currently the default is en_US.UFT-8 or en_US, + depending on whether your originally (non-supported) locale was UTF-8 or not. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" 
+ + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. + \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json index 420f73742f5c..ef14b084e5f0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", - "subtitle": "Transfer-Files-tofrom-the-HPC", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": "connecting_paragraph_10", "next_title": "connecting_paragraph_12", "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc" + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt index 8d0031fcca9f..d872c89a0f83 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt @@ -1,12 +1,6 @@ -Transfer Files tofrom the HPC -Using scp -Secure copy or SCP is a tool (command) for securely transferring files between a local -host (= your computer) and a remote host (the HPC). It is based on the -Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., -copy) command, but can copy files to or from remote machines. -It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if -you have symlinks to them in your home directory. See -the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux - for how to do this. -Open an additional terminal window and check that you're working on your -local machine. +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json index 19eba778d90c..081156a5d163 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json @@ -1,14 +1,12 @@ { "main_title": "connecting", - "subtitle": "Using-scp", - "title_depth": 3, + "subtitle": "Transfer-Files-tofrom-the-HPC", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" - }, + "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": "connecting_paragraph_11", - "next_title": "connecting_linux_paragraph_12.2", + "next_title": "connecting_paragraph_13", "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt index a0496edfb14b..8d0031fcca9f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt @@ -1,10 +1,12 @@ Transfer Files tofrom the HPC -Using a GUI -If you prefer a GUI to transfer files back 
and forth to the HPC, you can -use your file browser. Open your file browser and press -++"Ctrl"+"l"++ -This should open up a address bar where you can enter a URL. -Alternatively, look for the "connect to server" option in your file -browsers menu. -Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter. -You should now be able to browse files on the HPC in your file browser. +Using scp +Secure copy or SCP is a tool (command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json index d634a356654e..988c10028d80 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json @@ -1,11 +1,15 @@ { "main_title": "connecting", - "subtitle": "Using-a-GUI", + "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, "previous_title": "connecting_paragraph_12", - "next_title": "connecting_paragraph_14", + "next_title": "connecting_linux_paragraph_13.2", "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui" + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json index 0b3a3418c55d..43affa4e36c7 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_12.1", - "next_title": "connecting_linux_paragraph_12.3", + "previous_title": "connecting_linux_paragraph_13.1", + "next_title": "connecting_linux_paragraph_13.3", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json 
similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json index 5a401911cab7..ccc74bb5b940 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_12.3", - "next_title": "connecting_linux_paragraph_12.5", + "previous_title": "connecting_linux_paragraph_13.2", + "next_title": "connecting_linux_paragraph_13.4", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.4.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json similarity index 61% rename from 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json index 5624749ede84..9ffcc4121f41 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_12.2", - "next_title": "connecting_linux_paragraph_12.4", + "previous_title": "connecting_linux_paragraph_13.3", + "next_title": "connecting_linux_paragraph_13.5", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json rename 
to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json index a479f66e7e04..8e3b4056b6b0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.5_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_12.4", - "next_title": "connecting_linux_paragraph_12.6", + "previous_title": "connecting_linux_paragraph_13.4", + "next_title": "connecting_linux_paragraph_13.6", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json similarity index 65% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json 
index 9c744fd5133a..1fc868ffab22 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", @@ -8,8 +9,8 @@ "0": "", "1": "" }, - "previous_title": "connecting_linux_paragraph_12.5", - "next_title": "connecting_linux_paragraph_12.7", + "previous_title": "connecting_linux_paragraph_13.5", + "next_title": "connecting_linux_paragraph_13.7", "OS": "linux", "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt new file mode 100644 index 000000000000..a0496edfb14b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt @@ -0,0 +1,10 @@ +Transfer Files tofrom the HPC +Using a GUI +If you prefer a GUI to transfer files back and forth to the HPC, you can +use your file browser. Open your file browser and press +++"Ctrl"+"l"++ +This should open up a address bar where you can enter a URL. +Alternatively, look for the "connect to server" option in your file +browsers menu. +Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter. +You should now be able to browse files on the HPC in your file browser. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json new file mode 100644 index 000000000000..e3c48fe48297 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_paragraph_13", + "next_title": "connecting_paragraph_15", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json index 05996eb5df2c..55613bca732c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json index 85a826e41a3e..21b63518804c 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt index b2734cc9f897..dfc592117923 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt @@ -1,4 +1,7 @@ Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. 1. Create a new public/private SSH key pair from the new computer. Repeat the process described in sectionĀ Generate a public/private key pair with OpenSSH. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json index dd8b3400419f..d9d3c33f876c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Applying-for-the-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json index 33d083958b99..5400014a85c4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "How-do-SSH-keys-work", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json index c75d6aede582..028d9d25f7fd 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Test-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json index 7f6c80a32f64..dfec6f6fd5a8 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Test-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json index 7c0f0d2a04d5..5a10e780b451 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json index 346108200ac7..8da465c1f24e 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json index 25baa1e073f3..9d6f7b1a741a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json index b8931a423d3c..17a34a2f80b6 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": 
"Using-an-SSH-agent-(optional", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json index c43391b146ec..f9b6c751fd41 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json index 519b58bb1513..072a43cb3e43 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt index d11380c25196..5a5a52da0629 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt @@ -1,11 +1,12 @@ Applying for the account +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. This file has been stored in the directory "~/.ssh/". tip As ".ssh" is an invisible directory, the Finder will not show it by default. The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++), which will allow you to enter the name of a directory, which you would like to open in Finder. Here, type "~/.ssh" and press enter. -After you have uploaded your public key you will receive an e-mail with -a link to confirm your e-mail address. After confirming your e-mail -address the VSC staff will review and if applicable approve your -account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json index 6b6e8c727031..86c8c2048bfd 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Applying-for-the-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt deleted file mode 100644 index 4c8894438c9f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_10.1.txt +++ /dev/null @@ -1,33 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Open the .bashrc on your local machine with your favourite editor and - add the following lines: - - $ nano ~/.bashrc - ... - export LANGUAGE="en_US.UTF-8" - export LC_ALL="en_US.UTF-8" - export LC_CTYPE="en_US.UTF-8" - export LANG="en_US.UTF-8" - ... - - tip "tip: vi" - To start entering text in vi: move to the place you want to start - entering text with the arrow keys and type "i" to switch to insert mode. 
You can easily exit vi by entering: ""ESC" :wq" - To exit vi without saving your changes, enter ""ESC":q!" - - - or alternatively (if you are not comfortable with the Linux editors), - again on your local machine: - - echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - - You can now log out, open a new terminal/shell on your local machine and - reconnect to the login node, and you should not get these warnings anymore. - \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt index d872c89a0f83..1d9129245359 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt @@ -1,6 +1,37 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -The preferred way to transfer files is by using an scp or sftp via the -secure OpenSSH protocol. ships with an implementation of OpenSSH, so you -don't need to install any third-party software to use it. Just open a -terminal window and jump in! +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. 
+ Note + If you try to set a non-supported locale, then it will be automatically + set to the default. Currently the default is en_US.UFT-8 or en_US, + depending on whether your originally (non-supported) locale was UTF-8 or not. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" + + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. 
+ \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json index 1425455ade89..323292b910e3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", - "subtitle": "Transfer-Files-tofrom-the-HPC", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 2, "directory": "connecting", "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": "connecting_paragraph_10", "next_title": "connecting_paragraph_12", "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc" + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt index 8d0031fcca9f..d872c89a0f83 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt @@ -1,12 +1,6 @@ -Transfer Files tofrom the HPC -Using scp -Secure copy or SCP is a tool (command) for securely transferring files between a local -host (= your computer) and a remote host (the HPC). It is based on the -Secure Shell (SSH) protocol. 
The scp command is the equivalent of the cp (i.e., -copy) command, but can copy files to or from remote machines. -It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if -you have symlinks to them in your home directory. See -the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux - for how to do this. -Open an additional terminal window and check that you're working on your -local machine. +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json index 332e6ed2996f..8a420f36c2bd 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json @@ -1,14 +1,12 @@ { "main_title": "connecting", - "subtitle": "Using-scp", - "title_depth": 3, + "subtitle": "Transfer-Files-tofrom-the-HPC", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" - }, + "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": 
"connecting_paragraph_11", - "next_title": "connecting_macos_paragraph_12.2", + "next_title": "connecting_paragraph_13", "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt deleted file mode 100644 index f1da0677a677..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2.txt +++ /dev/null @@ -1,17 +0,0 @@ -$ hostname - -If you're still using the terminal that is connected to the HPC, close the -connection by typing "exit" in the terminal window. -For example, we will copy the (local) file "localfile.txt" to your -home directory on the HPC cluster. We first generate a small dummy -"localfile.txt", which contains the word "Hello". Use your own VSC -account, which is something like "vsc40000". Don't forget the colon (:) at the -end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your -local filesystem. You can even specify where to save the file on the -remote filesystem by putting a path after the colon. -$ echo "Hello" > localfile.txt -$ ls -l -... 
--rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -$ scp localfile.txt vsc40000@login.hpc.ugent.be: -localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json deleted file mode 100644 index d86cdd989ac6..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.2_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_12.1", - "next_title": "connecting_macos_paragraph_12.3", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt index 20a4acb40a80..8d0031fcca9f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt @@ -1,15 +1,12 @@ Transfer Files tofrom the HPC -Using a GUI (Cyberduck) -Cyberduck is a graphical alternative to the scp command. It can be -installed from . -This is the one-time setup you will need to do before connecting: -1. After starting Cyberduck, the Bookmark tab will show up. To add a - new bookmark, click on the "+" sign on the bottom left of the - window. A new window will open. -2. In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)". -3. 
In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in - your VSC account id (this looks like vsc40000). -4. Select the location of your SSH private key in the "SSH Private Key" field. -5. Finally, type in a name for the bookmark in the "Nickname" field and - close the window by pressing on the red circle in the top left - corner of the window. +Using scp +Secure copy or SCP is a tool (command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json index bd02ed8502fe..9ec843ff0aa8 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json @@ -1,11 +1,15 @@ { "main_title": "connecting", - "subtitle": "Using-a-GUI-(Cyberduck)", + "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, "previous_title": "connecting_paragraph_12", "next_title": "connecting_macos_paragraph_13.2", "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt index 1d20edf411f8..f1da0677a677 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt @@ -1,3 +1,17 @@ -To open the connection, click on the "Bookmarks" icon (which -resembles an open book) and double-click on the bookmark you just -created. 
+$ hostname + +If you're still using the terminal that is connected to the HPC, close the +connection by typing "exit" in the terminal window. +For example, we will copy the (local) file "localfile.txt" to your +home directory on the HPC cluster. We first generate a small dummy +"localfile.txt", which contains the word "Hello". Use your own VSC +account, which is something like "vsc40000". Don't forget the colon (:) at the +end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your +local filesystem. You can even specify where to save the file on the +remote filesystem by putting a path after the colon. +$ echo "Hello" > localfile.txt +$ ls -l +... +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +$ scp localfile.txt vsc40000@login.hpc.ugent.be: +localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json index 344ff690d546..dc57de365bf1 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", - "subtitle": "Using-a-GUI-(Cyberduck)", + "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "previous_title": "connecting_macos_paragraph_13.1", - "next_title": "connecting_paragraph_14", + "next_title": "connecting_macos_paragraph_13.3", "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" } \ No newline at end of file diff 
--git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json index 4fcc42d23375..5a4623c650db 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_12.2", - "next_title": "connecting_macos_paragraph_12.4", + "previous_title": "connecting_macos_paragraph_13.2", + "next_title": "connecting_macos_paragraph_13.4", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json index 757b533cf8df..54b3fe19d58f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_12.3", - "next_title": "connecting_macos_paragraph_12.5", + "previous_title": "connecting_macos_paragraph_13.3", + "next_title": "connecting_macos_paragraph_13.5", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt similarity 
index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json similarity index 61% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json index d18c7c7deb5c..0b9ba08e3b11 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.5_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_12.4", - "next_title": "connecting_macos_paragraph_12.6", + "previous_title": "connecting_macos_paragraph_13.4", + "next_title": "connecting_macos_paragraph_13.6", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt similarity index 100% rename from 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json similarity index 65% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json index a8a4f2a3bab0..fe899ad9dbc4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", @@ -8,8 +9,8 @@ "0": "", "1": "" }, - "previous_title": "connecting_macos_paragraph_12.5", - "next_title": "connecting_macos_paragraph_12.7", + "previous_title": "connecting_macos_paragraph_13.5", + "next_title": "connecting_macos_paragraph_13.7", "OS": "macos", "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt new file mode 100644 index 000000000000..20a4acb40a80 --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt @@ -0,0 +1,15 @@ +Transfer Files tofrom the HPC +Using a GUI (Cyberduck) +Cyberduck is a graphical alternative to the scp command. It can be +installed from . +This is the one-time setup you will need to do before connecting: +1. After starting Cyberduck, the Bookmark tab will show up. To add a + new bookmark, click on the "+" sign on the bottom left of the + window. A new window will open. +2. In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)". +3. In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in + your VSC account id (this looks like vsc40000). +4. Select the location of your SSH private key in the "SSH Private Key" field. +5. Finally, type in a name for the bookmark in the "Nickname" field and + close the window by pressing on the red circle in the top left + corner of the window. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json new file mode 100644 index 000000000000..694b7682aa97 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI-(Cyberduck)", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_paragraph_13", + "next_title": "connecting_macos_paragraph_14.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt new file mode 100644 index 000000000000..1d20edf411f8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt @@ -0,0 +1,3 @@ +To open the connection, click on the "Bookmarks" icon (which +resembles an open book) and double-click on the bookmark you just +created. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json new file mode 100644 index 000000000000..e32b1ab4c58e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI-(Cyberduck)", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_14.1", + "next_title": "connecting_paragraph_15", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json index e17629a55f3b..85b088b0e8c3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json index 5c1d808739cc..047d58633612 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt index 0863009f2906..ca00a8a0f651 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt @@ -1,4 +1,7 @@ Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. 1. Create a new public/private SSH key pair from Putty. Repeat the process described in sectionĀ Generate a public/private key pair. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json index eb4dd3b3a57c..4614c053f2ce 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Adding-multiple-SSH-public-keys-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Applying-for-the-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json index ce74735c538f..7dc9b50fbdd2 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Get-PuTTY-A-free-telnetSSH-client", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json index 9616b41452a8..773acaabf239 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generating-a-publicprivate-key-pair", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt index de5d164bb7a6..b082d381a64a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt @@ -27,3 +27,11 @@ Start PuTTYgen.exe it and follow these steps: "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and "id_rsa.ppk" for the private key. +6. Finally, save an "OpenSSH" version of your private key (in + particular for later "X2Go" usage, see x2go) by entering the + "Conversions" menu and selecting "Export OpenSSH key" (do not select the + "force new file format" variant). Save the file in the same location + as in the previous step with filename "id_rsa". (If there is no + "Conversions" menu, you must update your "puttygen" version. If you + want to do this conversion afterwards, you can start with loading an + existing "id_rsa.ppk" and only do this conversions export.) 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json index 06b6e998c081..d803aeadb25e 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json @@ -1,9 +1,13 @@ { "main_title": "account", "subtitle": "Generating-a-publicprivate-key-pair", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", + "links": { + "0": "" + }, "previous_title": "account_windows_paragraph_4.2", "next_title": "account_windows_paragraph_4.4", "OS": "windows", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json index fba810e72990..ebd55060657f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Generating-a-publicprivate-key-pair", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json index 69771b48c868..5fd697066b62 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json index 246707080706..46808447a108 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json index d47ad3bd215d..e33d002d2485 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", "parent_title": 
"Getting-ready-to-request-an-account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt index 90c17263cf5f..9fd23612756d 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt @@ -1,7 +1,8 @@ Applying for the account +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. This file should have been stored in the directory "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh" -After you have uploaded your public key you will receive an e-mail with -a link to confirm your e-mail address. After confirming your e-mail -address the VSC staff will review and if applicable approve your -account. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json index d01ac9c3c16e..87cda41283f4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "account", "subtitle": "Applying-for-the-account", + "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 2, "directory": "account", "parent_title": "account", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt deleted file mode 100644 index aaf5a585ebd5..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1.txt +++ /dev/null @@ -1,5 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json deleted file mode 100644 index 45c2bd2d90e4..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_10.1_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_9", - "next_title": "connecting_paragraph_11", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt index a4f00ba7a5ff..5aa8ca033740 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt @@ -1,20 +1,9 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -WinSCP -To transfer files to and from the cluster, we recommend the use of -WinSCP, a graphical file management tool which can transfer files using -secure protocols such as SFTP and SCP. WinSCP is freely available from -. -To transfer your files using WinSCP, -1. 
Open the program -2. The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" - 1. Click "New Site". - 2. Enter "login.hpc.ugent.be" in the "Host name" field. - 3. Enter your "vsc-account" in the "User name" field. - 4. Select "SCP" as the "file" protocol. - 5. Note that the password field remains empty. - 6. Click "Advanced...". - 7. Click "SSH > Authentication". - 8. Select your private key in the field "Private key file". -3. Press the "Save" button, to save the session under "Session > Sites" for future access. -4. Finally, when clicking on "Login", you will be asked for your key passphrase. +First Time connection to the HPC infrastructure + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Note + If you try to set a non-supported locale, then it will be automatically + set to the default. Currently the default is en_US.UTF-8 or en_US, + depending on whether your original (non-supported) locale was UTF-8 or not. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json index d9fbc64790ad..d4b02dbc9fb4 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", - "subtitle": "WinSCP", - "title_depth": 3, + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "parent_title": "Connecting-to-the-HPC-infrastructure", "previous_title": "connecting_paragraph_10", - "next_title": "connecting_windows_paragraph_11.2", + "next_title": "connecting_paragraph_12", "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt new file mode 100644 index 000000000000..67e5e4548529 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt @@ -0,0 +1,22 @@ +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. 
At the end of a job, you might want to transfer some files back. +WinSCP +To transfer files to and from the cluster, we recommend the use of +WinSCP, a graphical file management tool which can transfer files using +secure protocols such as SFTP and SCP. WinSCP is freely available from +. +To transfer your files using WinSCP, +1. Open the program +2. The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" + 1. Click "New Site". + 2. Enter "login.hpc.ugent.be" in the "Host name" field. + 3. Enter your "vsc-account" in the "User name" field. + 4. Select "SCP" as the "file" protocol. + 5. Note that the password field remains empty. + + 6. Click "Advanced...". + 7. Click "SSH > Authentication". + 8. Select your private key in the field "Private key file". +3. Press the "Save" button, to save the session under "Session > Sites" for future access. +4. Finally, when clicking on "Login", you will be asked for your key passphrase. 
+ diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json similarity index 63% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json index 65055dc07647..a4bbaee0f598 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "WinSCP", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_windows_paragraph_11.1", - "next_title": "connecting_windows_paragraph_11.3", + "previous_title": "connecting_paragraph_11", + "next_title": "connecting_windows_paragraph_12.2", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.2.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json new file mode 100644 index 000000000000..80a8ef763a1b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_windows_paragraph_12.1", + "next_title": "connecting_windows_paragraph_12.3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt similarity index 100% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3.txt rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json similarity index 63% rename from scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json rename to scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json index 
dd628f8e8cd6..07760730d56f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json @@ -1,11 +1,12 @@ { "main_title": "connecting", "subtitle": "WinSCP", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_windows_paragraph_11.2", - "next_title": "connecting_paragraph_12", + "previous_title": "connecting_windows_paragraph_12.2", + "next_title": "connecting_paragraph_13", "OS": "windows", "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt index 69db57957dce..e45f4e63b85b 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt @@ -4,6 +4,7 @@ You've generated a public/private key pair with PuTTYgen and have an approved account on the VSC clusters. The next step is to setup the connection to (one of) the HPC. In the screenshots, we show the setup for user +"vsc20167" to the HPC cluster via the login node "login.hpc.ugent.be". 1. 
Start the PuTTY executable putty.exe in your directory C:\Program Files (x86)\PuTTY and the configuration screen will pop diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json index ef4de8bd8e47..8b6b6f698d1a 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json @@ -1,6 +1,7 @@ { "main_title": "connecting", "subtitle": "Open-a-Terminal", + "source_file": "../../mkdocs/docs/HPC/connecting.md", "title_depth": 3, "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", From 9e297b18ef9827a20a1283053ad49c3e081044e7 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 15:29:52 +0200 Subject: [PATCH 136/145] new test for links --- scripts/HPC_chatbot_preprocessor/README.md | 6 +- .../chatbot_parser.py | 89 ++++++++++++++----- .../account/account_paragraph_1_metadata.json | 2 +- .../connecting_paragraph_15_metadata.json | 2 +- .../connecting_paragraph_3_metadata.json | 2 +- .../connecting_paragraph_8_metadata.json | 2 +- .../account_linux_paragraph_4.1_metadata.json | 2 +- .../account_linux_paragraph_7.1_metadata.json | 2 +- ...necting_linux_paragraph_13.1_metadata.json | 2 +- ...necting_linux_paragraph_13.6_metadata.json | 4 +- ...nnecting_linux_paragraph_5.1_metadata.json | 2 +- .../account_macos_paragraph_4.1_metadata.json | 2 +- .../account_macos_paragraph_7.1_metadata.json | 2 +- ...ccount_windows_paragraph_4.1_metadata.json | 2 +- ...ccount_windows_paragraph_4.2_metadata.json | 4 +- ...ccount_windows_paragraph_4.3_metadata.json | 2 +- ...ccount_windows_paragraph_6.2_metadata.json | 4 +- 
...ecting_windows_paragraph_4.1_metadata.json | 2 +- .../tests/test_links.py | 69 ++++++++++++++ 19 files changed, 158 insertions(+), 44 deletions(-) create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_links.py diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 27c1bf3fea6a..96a99498451f 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ b/scripts/HPC_chatbot_preprocessor/README.md @@ -172,7 +172,7 @@ Any comments within the markdown files (for example TODO's) should follow the fo Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment as such. ``` - + ``` This will be reworked to @@ -190,3 +190,7 @@ Due to the nature of this script, it can generate large directories with very lo ### Markdown lists The parser is made in a way to detect lists and not split them in multiple paragraphs. The kinds of lists it can detect is all lists with denominators `-`, `+`, `*` and list indexed with numbers or letters (one letter per list entry). It can handle list entries being spread out over multiple lines if there is an indentation of at least two spaces. It can also handle multiple paragraph list entries in this way, as long as the indentation stays. + +### Links + +Part of the metadata of the parser are links. In order for the links to be built up in the right way, links to external sites should always start with either `https://` or `http://`. 
diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 3129ccaf5664..9aa7dc972e54 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -104,7 +104,7 @@ METADATA_EXTENSION = "_metadata" # Marker for comments for the bot -INPUT_FOR_BOT = "INPUT_FOR_BOT" +INPUT_FOR_BOT = "INPUT_FOR_BOT: " # Standard strings for verbose output LINE = "------------------------------------------------------------------------------------------------------\n" @@ -138,7 +138,46 @@ def check_for_title(line, in_code_block, curr_dirs, options): return 0 -def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): +def make_valid_link(link, main_title, is_linux_tutorial): + """ + Function that converts a string to a valid link to be used in the metadata + + :param link: the input string to be turned into a valid link + :param main_title: the main title of the file that contains the link + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial + :return link: the valid link + """ + + # ugly fix for problem with links + linux_tutorial_files = ["beyond_the_basics", "common_pitfalls", "getting_started", "hpc_infrastructure", "index", "manipulating_files_and_directories", "navigating", "uploading_files"] + if is_linux_tutorial and any([linux_tutorial_files[i] in link for i in range(len(linux_tutorial_files))]): + linux_part = LINUX_TUTORIAL + '/' + else: + linux_part = "" + + if link.startswith('http://') or link.startswith('https://') or link.startswith('mailto:'): + pass + else: + if link.startswith("./"): + link = link.replace('./', '') + elif link.startswith("../"): + link = link.replace('../', '') + + if link.startswith("#"): + link = DOCS_URL + '/' + linux_part + main_title + "/" + link + elif link.endswith(".md") and ("/" not in link or "." 
not in link.split("/")[0]): + link = DOCS_URL + '/' + linux_part + link.replace(".md", "") + elif '.md#' in link: + link = DOCS_URL + '/' + linux_part + link.replace(".md", "/") + else: + link = DOCS_URL + '/' + linux_part + link + + link = link.replace('index/', '').replace('/index', '') + + return link + + +def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_linux_tutorial): """ function that replaces certain markdown structures with the equivalent used on the website @@ -146,12 +185,13 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): :param linklist: the list used to store links that need to be printed at the end of the file :param in_code_block: boolean indicating whether the current line is part of a code block :param main_title: the main title of the file that is being processed + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return curr_line: the adapted current line :return linklist: the updated linklist """ # replace images with an empty line - if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[]\(img/.*?.png\)', curr_line): + if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[.*?]\(img/.*?\.png\)', curr_line): curr_line = "" # replace links with a reference @@ -159,13 +199,8 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): if matches: for match in matches: curr_line = curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER) - if ".md" not in match[1]: - if "#" not in match[1]: - linklist.append(match[1]) - else: - linklist.append(DOCS_URL + "/" + main_title.replace(".md", "") + "/" + match[1]) - else: - linklist.append(DOCS_URL + "/" + match[1].replace(".md", "/").replace("index", "").rstrip("/")) + + linklist.append(make_valid_link(match[1], main_title, is_linux_tutorial)) # codeblock (with ``` -> always stands on a separate line, 
so line can be dropped) if '```' in curr_line: @@ -238,13 +273,14 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title): return curr_line, linklist -def split_text(file, main_title, options, current_paragraph_number=-1, OS=GENERIC): +def split_text(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph on paragraph level :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph on paragraph level :return paragraphs_text: dictionary containing the split sections of text @@ -253,18 +289,19 @@ def split_text(file, main_title, options, current_paragraph_number=-1, OS=GENERI """ if options[SPLIT_ON_TITLES]: - return split_on_titles(file, main_title, options) + return split_on_titles(file, main_title, options, is_linux_tutorial) elif options[SPLIT_ON_PARAGRAPHS]: - return split_on_paragraphs(file, main_title, options, current_paragraph_number, OS) + return split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number, OS) -def split_on_titles(file, main_title, options): +def split_on_titles(file, main_title, options, is_linux_tutorial): """ Function that splits the text into smaller sections based on the subtitle structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file :param options: dictionary containing the options 
given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :return paragraphs_text: dictionary containing the split sections of text :return paragraphs_metadata: dictionary containing the metadata of each split section of text :return subtitle_order: list containing all encountered subtitles in order of appearance @@ -356,7 +393,7 @@ def split_on_titles(file, main_title, options): # line is not a title elif after_first_title: - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) if line != "\n": current_paragraph += line @@ -366,7 +403,7 @@ def split_on_titles(file, main_title, options): last_dir = curr_dirs[last_title_level] else: previous_contained_if = True - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) if line != "\n": current_paragraph += line @@ -384,13 +421,14 @@ def split_on_titles(file, main_title, options): return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order -def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, OS=GENERIC): +def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC): """ Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata :param file: the filepath of the file to be split :param main_title: the main title of the file :param options: dictionary containing the options given by the user + :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial :param current_paragraph_number: number of the paragraph that is being split, only applicable when 
splitting an os-specific paragraph :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph :return paragraphs_text: dictionary containing the split sections of text @@ -524,12 +562,12 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, # make a new title metadata_title = make_valid_title(line[title_level + 1:-1]) - line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, main_title, is_linux_tutorial) current_paragraph += line # line is not a title or the beginning of a new paragraph elif line != "\n" or previous_contained_if: - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) current_paragraph += line # keep track of title level and directory to write to metadata upon discovering a new subtitle @@ -538,7 +576,7 @@ def split_on_paragraphs(file, main_title, options, current_paragraph_number=-1, last_dir = curr_dirs[last_title_level] else: previous_contained_if = True - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title) + line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) current_paragraph += line # create a title for the last paragraph @@ -799,7 +837,7 @@ def make_valid_title(title): valid_filename = re.sub(invalid_chars, '', title) # Strip leading/trailing whitespace - valid_filename = valid_filename.strip().strip('-').replace(' ', '-') + valid_filename = valid_filename.strip().strip('-').replace(' ', '-').replace("--", "-") return valid_filename @@ -889,7 +927,10 @@ def write_files(title, text, paragraphs_metadata, title_order, title_order_numbe os_part = "" else: os_part = LINK_OS[OS] + "/" - metadata[REFERENCE_LINK] 
= DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') + if "index" not in paragraphs_metadata[title][MAIN_TITLE]: + metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') + else: + metadata[REFERENCE_LINK] = DOCS_URL # write metadata to file with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile: @@ -964,7 +1005,7 @@ def split_and_write_os_specific_section(text, metadata, subtitle_order, title_or writefile.write(jinja_text) # split in right way - _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) + _, os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, is_linux_tutorial, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) # prepare variables to fix metadata total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] @@ -1110,7 +1151,7 @@ def main(options): print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)") # split the text in paragraphs - paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options) + paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options, is_linux_tutorial) if options[VERBOSE]: print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files 
with metadata") diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json index cdba091d7dfe..738d24cb42ed 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json @@ -5,7 +5,7 @@ "title_depth": 2, "directory": "account", "links": { - "0": "../sites/hpc_policies" + "0": "https://docs.hpc.ugent.be/sites/hpc_policies" }, "parent_title": "", "previous_title": null, diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json index ff9c22397d1c..74ea0125d713 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json @@ -5,7 +5,7 @@ "title_depth": 2, "directory": "connecting", "links": { - "0": "https://docs.hpc.ugent.be/connecting/../linux-tutorial/uploading_files/#copying-faster-with-rsync" + "0": "https://docs.hpc.ugent.be/linux-tutorial/uploading_files/#copying-faster-with-rsync" }, "parent_title": "", "previous_title": "connecting_paragraph_14", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json index e30467d07990..8d6b1696e08c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "links": { "0": 
"https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#issues-connecting-to-login-node" + "1": "https://docs.hpc.ugent.be/troubleshooting/#issues-connecting-to-login-node" }, "parent_title": "", "previous_title": "connecting_paragraph_2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json index 074e7e891ce0..38f265cfdcde 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json @@ -5,7 +5,7 @@ "title_depth": 2, "directory": "connecting", "links": { - "0": "../useful_linux_commands" + "0": "https://docs.hpc.ugent.be/useful_linux_commands" }, "parent_title": "", "previous_title": "connecting_paragraph_7", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json index bcc0552177dd..bc51f39d286c 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../../linux-tutorial" + "0": "https://docs.hpc.ugent.be/linux-tutorial" }, "previous_title": "account_paragraph_3", "next_title": "account_paragraph_5", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json index 4214d6cb321f..2b3633d71e74 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../connecting" + "0": "https://docs.hpc.ugent.be/connecting" }, "previous_title": "account_paragraph_6", "next_title": "account_linux_paragraph_7.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json index 988c10028d80..6b70790e1e36 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "links": { - "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" }, "previous_title": "connecting_paragraph_12", "next_title": "connecting_linux_paragraph_13.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json index 1fc868ffab22..c7fe6bf6a44a 100644 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json @@ -6,8 +6,8 @@ "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "links": { - "0": "", - "1": "" + "0": "https://docs.hpc.ugent.be/", + "1": "https://docs.hpc.ugent.be/" }, "previous_title": "connecting_linux_paragraph_13.5", "next_title": "connecting_linux_paragraph_13.7", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json index 55613bca732c..66c5dc4aeff7 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", "links": { - "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" }, "previous_title": "connecting_paragraph_4", "next_title": "connecting_linux_paragraph_5.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json index 5400014a85c4..e3813cb647ef 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../../linux-tutorial" + "0": "https://docs.hpc.ugent.be/linux-tutorial" }, "previous_title": "account_paragraph_3", "next_title": "account_paragraph_5", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json index f9b6c751fd41..18b3b3675deb 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../connecting" + "0": "https://docs.hpc.ugent.be/connecting" }, "previous_title": "account_paragraph_6", "next_title": "account_macos_paragraph_7.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json index 7dc9b50fbdd2..dc5a8cb22b99 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "../../linux-tutorial" + "0": "https://docs.hpc.ugent.be/linux-tutorial" }, "previous_title": "account_paragraph_3", "next_title": "account_windows_paragraph_4.2", diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json index 773acaabf239..534ebda0a1c3 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json @@ -6,8 +6,8 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "https://docs.hpc.ugent.be/account/../connecting/#open-a-terminal", - "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + "0": "https://docs.hpc.ugent.be/connecting/#open-a-terminal", + "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair" }, "previous_title": "account_windows_paragraph_4.1", "next_title": "account_windows_paragraph_4.3", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json index d803aeadb25e..4555638639d6 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json @@ -6,7 +6,7 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "" + "0": "https://docs.hpc.ugent.be/" }, "previous_title": "account_windows_paragraph_4.2", "next_title": "account_windows_paragraph_4.4", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json index 46808447a108..11c693380290 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json @@ -6,8 +6,8 @@ "directory": "account", "parent_title": "Getting-ready-to-request-an-account", "links": { - "0": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair", - "1": "https://docs.hpc.ugent.be/account/../account/#generating-a-publicprivate-key-pair" + "0": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair", + "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair" }, "previous_title": "account_windows_paragraph_6.1", "next_title": "account_windows_paragraph_6.3", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json index 8b6b6f698d1a..d3b7d581c943 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", "links": { - "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" }, "previous_title": "connecting_paragraph_3", "next_title": "connecting_paragraph_5", diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py 
b/scripts/HPC_chatbot_preprocessor/tests/test_links.py new file mode 100644 index 000000000000..d1acca1d7409 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_links.py @@ -0,0 +1,69 @@ +import os +import pytest +from urllib import request +from chatbot_parser import main +import json + +whitelist = ["mailto:hpc@ugent.be"] +slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"] + +options_general = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC", + "DESTINATION_DIRECTORY": ".", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 683, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False} +options_os_specific = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC/linux-tutorial", + "DESTINATION_DIRECTORY": "./linux-tutorial", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 683, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False} + + +@pytest.mark.parametrize("options", [options_general, options_os_specific]) +def test_all_links(options): + all_links = {} + main(options) + broken_links = {} + empty_links = {} + + for (dirpath, dirnames, filenames) in os.walk(os.path.join(options['DESTINATION_DIRECTORY'], 'parsed_mds')): + for filename in filenames: + all_links[filename] = [] + if filename.endswith('metadata.json'): + data = json.load(open(os.path.join(dirpath, filename))) + if 'links' in data.keys(): + for key in data['links'].keys(): + all_links[filename].append(data['links'][key]) + all_links[filename].append(data['reference_link'].split("#")[0]) + + for filename in all_links.keys(): + all_links[filename] = list(set(all_links[filename])) + for link in all_links[filename]: + if len(link) != 0: + try: + if link not in whitelist and link not in slow_list: + with request.urlopen(link) as res: + if res.status == 200: + 
pass + except: + print("Broken link in " + filename + ": " + link) + if filename in broken_links.keys(): + broken_links[filename].append(link) + else: + broken_links[filename] = [link] + else: + print("Empty link in " + filename) + if filename in empty_links.keys(): + empty_links[filename].append(link) + else: + empty_links[filename] = [link] + assert len(empty_links.keys()) == 0 + assert len(broken_links.keys()) == 0 From b6b861044b0b12f06ba9b59ac7406feef07761e8 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 16:44:15 +0200 Subject: [PATCH 137/145] new test to make sure lists are kept as one section --- .../chatbot_parser.py | 2 ++ .../generic/account/account_paragraph_1.txt | 2 ++ .../account/account_paragraph_1_metadata.json | 3 ++- .../generic/account/account_paragraph_2.txt | 7 +++-- .../account/account_paragraph_2_metadata.json | 5 ++-- .../generic/account/account_paragraph_3.txt | 8 +++--- .../account/account_paragraph_3_metadata.json | 3 +++ .../account/account_linux_paragraph_4.1.txt | 4 --- .../account_linux_paragraph_4.1_metadata.json | 15 ----------- .../account/account_macos_paragraph_4.1.txt | 4 --- .../account_macos_paragraph_4.1_metadata.json | 15 ----------- .../account/account_macos_paragraph_5.1.txt | 3 +++ .../account/account_macos_paragraph_5.2.txt | 7 ++--- .../account/account_macos_paragraph_5.3.txt | 4 --- ...necting_macos_paragraph_13.1_metadata.json | 2 +- ...necting_macos_paragraph_13.6_metadata.json | 4 +-- ...nnecting_macos_paragraph_5.1_metadata.json | 2 +- .../account/account_windows_paragraph_4.1.txt | 9 ++++--- ...ccount_windows_paragraph_4.1_metadata.json | 3 --- .../account/account_windows_paragraph_4.2.txt | 13 +++++----- .../account/account_windows_paragraph_4.3.txt | 7 ----- .../tests/test_files/list_file/list_test.md | 15 +++++++++++ .../tests/test_lists.py | 26 +++++++++++++++++++ 23 files changed, 84 insertions(+), 79 deletions(-) delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md create mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_lists.py diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 9aa7dc972e54..f5e5b452ff52 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -503,6 +503,8 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa pass elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries pass + elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', nxt): + in_list = True elif in_list: if options[VERBOSE]: print("List ended, starting new paragraphs again") diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt index 1b79fd223918..c3f86ade1802 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt @@ -11,3 +11,5 @@ Brussels University Association, Antwerp University Association and the University Colleges-Limburg. The VSC is funded by the Flemish Government. There are two methods for connecting to HPC-UGent infrastructure: +- Using a terminal to connect via SSH. 
+- Using the web portal diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json index 738d24cb42ed..52a3ef55568e 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json @@ -5,7 +5,8 @@ "title_depth": 2, "directory": "account", "links": { - "0": "https://docs.hpc.ugent.be/sites/hpc_policies" + "0": "https://docs.hpc.ugent.be/sites/hpc_policies", + "1": "https://docs.hpc.ugent.be/web_portal" }, "parent_title": "", "previous_title": null, diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt index 6ecd65e2184d..9614ed1447cb 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt @@ -1,6 +1,9 @@ -- Using a terminal to connect via SSH. -- Using the web portal The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). If you would like use a terminal with SSH as this gives you more flexibility continue reading. However if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account. Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal. +The HPC-UGent infrastructure clusters use public/private key pairs for user authentication +(rather than passwords). 
Technically, the private key is stored on your +local computer and always stays there; the public key is stored on the HPC. +Access to the HPC is granted to anyone who can prove to have access to the +corresponding private key on his local computer. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json index 0b22e2986a00..a41a19936744 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json @@ -5,9 +5,8 @@ "title_depth": 2, "directory": "account", "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/account/#applying-for-the-account", - "2": "https://docs.hpc.ugent.be/web_portal" + "0": "https://docs.hpc.ugent.be/account/#applying-for-the-account", + "1": "https://docs.hpc.ugent.be/web_portal" }, "parent_title": "", "previous_title": "account_paragraph_1", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt index e49468692735..963b35c090bc 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt @@ -1,8 +1,3 @@ -The HPC-UGent infrastructure clusters use public/private key pairs for user authentication -(rather than passwords). Technically, the private key is stored on your -local computer and always stays there; the public key is stored on the HPC. -Access to the HPC is granted to anyone who can prove to have access to the -corresponding private key on his local computer. How do SSH keys work? 
- an SSH public/private key pair can be seen as a lock and a key - the SSH public key is equivalent with a lock: you give it to the @@ -15,3 +10,6 @@ How do SSH keys work? locks (SSH public keys) attached to it, and you only need to open one lock with the corresponding key (SSH private key) to open the door (log in to the account). +Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json index bd2f73195a6b..4df622cc4aa0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json @@ -4,6 +4,9 @@ "source_file": "../../mkdocs/docs/HPC/account.md", "title_depth": 3, "directory": "account", + "links": { + "0": "https://docs.hpc.ugent.be/linux-tutorial" + }, "parent_title": "", "previous_title": "account_paragraph_2", "next_title": "account_paragraph_4", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt deleted file mode 100644 index 3a282a73a15d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt +++ /dev/null @@ -1,4 +0,0 @@ -How do SSH keys work -Since all VSC clusters use Linux as their main operating system, you -will need to get acquainted with using the command-line interface and -using the terminal (see tutorial). 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json deleted file mode 100644 index bc51f39d286c..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "How-do-SSH-keys-work", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial" - }, - "previous_title": "account_paragraph_3", - "next_title": "account_paragraph_5", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#how-do-ssh-keys-work" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt deleted file mode 100644 index 3a282a73a15d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt +++ /dev/null @@ -1,4 +0,0 @@ -How do SSH keys work -Since all VSC clusters use Linux as their main operating system, you -will need to get acquainted with using the command-line interface and -using the terminal (see tutorial). 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json deleted file mode 100644 index e3813cb647ef..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "How-do-SSH-keys-work", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial" - }, - "previous_title": "account_paragraph_3", - "next_title": "account_paragraph_5", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#how-do-ssh-keys-work" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt index f3483fcaef16..d96c80b42a2f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt @@ -10,3 +10,6 @@ other secure network services between two networked computers. In short, ssh provides a secure connection between 2 computers via insecure channels (Network, Internet, telephone lines, ...). "Secure" means that: +1. the User is authenticated to the System; and +2. the System is authenticated to the User; and +3. all data is encrypted during transfer. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt index 5189a9530026..318f913fba34 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt @@ -1,6 +1,3 @@ -1. the User is authenticated to the System; and -2. the System is authenticated to the User; and -3. all data is encrypted during transfer. OpenSSH is a FREE implementation of the SSH connectivity protocol. comes with its own implementation of OpenSSH, so you don't need to install any third-party software to use it. Just open a terminal window and jump in! @@ -11,3 +8,7 @@ $ ssh -V OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 To access the clusters and transfer your files, you will use the following commands: +1. ssh-keygen: to generate the SSH key pair (public + private key); +2. ssh: to open a shell on a remote machine; +3. sftp: a secure equivalent of ftp; +4. scp: a secure equivalent of the remote copy command rcp. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt index a8c087f818b3..5df90a3dd7c0 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt @@ -1,7 +1,3 @@ -1. ssh-keygen: to generate the SSH key pair (public + private key); -2. ssh: to open a shell on a remote machine; -3. sftp: a secure equivalent of ftp; -4. scp: a secure equivalent of the remote copy command rcp. 
Generate a public/private key pair with OpenSSH A key pair might already be present in the default location inside your home directory. Therefore, we first check if a key is available with the diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json index 9ec843ff0aa8..791570056009 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "links": { - "0": "https://docs.hpc.ugent.be/connecting/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" }, "previous_title": "connecting_paragraph_12", "next_title": "connecting_macos_paragraph_13.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json index fe899ad9dbc4..9b08fbde5498 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json @@ -6,8 +6,8 @@ "directory": "connecting", "parent_title": "Transfer-Files-tofrom-the-HPC", "links": { - "0": "", - "1": "" + "0": "https://docs.hpc.ugent.be/", + "1": "https://docs.hpc.ugent.be/" }, "previous_title": "connecting_macos_paragraph_13.5", "next_title": 
"connecting_macos_paragraph_13.7", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json index 85b088b0e8c3..f928fbfcdd6e 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json @@ -6,7 +6,7 @@ "directory": "connecting", "parent_title": "First-Time-connection-to-the-HPC-infrastructure", "links": { - "0": "https://docs.hpc.ugent.be/connecting/../troubleshooting/#warning-message-when-first-connecting-to-new-host" + "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" }, "previous_title": "connecting_paragraph_4", "next_title": "connecting_macos_paragraph_5.2", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt index 1e70493305f7..93ca7ac9da5f 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt @@ -1,7 +1,4 @@ How do SSH keys work -Since all VSC clusters use Linux as their main operating system, you -will need to get acquainted with using the command-line interface and -using the terminal (see tutorial). A typical Windows environment does not come with pre-installed software to connect and run command-line executables on a HPC. Some tools need to be installed on your Windows machine first, before we can start the actual @@ -13,3 +10,9 @@ PuTTYgen executable and run it. 
This can be useful in situations where you do not have the required permissions to install software on the computer you are using. Alternatively, an installation package is also available. +You can download PuTTY from the official address: +. You +probably want the 64-bits version. If you can install software on your +computer, you can use the "Package files", if not, you can download and +use putty.exe and puttygen.exe in the "Alternative binary files" +section. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json index dc5a8cb22b99..e0024f40d556 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -5,9 +5,6 @@ "title_depth": 3, "directory": "account", "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial" - }, "previous_title": "account_paragraph_3", "next_title": "account_windows_paragraph_4.2", "OS": "windows", diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt index 1a30a219fecb..cebd1da3bafe 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt @@ -1,12 +1,13 @@ -You can download PuTTY from the official address: -. You -probably want the 64-bits version. 
If you can install software on your -computer, you can use the "Package files", if not, you can download and -use putty.exe and puttygen.exe in the "Alternative binary files" -section. The PuTTY package consists of several components, but we'll only use two: 1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) 2. PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, see Generate a public/private key pair) Generating a public/private key pair +Before requesting a VSC account, you need to generate a pair of ssh +keys. You need 2 keys, a public and a private key. You can visualise the +public key as a lock to which only you have the key (your private key). +You can send a copy of your lock to anyone without any problems, because +only you can open it, as long as you keep your private key secure. To +generate a public/private key pair, you can use the PuTTYgen key +generator. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt index b082d381a64a..6e65300562da 100644 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt @@ -1,10 +1,3 @@ -Before requesting a VSC account, you need to generate a pair of ssh -keys. You need 2 keys, a public and a private key. You can visualise the -public key as a lock to which only you have the key (your private key). -You can send a copy of your lock to anyone without any problems, because -only you can open it, as long as you keep your private key secure. To -generate a public/private key pair, you can use the PuTTYgen key -generator. Start PuTTYgen.exe it and follow these steps: 1. 
In "Parameters" (at the bottom of the window), choose "RSA" and set the number of bits in the key to 4096. diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md new file mode 100644 index 000000000000..1d1d3c210e8b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md @@ -0,0 +1,15 @@ +# Title + +Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list. + +1. First entry + +2. Second entry + +3. Third entry + + ![image](img/an_image_for_the_third_entry.png) + +4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit. + +And now the text continues like normal in a new section. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py new file mode 100644 index 000000000000..4975856a75f8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py @@ -0,0 +1,26 @@ +import pytest +from chatbot_parser import split_on_paragraphs + + +@pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [ + ("./test_files/list_file/list_test.md", + "list_test.md", + { + "SOURCE_DIRECTORY": "./test_files/list_file", + "DESTINATION_DIRECTORY": "./test_files/list_file", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 100, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False + }, + False, + { + 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. 
This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", + 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'} + ) +]) +def test_links(file, main_title, options, is_linux_tutorial, expected_text): + assert split_on_paragraphs(file, main_title, options, is_linux_tutorial)[1] == expected_text From 57a21397a869cbcffb6fab5f4d14496043b9b174 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Thu, 29 Aug 2024 16:49:24 +0200 Subject: [PATCH 138/145] updated test_file for list test --- .../tests/test_files/list_file/list_test.md | 2 +- .../tests/test_lists.py | 23 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md index 1d1d3c210e8b..1e18a1495d51 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md +++ b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md @@ -2,7 +2,7 @@ Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list. -1. First entry +1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list. 2. 
Second entry diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py index 4975856a75f8..06e56a5cb2c3 100644 --- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py +++ b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py @@ -6,20 +6,21 @@ ("./test_files/list_file/list_test.md", "list_test.md", { - "SOURCE_DIRECTORY": "./test_files/list_file", - "DESTINATION_DIRECTORY": "./test_files/list_file", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 100, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - "VERBOSE": False + "SOURCE_DIRECTORY": "./test_files/list_file", + "DESTINATION_DIRECTORY": "./test_files/list_file", + "SPLIT_ON_TITLES": False, + "SPLIT_ON_PARAGRAPHS": True, + "MIN_PARAGRAPH_LENGTH": 100, + "MAX_TITLE_DEPTH": 4, + "INCLUDE_LINKS_IN_PLAINTEXT": False, + "DEEP_DIRECTORIES": False, + "VERBOSE": False }, False, { - 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", - 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.'} + 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. 
If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", + 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.' + } ) ]) def test_links(file, main_title, options, is_linux_tutorial, expected_text): From 170a10cb9eaa0d92482daef766dd2b0918e9a4cd Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 09:53:12 +0200 Subject: [PATCH 139/145] dropped <> around links and started new function to calculate length of paragraphs --- .../HPC_chatbot_preprocessor/chatbot_parser.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index f5e5b452ff52..cff487f85893 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -235,6 +235,10 @@ def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_ elif re.fullmatch(r'!--.*?--', content): curr_line = re.sub(r'<.*?>', "", curr_line) + # drop the <> around links + elif re.match(r'http://', content) or re.match(r'https://', content): + curr_line = re.sub(r'<' + content + '>', content, curr_line ) + # keep the rest else: pass @@ -527,7 +531,7 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa title_level = check_for_title(line, in_code_block, curr_dirs, options) # check whether a new paragraph should be started - if line == "\n" and len(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph)) >= options[MIN_PARAGRAPH_LENGTH] and not in_code_block 
and not in_list: + if line == "\n" and paragraph_long_enough(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph), options) and not in_code_block and not in_list: # create a title for the previous paragraph if current_paragraph_number == -1: @@ -602,6 +606,18 @@ def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_pa return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order +def paragraph_long_enough(paragraph, options): + """ + Function that checks if the paragraph is long enough to be split of + + :param paragraph: current paragraph + :param options: dictionary containing the options given by the user + :return: + """ + # TODO: change this into something that uses the tokenizer + return len(paragraph) >= options[MIN_PARAGRAPH_LENGTH] + + def write_metadata(main_title, subtitle, links, title_level, directory, source_file): """ Function that writes metadata about a text section to a dictionary From 04efff6ca40a3b19f694e8168d83a77d45a1078b Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 10:10:49 +0200 Subject: [PATCH 140/145] removed parsed mds --- .../generic/account/account_paragraph_1.txt | 15 ------- .../generic/account/account_paragraph_10.txt | 18 -------- .../account_paragraph_10_metadata.json | 12 ------ .../generic/account/account_paragraph_12.txt | 14 ------- .../account_paragraph_12_metadata.json | 12 ------ .../account/account_paragraph_1_metadata.json | 16 ------- .../generic/account/account_paragraph_2.txt | 9 ---- .../account/account_paragraph_2_metadata.json | 16 ------- .../generic/account/account_paragraph_3.txt | 15 ------- .../account/account_paragraph_3_metadata.json | 15 ------- .../generic/account/account_paragraph_8.txt | 14 ------- .../account/account_paragraph_8_metadata.json | 12 ------ .../compiling_your_software_paragraph_1.txt | 10 ----- .../compiling_your_software_paragraph_10.txt | 19 --------- 
...g_your_software_paragraph_10_metadata.json | 11 ----- .../compiling_your_software_paragraph_11.txt | 20 --------- ...g_your_software_paragraph_11_metadata.json | 11 ----- .../compiling_your_software_paragraph_12.txt | 9 ---- ...g_your_software_paragraph_12_metadata.json | 11 ----- ...ng_your_software_paragraph_1_metadata.json | 11 ----- .../compiling_your_software_paragraph_2.txt | 13 ------ ...ng_your_software_paragraph_2_metadata.json | 11 ----- .../compiling_your_software_paragraph_3.txt | 13 ------ ...ng_your_software_paragraph_3_metadata.json | 11 ----- .../compiling_your_software_paragraph_4.txt | 15 ------- ...ng_your_software_paragraph_4_metadata.json | 11 ----- .../compiling_your_software_paragraph_5.txt | 16 ------- ...ng_your_software_paragraph_5_metadata.json | 11 ----- .../compiling_your_software_paragraph_6.txt | 30 ------------- ...ng_your_software_paragraph_6_metadata.json | 14 ------- .../compiling_your_software_paragraph_7.txt | 15 ------- ...ng_your_software_paragraph_7_metadata.json | 11 ----- .../compiling_your_software_paragraph_8.txt | 19 --------- ...ng_your_software_paragraph_8_metadata.json | 11 ----- .../compiling_your_software_paragraph_9.txt | 32 -------------- ...ng_your_software_paragraph_9_metadata.json | 11 ----- .../connecting/connecting_paragraph_1.txt | 18 -------- .../connecting/connecting_paragraph_10.txt | 24 ----------- .../connecting_paragraph_10_metadata.json | 12 ------ .../connecting/connecting_paragraph_15.txt | 7 ---- .../connecting_paragraph_15_metadata.json | 15 ------- .../connecting/connecting_paragraph_16.txt | 11 ----- .../connecting_paragraph_16_metadata.json | 16 ------- .../connecting_paragraph_1_metadata.json | 15 ------- .../connecting/connecting_paragraph_2.txt | 18 -------- .../connecting_paragraph_2_metadata.json | 12 ------ .../connecting/connecting_paragraph_3.txt | 9 ---- .../connecting_paragraph_3_metadata.json | 16 ------- .../connecting/connecting_paragraph_6.txt | 14 ------- 
.../connecting_paragraph_6_metadata.json | 12 ------ .../connecting/connecting_paragraph_7.txt | 21 ---------- .../connecting_paragraph_7_metadata.json | 12 ------ .../connecting/connecting_paragraph_8.txt | 12 ------ .../connecting_paragraph_8_metadata.json | 15 ------- .../connecting/connecting_paragraph_9.txt | 19 --------- .../connecting_paragraph_9_metadata.json | 12 ------ .../account/account_linux_paragraph_11.1.txt | 17 -------- ...account_linux_paragraph_11.1_metadata.json | 15 ------- .../account/account_linux_paragraph_5.1.txt | 14 ------- .../account_linux_paragraph_5.1_metadata.json | 12 ------ .../account/account_linux_paragraph_5.2.txt | 14 ------- .../account_linux_paragraph_5.2_metadata.json | 12 ------ .../account/account_linux_paragraph_5.3.txt | 16 ------- .../account_linux_paragraph_5.3_metadata.json | 12 ------ .../account/account_linux_paragraph_5.4.txt | 13 ------ .../account_linux_paragraph_5.4_metadata.json | 12 ------ .../account/account_linux_paragraph_5.5.txt | 6 --- .../account_linux_paragraph_5.5_metadata.json | 12 ------ .../account/account_linux_paragraph_6.1.txt | 1 - .../account_linux_paragraph_6.1_metadata.json | 12 ------ .../account/account_linux_paragraph_7.1.txt | 14 ------- .../account_linux_paragraph_7.1_metadata.json | 15 ------- .../account/account_linux_paragraph_7.2.txt | 8 ---- .../account_linux_paragraph_7.2_metadata.json | 12 ------ .../account/account_linux_paragraph_9.1.txt | 7 ---- .../account_linux_paragraph_9.1_metadata.json | 12 ------ .../connecting_linux_paragraph_11.1.txt | 37 ---------------- ...necting_linux_paragraph_11.1_metadata.json | 12 ------ .../connecting_linux_paragraph_12.1.txt | 6 --- ...necting_linux_paragraph_12.1_metadata.json | 12 ------ .../connecting_linux_paragraph_13.1.txt | 12 ------ ...necting_linux_paragraph_13.1_metadata.json | 15 ------- .../connecting_linux_paragraph_13.2.txt | 17 -------- ...necting_linux_paragraph_13.2_metadata.json | 12 ------ 
.../connecting_linux_paragraph_13.3.txt | 22 ---------- ...necting_linux_paragraph_13.3_metadata.json | 12 ------ .../connecting_linux_paragraph_13.4.txt | 14 ------- ...necting_linux_paragraph_13.4_metadata.json | 12 ------ .../connecting_linux_paragraph_13.5.txt | 14 ------- ...necting_linux_paragraph_13.5_metadata.json | 12 ------ .../connecting_linux_paragraph_13.6.txt | 18 -------- ...necting_linux_paragraph_13.6_metadata.json | 16 ------- .../connecting_linux_paragraph_14.1.txt | 10 ----- ...necting_linux_paragraph_14.1_metadata.json | 12 ------ .../connecting_linux_paragraph_5.1.txt | 12 ------ ...nnecting_linux_paragraph_5.1_metadata.json | 15 ------- .../connecting_linux_paragraph_5.2.txt | 4 -- ...nnecting_linux_paragraph_5.2_metadata.json | 12 ------ .../account/account_macos_paragraph_11.1.txt | 17 -------- ...account_macos_paragraph_11.1_metadata.json | 15 ------- .../account/account_macos_paragraph_5.1.txt | 15 ------- .../account_macos_paragraph_5.1_metadata.json | 12 ------ .../account/account_macos_paragraph_5.2.txt | 14 ------- .../account_macos_paragraph_5.2_metadata.json | 12 ------ .../account/account_macos_paragraph_5.3.txt | 16 ------- .../account_macos_paragraph_5.3_metadata.json | 12 ------ .../account/account_macos_paragraph_5.4.txt | 13 ------ .../account_macos_paragraph_5.4_metadata.json | 12 ------ .../account/account_macos_paragraph_5.5.txt | 6 --- .../account_macos_paragraph_5.5_metadata.json | 12 ------ .../account/account_macos_paragraph_6.1.txt | 1 - .../account_macos_paragraph_6.1_metadata.json | 12 ------ .../account/account_macos_paragraph_7.1.txt | 14 ------- .../account_macos_paragraph_7.1_metadata.json | 15 ------- .../account/account_macos_paragraph_7.2.txt | 7 ---- .../account_macos_paragraph_7.2_metadata.json | 12 ------ .../account/account_macos_paragraph_9.1.txt | 12 ------ .../account_macos_paragraph_9.1_metadata.json | 12 ------ .../connecting_macos_paragraph_11.1.txt | 37 ---------------- 
...necting_macos_paragraph_11.1_metadata.json | 12 ------ .../connecting_macos_paragraph_12.1.txt | 6 --- ...necting_macos_paragraph_12.1_metadata.json | 12 ------ .../connecting_macos_paragraph_13.1.txt | 12 ------ ...necting_macos_paragraph_13.1_metadata.json | 15 ------- .../connecting_macos_paragraph_13.2.txt | 17 -------- ...necting_macos_paragraph_13.2_metadata.json | 12 ------ .../connecting_macos_paragraph_13.3.txt | 22 ---------- ...necting_macos_paragraph_13.3_metadata.json | 12 ------ .../connecting_macos_paragraph_13.4.txt | 14 ------- ...necting_macos_paragraph_13.4_metadata.json | 12 ------ .../connecting_macos_paragraph_13.5.txt | 14 ------- ...necting_macos_paragraph_13.5_metadata.json | 12 ------ .../connecting_macos_paragraph_13.6.txt | 18 -------- ...necting_macos_paragraph_13.6_metadata.json | 16 ------- .../connecting_macos_paragraph_14.1.txt | 15 ------- ...necting_macos_paragraph_14.1_metadata.json | 12 ------ .../connecting_macos_paragraph_14.2.txt | 3 -- ...necting_macos_paragraph_14.2_metadata.json | 12 ------ .../connecting_macos_paragraph_5.1.txt | 10 ----- ...nnecting_macos_paragraph_5.1_metadata.json | 15 ------- .../connecting_macos_paragraph_5.2.txt | 7 ---- ...nnecting_macos_paragraph_5.2_metadata.json | 12 ------ .../account_windows_paragraph_11.1.txt | 17 -------- ...count_windows_paragraph_11.1_metadata.json | 15 ------- .../account/account_windows_paragraph_4.1.txt | 18 -------- ...ccount_windows_paragraph_4.1_metadata.json | 12 ------ .../account/account_windows_paragraph_4.2.txt | 13 ------ ...ccount_windows_paragraph_4.2_metadata.json | 16 ------- .../account/account_windows_paragraph_4.3.txt | 30 ------------- ...ccount_windows_paragraph_4.3_metadata.json | 15 ------- .../account/account_windows_paragraph_4.4.txt | 2 - ...ccount_windows_paragraph_4.4_metadata.json | 12 ------ .../account/account_windows_paragraph_6.1.txt | 13 ------ ...ccount_windows_paragraph_6.1_metadata.json | 12 ------ 
.../account/account_windows_paragraph_6.2.txt | 11 ----- ...ccount_windows_paragraph_6.2_metadata.json | 16 ------- .../account/account_windows_paragraph_6.3.txt | 5 --- ...ccount_windows_paragraph_6.3_metadata.json | 12 ------ .../account/account_windows_paragraph_9.1.txt | 8 ---- ...ccount_windows_paragraph_9.1_metadata.json | 12 ------ .../connecting_windows_paragraph_11.1.txt | 9 ---- ...cting_windows_paragraph_11.1_metadata.json | 12 ------ .../connecting_windows_paragraph_12.1.txt | 22 ---------- ...cting_windows_paragraph_12.1_metadata.json | 12 ------ .../connecting_windows_paragraph_12.2.txt | 11 ----- ...cting_windows_paragraph_12.2_metadata.json | 12 ------ .../connecting_windows_paragraph_12.3.txt | 6 --- ...cting_windows_paragraph_12.3_metadata.json | 12 ------ .../connecting_windows_paragraph_4.1.txt | 42 ------------------- ...ecting_windows_paragraph_4.1_metadata.json | 15 ------- 170 files changed, 2310 deletions(-) delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json 
delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt delete mode 
100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt 
delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json delete 
mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json delete mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt deleted file mode 100644 index c3f86ade1802..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt +++ /dev/null @@ -1,15 +0,0 @@ -Getting an HPC Account -Getting ready to request an account -All users of AUGent can request -an -account on the HPC, which is part of the Flemish Supercomputing Centre (VSC). -See HPC policies for more information on who is entitled to an account. -The VSC, abbreviation of Flemish Supercomputer Centre, is a virtual -supercomputer centre. It is a partnership between the five Flemish -associations: the Association KUĀ Leuven, Ghent University Association, -Brussels University Association, Antwerp University Association and the -University Colleges-Limburg. 
The VSC is funded by the Flemish -Government. -There are two methods for connecting to HPC-UGent infrastructure: -- Using a terminal to connect via SSH. -- Using the web portal diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt deleted file mode 100644 index 7b0a39279e46..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10.txt +++ /dev/null @@ -1,18 +0,0 @@ -After you have uploaded your public key you will receive an e-mail with -a link to confirm your e-mail address. After confirming your e-mail -address the VSC staff will review and if applicable approve your -account. -Welcome e-mail -Within one day, you should receive a Welcome e-mail with your VSC -account details. -Dear (Username), -Your VSC-account has been approved by an administrator. -Your vsc-username is vsc40000 -Your account should be fully active within one hour. -To check or update your account information please visit -https://account.vscentrum.be/ -For further info please visit https://www.vscentrum.be/user-portal -Kind regards, --- The VSC administrators -Now, you can start using the HPC. You can always look up your VSC id later -by visiting . 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json deleted file mode 100644 index e417029c16f3..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_10_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Welcome-e-mail", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "", - "previous_title": "account_paragraph_9", - "next_title": "account_paragraph_11", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#welcome-e-mail" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt deleted file mode 100644 index 7ecd78e5c9f6..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12.txt +++ /dev/null @@ -1,14 +0,0 @@ -Computation Workflow on the HPC -A typical Computation workflow will be: -1. Connect to the HPC -2. Transfer your files to the HPC -3. Compile your code and test it -4. Create a job script -5. Submit your job -6. Wait while - 1. your job gets into the queue - 2. your job gets executed - 3. your job finishes -7. Move your results -We'll take you through the different tasks one by one in the following -chapters. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json deleted file mode 100644 index e43e729aa744..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_12_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Computation-Workflow-on-the-HPC", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "", - "previous_title": "account_paragraph_11", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#computation-workflow-on-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json deleted file mode 100644 index 52a3ef55568e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Getting-ready-to-request-an-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "links": { - "0": "https://docs.hpc.ugent.be/sites/hpc_policies", - "1": "https://docs.hpc.ugent.be/web_portal" - }, - "parent_title": "", - "previous_title": null, - "next_title": "account_paragraph_2", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt deleted file mode 100644 index 9614ed1447cb..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2.txt +++ /dev/null @@ -1,9 +0,0 @@ -The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). -If you would like use a terminal with SSH as this gives you more flexibility continue reading. -However if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account. -Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal. -The HPC-UGent infrastructure clusters use public/private key pairs for user authentication -(rather than passwords). Technically, the private key is stored on your -local computer and always stays there; the public key is stored on the HPC. -Access to the HPC is granted to anyone who can prove to have access to the -corresponding private key on his local computer. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json deleted file mode 100644 index a41a19936744..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_2_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Getting-ready-to-request-an-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#applying-for-the-account", - "1": "https://docs.hpc.ugent.be/web_portal" - }, - "parent_title": "", - "previous_title": "account_paragraph_1", - "next_title": "account_paragraph_3", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#getting-ready-to-request-an-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt deleted file mode 100644 index 963b35c090bc..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3.txt +++ /dev/null @@ -1,15 +0,0 @@ -How do SSH keys work? -- an SSH public/private key pair can be seen as a lock and a key -- the SSH public key is equivalent with a lock: you give it to the - VSC and they put it on the door that gives access to your account. -- the SSH private key is like a physical key: you don't hand it out - to other people. -- anyone who has the key (and the optional password) can unlock the - door and log in to the account. -- the door to your VSC account is special: it can have multiple - locks (SSH public keys) attached to it, and you only need to open - one lock with the corresponding key (SSH private key) to open - the door (log in to the account). 
-Since all VSC clusters use Linux as their main operating system, you -will need to get acquainted with using the command-line interface and -using the terminal (see tutorial). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json deleted file mode 100644 index 4df622cc4aa0..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_3_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "How-do-SSH-keys-work", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial" - }, - "parent_title": "", - "previous_title": "account_paragraph_2", - "next_title": "account_paragraph_4", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#how-do-ssh-keys-work" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt deleted file mode 100644 index 6c5695dfff31..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8.txt +++ /dev/null @@ -1,14 +0,0 @@ -Applying for the account -Visit -You will be redirected to our WAYF (Where Are You From) service where -you have to select your "Home Organisation". -Select "UGent" in the dropdown box and optionally select "Save my preference" -and "permanently". -Click "Confirm" -You will now be taken to the authentication page of your institute. -You will now have to log in with CAS using your UGent account. -You either have a login name of maximum 8 characters, or a (non-UGent) -email address if you are an external user. In case of problems with your -UGent password, please visit: . 
After -logging in, you may be requested to share your information. Click "Yes, -continue". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json deleted file mode 100644 index 6a77c48dbd1f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_8_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Applying-for-the-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "", - "previous_title": "account_paragraph_7", - "next_title": "account_paragraph_9", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/account/#applying-for-the-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt deleted file mode 100644 index db1afd43e680..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt +++ /dev/null @@ -1,10 +0,0 @@ -Compiling and testing your software on the HPC -All nodes in the HPC cluster are running the "RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty)" -Operating system, which is a specific version of Red Hat Enterprise Linux. This means that all the -software programs -(executable) that the end-user wants to run on the HPC first must be -compiled for RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). It also means that you first have to install all the -required external software packages on the HPC. -Most commonly used compilers are already pre-installed on the HPC and can be -used straight away. 
Also, many popular external software packages, which -are regularly used in the scientific community, are also pre-installed. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt deleted file mode 100644 index d49ba76b01aa..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10.txt +++ /dev/null @@ -1,19 +0,0 @@ -The "mpi_hello.c" program is a simple source file, written in C with MPI -library calls. -Then, check the command line options for *"mpicc" (GNU C-Compiler with -MPI extensions)*, then we compile and list the contents of the directory -again: -mpicc --help -mpicc -o mpihello mpihello.c -ls -l -A new file "hello" has been created. Note that this program has -"execute" rights. -Let's test this program on the "login" node first: -$ ./mpihello -Hello World from Node 0. -It seems to work, now run it on the HPC. -qsub mpihello.pbs -Compiling a parallel program in Intel Parallel Studio Cluster Edition -We will now compile the same program, but using the Intel Parallel -Studio Cluster Edition compilers. 
We stay in the examples directory for -this chapter: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json deleted file mode 100644 index ca0d7d806690..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_10_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_9", - "next_title": "compiling_your_software_paragraph_11", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt deleted file mode 100644 index be02d069ac7f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11.txt +++ /dev/null @@ -1,20 +0,0 @@ -cd ~/examples/Compiling-and-testing-your-software-on-the-HPC -We will compile this C/MPI -file into an executable with the Intel -Parallel Studio Cluster Edition. First, clear the modules (purge) and -then load the latest "intel" module: -module purge -module load intel -Then, compile and list the contents of the directory again. The Intel -equivalent of mpicc is mpiicc. -mpiicc -o mpihello mpihello.c -ls -l -Note that the old "mpihello" file has been overwritten. 
Let's test this -program on the "login" node first: -$ ./mpihello -Hello World from Node 0. -It seems to work, now run it on the HPC. -qsub mpihello.pbs -Note: The AUGent only has a license for the Intel Parallel Studio Cluster -Edition for a fixed number of users. As such, it might happen that you -have to wait a few minutes before a floating license becomes available -for your use. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json deleted file mode 100644 index 808331a3f9d7..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_11_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_10", - "next_title": "compiling_your_software_paragraph_12", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt deleted file mode 100644 index 1d37014a4263..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12.txt +++ /dev/null @@ -1,9 +0,0 @@ -Note: The Intel Parallel Studio Cluster Edition contains equivalent -compilers for all GNU compilers. 
Hereafter the overview for C, C++ and -Fortran compilers. -| | Sequential Program | | **Parallel Program (with MPI)** | | -|-------------|------------------------|-----------|---------------------------------|-----------| -| | GNU | Intel | GNU | Intel | -| C | gcc | icc | mpicc | mpiicc | -| **C++** | g++ | icpc | mpicxx | mpiicpc | -| Fortran | gfortran | ifort | mpif90 | mpiifort | \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json deleted file mode 100644 index d032428daf16..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_12_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_11", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json deleted file mode 100644 index ec4b55c9a4df..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-and-testing-your-software-on-the-HPC", - 
"title_depth": 1, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": null, - "next_title": "compiling_your_software_paragraph_2", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-testing-your-software-on-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt deleted file mode 100644 index b52639b649d9..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt +++ /dev/null @@ -1,13 +0,0 @@ -Check the pre-installed software on the HPC -In order to check all the available modules and their version numbers, -which are pre-installed on the HPC enter: -When your required application is not available on the HPC please contact -any HPC member. Be aware of potential "License Costs". "Open Source" -software is often preferred. -Porting your code -To port a software-program is to translate it from the operating system in -which it was developed (e.g., Windows 7) to another operating system -(e.g., Red Hat Enterprise Linux on our HPC) so that it can be used there. Porting implies some -degree of effort, but not nearly as much as redeveloping the program in -the new environment. It all depends on how "portable" you wrote your -code. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json deleted file mode 100644 index 00750c81d976..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Porting-your-code", - "title_depth": 2, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_1", - "next_title": "compiling_your_software_paragraph_3", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt deleted file mode 100644 index f994f0bc1482..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt +++ /dev/null @@ -1,13 +0,0 @@ -In the simplest case the file or files may simply be copied from one -machine to the other. However, in many cases the software is installed -on a computer in a way, which depends upon its detailed hardware, -software, and setup, with device drivers for particular devices, using -installed operating system and supporting software components, and using -different directories. 
-In some cases software, usually described as "portable software" is -specifically designed to run on different computers with compatible -operating systems and processors without any machine-dependent -installation; it is sufficient to transfer specified directories and -their contents. Hardware- and software-specific information is often -stored in configuration files in specified locations (e.g., the registry -on machines running MS Windows). diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json deleted file mode 100644 index 90e7d236beb6..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Porting-your-code", - "title_depth": 2, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_2", - "next_title": "compiling_your_software_paragraph_4", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt deleted file mode 100644 index f7bf4172b71d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt +++ /dev/null @@ -1,15 +0,0 @@ -Software, which is not portable in this sense, will have to be -transferred with modifications to support the environment on the -destination machine. 
-Whilst programming, it would be wise to stick to certain standards -(e.g., ISO/ANSI/POSIX). This will ease the porting of your code to other -platforms. -Porting your code to the RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty) platform is the responsibility of the end-user. -Compiling and building on the HPC -Compiling refers to the process of translating code written in some -programming language, e.g., Fortran, C, or C++, to machine code. -Building is similar, but includes gluing together the machine code -resulting from different source files into an executable (or library). -The text below guides you through some basic problems typical for small -software projects. For larger projects it is more appropriate to use -makefiles or even an advanced build system like CMake. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json deleted file mode 100644 index b7c9ef0f71b9..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-and-building-on-the-HPC", - "title_depth": 2, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_3", - "next_title": "compiling_your_software_paragraph_5", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt deleted file 
mode 100644 index 342262b92640..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt +++ /dev/null @@ -1,16 +0,0 @@ -All the HPC nodes run the same version of the Operating System, i.e. RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). So, -it is sufficient to compile your program on any compute node. Once you -have generated an executable with your compiler, this executable should -be able to run on any other compute-node. -A typical process looks like: -1. Copy your software to the login-node of the HPC -2. Start an interactive session on a compute node; -3. Compile it; -4. Test it locally; -5. Generate your job scripts; -6. Test it on the HPC -7. Run it (in parallel); -We assume you've copied your software to the HPC. The next step is to request -your private compute node. -$ qsub -I -qsub: waiting for job 123456 to start diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json deleted file mode 100644 index 02a8fad0ae2b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-and-building-on-the-HPC", - "title_depth": 2, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_4", - "next_title": "compiling_your_software_paragraph_6", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt deleted file mode 100644 index 7ebde6648789..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6.txt +++ /dev/null @@ -1,30 +0,0 @@ -Compiling a sequential program in C -Go to the examples for chapter -Compiling and testing your software on the HPC and load the -foss module: -cd ~/examples/Compiling-and-testing-your-software-on-the-HPC -module load foss -We now list the directory and explore the contents of the "hello.c" -program: -$ ls -l -total 512 --rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c --rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* --rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c --rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs -/* - * VSC : Flemish Supercomputing Centre - * Tutorial : Introduction to HPC - * Description: Print 500 numbers, whilst waiting 1 second in between - */ -#include "stdio.h" -int main( int argc, char *argv[] ) -{ - int i; - for (i=0; i<500; i++) - { - printf("Hello #%d\n", i); - fflush(stdout); - sleep(1); - } -} diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json deleted file mode 100644 index 16942249583b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_6_metadata.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-sequential-program-in-C", - "title_depth": 3, - "directory": "compiling_your_software", - "links": { - "0": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" - }, - "parent_title": "", - "previous_title": 
"compiling_your_software_paragraph_5", - "next_title": "compiling_your_software_paragraph_7", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt deleted file mode 100644 index 1d58d0d6ae4e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7.txt +++ /dev/null @@ -1,15 +0,0 @@ -The "hello.c" program is a simple source file, written in C. It'll print -500 times "Hello #<num>", and waits one second between 2 printouts. -We first need to compile this C-file into an executable with the -gcc-compiler. -First, check the command line options for *"gcc" (GNU C-Compiler)*, then -we compile. the O2 option enables a moderate level of optimization when compiling the code. -It instructs the compiler to optimize the code for better performance without significantly increasing compilation time. 
-Finally, list the contents of the directory again: -$ gcc -help -$ gcc -O2 -o hello hello.c -$ ls -l -total 512 --rwxrwxr-x 1 vsc40000 7116 Sep 16 11:43 hello* --rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c --rwxr-xr-x 1 vsc40000 130 Sep 16 11:39 hello.pbs* diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json deleted file mode 100644 index e5f3161c3f28..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_7_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-sequential-program-in-C", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_6", - "next_title": "compiling_your_software_paragraph_8", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt deleted file mode 100644 index 5ca5de1e6d44..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8.txt +++ /dev/null @@ -1,19 +0,0 @@ -A new file "hello" has been created. Note that this file has "execute" -rights, i.e., it is an executable. More often than not, calling gcc -- -or any other compiler for that matter -- will provide you with a list of -errors and warnings referring to mistakes the programmer made, such as -typos, syntax errors. 
You will have to correct them first in order to -make the code compile. Warnings pinpoint less crucial issues that may -relate to performance problems, using unsafe or obsolete language -features, etc. It is good practice to remove all warnings from a -compilation process, even if they seem unimportant so that a code change -that produces a warning does not go unnoticed. -Let's test this program on the local compute node, which is at your -disposal after the qsub --I command: -$ ./hello -Hello #0 -Hello #1 -Hello #2 -Hello #3 -Hello #4 -... diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json deleted file mode 100644 index 942949951d1c..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_8_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-sequential-program-in-C", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_7", - "next_title": "compiling_your_software_paragraph_9", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt deleted file mode 100644 index 28982d2bd95a..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9.txt +++ /dev/null @@ -1,32 +0,0 @@ -It seems to work, now run it on the HPC -qsub 
hello.pbs -Compiling a parallel program in C/MPI -cd ~/examples/Compiling-and-testing-your-software-on-the-HPC -List the directory and explore the contents of the "mpihello.c" -program: -$ ls -l -total 512 -total 512 --rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c --rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* --rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c --rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs -/* - * VSC : Flemish Supercomputing Centre - * Tutorial : Introduction to HPC - * Description: Example program, to compile with MPI - */ -#include -#include -main(int argc, char **argv) -{ - int node, i, j; - float f; - MPI_Init(&argc,&argv); - MPI_Comm_rank(MPI_COMM_WORLD, &node); - - printf("Hello World from Node %d.\n", node); - for (i=0; i<=100000; i++) - f=i*2.718281828*i+i+i*3.141592654; - MPI_Finalize(); -} diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json deleted file mode 100644 index fe51e423a96c..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_9_metadata.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "main_title": "compiling_your_software", - "subtitle": "Compiling-a-parallel-program-in-CMPI", - "title_depth": 3, - "directory": "compiling_your_software", - "parent_title": "", - "previous_title": "compiling_your_software_paragraph_8", - "next_title": "compiling_your_software_paragraph_10", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-cmpi" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt deleted file mode 
100644 index bc5a1f80140f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt +++ /dev/null @@ -1,18 +0,0 @@ -Connecting to the HPC infrastructure -Before you can really start using the HPC clusters, there are several things -you need to do or know: -1. You need to log on to the cluster using an SSH client to one of - the login nodes or by using the HPC web portal. - This will give you command-line access. - A standard web browser like Firefox or Chrome for the web portal will suffice. -2. Before you can do some work, you'll have to transfer the files - that you need from your desktop computer to the cluster. At the end - of a job, you might want to transfer some files back. -3. Optionally, if you wish to use programs with a **graphical user - interface**, you will need an X-server on your client system and log - in to the login nodes with X-forwarding enabled. -4. Often several versions of software packages and libraries are - installed, so you need to select the ones you need. To manage - different versions efficiently, the VSC clusters use so-called - modules, so you will need to select and load the modules that - you need. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt deleted file mode 100644 index 5c715d218a19..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10.txt +++ /dev/null @@ -1,24 +0,0 @@ -You can exit the connection at anytime by entering: -$ exit -logout -Connection to login.hpc.ugent.be closed. - tip "tip: Setting your Language right" - You may encounter a warning message similar to the following one during connecting: - perl: warning: Setting locale failed. 
- perl: warning: Please check that your locale settings: - LANGUAGE = (unset), - LC_ALL = (unset), - LC_CTYPE = "UTF-8", - LANG = (unset) - are supported and installed on your system. - perl: warning: Falling back to the standard locale ("C"). - or any other error message complaining about the locale. - This means that the correct "locale" has not yet been properly specified on your local machine. Try: - LANG= - LC_COLLATE="C" - LC_CTYPE="UTF-8" - LC_MESSAGES="C" - LC_MONETARY="C" - LC_NUMERIC="C" - LC_TIME="C" - LC_ALL= diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json deleted file mode 100644 index 96a1f9cee80c..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_10_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_9", - "next_title": "connecting_paragraph_11", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt deleted file mode 100644 index df00d4ed2a4a..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15.txt +++ /dev/null @@ -1,7 +0,0 @@ -Fast file transfer for large datasets -See the section on rsync in chapter 5 of the Linux intro manual. -Changing login nodes -It can be useful to have control over which login node you are on. 
However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. -For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: -ssh gligar07.gastly.os -This is also possible the other way around. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json deleted file mode 100644 index 74ea0125d713..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_15_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Changing-login-nodes", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://docs.hpc.ugent.be/linux-tutorial/uploading_files/#copying-faster-with-rsync" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_14", - "next_title": "connecting_paragraph_16", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt deleted file mode 100644 index dd4f3269fb56..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16.txt +++ /dev/null @@ -1,11 +0,0 @@ -If you want to find out which login host you are connected to, you can use the hostname command. 
-$ hostname -gligar07.gastly.os -$ ssh gligar08.gastly.os -$ hostname -gligar08.gastly.os -Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux. -These can make sessions that 'survives' across disconnects. -You can find more information on how to use these tools here (or on other online sources): -- screen -- tmux \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json deleted file mode 100644 index 623be877f5bb..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_16_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Changing-login-nodes", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/", - "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_15", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json deleted file mode 100644 index 783e60c1ab5f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connecting-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 1, - "directory": 
"connecting", - "links": { - "0": "https://docs.hpc.ugent.be/web_portal" - }, - "parent_title": "", - "previous_title": null, - "next_title": "connecting_paragraph_2", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#connecting-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt deleted file mode 100644 index 49c4572f3b24..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2.txt +++ /dev/null @@ -1,18 +0,0 @@ -Connection restrictions -Since March 20th 2020, restrictions are in place that limit from where -you can connect to the VSC HPC infrastructure, in response to security -incidents involving several European HPC centres. -VSC login nodes are only directly accessible from within university -networks, and from (most) Belgian commercial internet providers. -All other IP domains are blocked by default. If you are connecting from -an IP address that is not allowed direct access, you have the following -options to get access to VSC login nodes: -- Use an VPN connection to connect to UGent the network (recommended). See for more information. -- Whitelist your IP address automatically by accessing - and log in with your UGent account. - - While this web connection is active new SSH sessions can be - started. - - Active SSH sessions will remain active even when this web page - is closed. -- Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your - IP range (e.g., for industry access, automated processes). 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json deleted file mode 100644 index 10f3e042d9ae..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connection-restrictions", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_1", - "next_title": "connecting_paragraph_3", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt deleted file mode 100644 index db490973b7fe..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3.txt +++ /dev/null @@ -1,9 +0,0 @@ -Trying to establish an SSH connection from an IP address that does not -adhere to these restrictions will result in an immediate failure to -connect, with an error message like: -ssh_exchange_identification: read: Connection reset by peer -First Time connection to the HPC infrastructure -The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. -If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. -If you have any issues connecting to the HPC after you've followed these -steps, see Issues connecting to login node to troubleshoot. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json deleted file mode 100644 index 8d6b1696e08c..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_3_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://docs.hpc.ugent.be/web_portal", - "1": "https://docs.hpc.ugent.be/troubleshooting/#issues-connecting-to-login-node" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_2", - "next_title": "connecting_paragraph_4", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt deleted file mode 100644 index 862e6952252f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6.txt +++ /dev/null @@ -1,14 +0,0 @@ -Congratulations, you're on the HPC infrastructure now! -To find out where you have landed you can print the current working directory: -$ pwd -/user/home/gent/vsc400/vsc40000 -Your new private home directory is "/user/home/gent/vsc400/vsc40000". Here you can create your own -subdirectory structure, copy and prepare your applications, compile and -test them and submit your jobs on the HPC. -$ cd /apps/gent/tutorials -$ ls -Intro-HPC/ -This directory currently contains all training material for the Introduction to the HPC. 
More -relevant training material to work with the HPC can always be added later in -this directory. -You can now explore the content of this directory with the "ls --l" (lists long) and the "cd" (change directory) commands: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json deleted file mode 100644 index 66b2a89fbb1f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_6_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_5", - "next_title": "connecting_paragraph_7", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt deleted file mode 100644 index aa590b9b2691..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7.txt +++ /dev/null @@ -1,21 +0,0 @@ -As we are interested in the use of the HPC, move further to Intro-HPC and explore the -contents up to 2 levels deep: -$ cd Intro-HPC -$ tree -L 2 -. 
-'-- examples - |-- Compiling-and-testing-your-software-on-the-HPC - |-- Fine-tuning-Job-Specifications - |-- Multi-core-jobs-Parallel-Computing - |-- Multi-job-submission - |-- Program-examples - |-- Running-batch-jobs - |-- Running-jobs-with-input - |-- Running-jobs-with-input-output-data - |-- example.pbs - '-- example.sh -9 directories, 5 files -This directory contains: -1. This HPC Tutorial (in either a Mac, Linux or Windows version). -2. An examples subdirectory, containing all the examples that you need in this - Tutorial, as well as examples that might be useful for your specific applications. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json deleted file mode 100644 index 6e3f90fbe8af..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_7_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_6", - "next_title": "connecting_paragraph_8", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt deleted file mode 100644 index 634df6034b10..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8.txt +++ /dev/null @@ -1,12 +0,0 @@ -cd examples - tip - Typing cd ex followed by tab (the Tab-key) will generate the cd examples - command. 
Command-line completion (also tab completion) is a common feature of the bash command - line interpreter, in which the program automatically fills in partially - typed commands. - tip - For more exhaustive tutorials about Linux usage, see Appendix Useful Linux Commands -The first action is to copy the contents of the HPC examples directory to -your home directory, so that you have your own personal copy and that -you can start using the examples. The "-r" option of the copy command -will also copy the contents of the sub-directories "recursively". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json deleted file mode 100644 index 38f265cfdcde..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_8_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "links": { - "0": "https://docs.hpc.ugent.be/useful_linux_commands" - }, - "parent_title": "", - "previous_title": "connecting_paragraph_7", - "next_title": "connecting_paragraph_9", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt deleted file mode 100644 index ad2fee7457f5..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt +++ /dev/null @@ -1,19 +0,0 @@ -cp -r /apps/gent/tutorials/Intro-HPC/examples ~/ -Go to your home directory, check your own private examples 
directory, ...Ā and start working. -cd -ls -l -Upon connecting you will see a login message containing your last login time stamp and a basic overview of the current cluster utilisation. -Last login: Thu Mar 18 13:15:09 2021 from gligarha02.gastly.os - STEVIN HPC-UGent infrastructure status on Mon, 19 Feb 2024 10:00:01 - cluster - full - free - part - total - running - queued - nodes nodes free nodes jobs jobs - ------------------------------------------------------------------------- - skitty 39 0 26 68 1839 5588 - joltik 6 0 1 10 29 18 - doduo 22 0 75 128 1397 11933 - accelgor 4 3 2 9 18 1 - donphan 0 0 16 16 16 13 - gallade 2 0 5 16 19 136 -For a full view of the current loads and queues see: -https://hpc.ugent.be/clusterstate/ -Updates on current system status and planned maintenance can be found on https://www.ugent.be/hpc/en/infrastructure/status diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json deleted file mode 100644 index bd1d462e614e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "", - "previous_title": "connecting_paragraph_8", - "next_title": "connecting_paragraph_10", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt deleted file mode 100644 index 
dfc592117923..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1.txt +++ /dev/null @@ -1,17 +0,0 @@ -Adding multiple SSH public keys (optional) -In case you are connecting from different computers to the login nodes, -it is advised to use separate SSH public keys to do so. You should -follow these steps. -1. Create a new public/private SSH key pair from the new computer. - Repeat the process described in - sectionĀ Generate a public/private key pair with OpenSSH. -2. Go to -3. Upload the new SSH public key using the Add public key section. Make sure that your - public key is actually saved, because a public key will be refused - if it is too short, wrong type, or in a wrong format. -4. (optional) If you lost your key, you can delete the old key on the - same page. You should keep at least one valid public SSH key in your - account. -5. Take into account that it will take some time before the new SSH - public key is active in your account on the system; waiting for - 15-30 minutes should be sufficient. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json deleted file mode 100644 index ffdeaf550e00..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_11.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Adding-multiple-SSH-public-keys-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Applying-for-the-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" - }, - "previous_title": "account_paragraph_10", - "next_title": "account_paragraph_12", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#adding-multiple-ssh-public-keys-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt deleted file mode 100644 index caaaea5ee919..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt +++ /dev/null @@ -1,14 +0,0 @@ -How do SSH keys work -Launch a terminal from your desktop's application menu and you will see -the bash shell. There are other shells, but most Linux distributions use -bash by default. -Test OpenSSH -Secure Shell (ssh) is a cryptographic network protocol for secure data -communication, remote command-line login, remote command execution, and -other secure network services between two networked computers. 
In short, -ssh provides a secure connection between 2 computers via insecure -channels (Network, Internet, telephone lines, ...). -"Secure" means that: -1. the User is authenticated to the System; and -2. the System is authenticated to the User; and -3. all data is encrypted during transfer. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json deleted file mode 100644 index 7654a65253ab..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Test-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_4", - "next_title": "account_linux_paragraph_5.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt deleted file mode 100644 index 318f913fba34..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2.txt +++ /dev/null @@ -1,14 +0,0 @@ -OpenSSH is a FREE implementation of the SSH connectivity protocol. comes -with its own implementation of OpenSSH, so you don't need to install any -third-party software to use it. Just open a terminal window and jump in! -On all popular Linux distributions, the OpenSSH software is readily -available, and most often installed by default. 
You can check whether -the OpenSSH software is installed by opening a terminal and typing: -$ ssh -V -OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 -To access the clusters and transfer your files, you will use the -following commands: -1. ssh-keygen: to generate the SSH key pair (public + private key); -2. ssh: to open a shell on a remote machine; -3. sftp: a secure equivalent of ftp; -4. scp: a secure equivalent of the remote copy command rcp. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json deleted file mode 100644 index 32f1120307fa..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Test-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_5.1", - "next_title": "account_linux_paragraph_5.3", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#test-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt deleted file mode 100644 index 5df90a3dd7c0..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3.txt +++ /dev/null @@ -1,16 +0,0 @@ -Generate a public/private key pair with OpenSSH -A key pair might already be present in the default location inside your -home directory. 
Therefore, we first check if a key is available with the -"list short" ("ls") command: -ls ~/.ssh -If a key-pair is already available, you would normally get: -authorized_keys id_rsa id_rsa.pub known_hosts -Otherwise, the command will show: -ls: .ssh: No such file or directory -You can recognise a public/private key pair when a pair of files has the -same name except for the extension ".pub" added to one of them. In this -particular case, the private key is "id_rsa" and public key is -"id_rsa.pub". You may have multiple keys (not necessarily in the -directory "~/.ssh") if you or your operating system requires this. Be -aware that your existing key pair might be too short, or not the right -type. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json deleted file mode 100644 index 722ba1a2ad49..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_5.2", - "next_title": "account_linux_paragraph_5.4", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt deleted file mode 100644 index d29d61d27d98..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4.txt +++ /dev/null @@ -1,13 +0,0 @@ -You will need to generate a new key pair, when: -1. you don't have a key pair yet -2. you forgot the passphrase protecting your private key -3. your private key was compromised -4. your key pair is too short or not the right type -For extra security, the private key itself can be encrypted using a -"passphrase", to prevent anyone from using your private key even when -they manage to copy it. You have to "unlock" the private key by typing -the passphrase. Be sure to never give away your private key, it is -private and should stay private. You should not even copy it to one of -your other machines, instead, you should create a new public/private key -pair for each machine. -ssh-keygen -t rsa -b 4096 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json deleted file mode 100644 index 4f65f6ebf365..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_5.3", - "next_title": "account_linux_paragraph_5.5", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt deleted file mode 100644 index 78c142e82e00..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5.txt +++ /dev/null @@ -1,6 +0,0 @@ -This will ask you for a file name to store the private and public key, -and a passphrase to protect your private key. It needs to be emphasised -that you really should choose the passphrase wisely! The system will ask -you for it every time you want to use the private key that is every time -you want to access the cluster or transfer your files. -Without your key pair, you won't be able to apply for a personal VSC account. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json deleted file mode 100644 index 468fb5d09381..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.5_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_5.4", - "next_title": "account_paragraph_6", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt deleted file mode 100644 index c3b395b52962..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt +++ /dev/null @@ -1 +0,0 @@ -Using an SSH agent (optional) \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json deleted file mode 100644 index fb82c40a7d76..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_5", - "next_title": "account_paragraph_7", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt deleted file mode 100644 index 8e8429c16422..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1.txt +++ /dev/null @@ -1,14 +0,0 @@ -Using an SSH agent (optional) -Most recent Unix derivatives include by default an SSH agent ("gnome-keyring-daemon" in most cases) -to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into -the SSH manager keyring to be able to connect to the HPC cluster. If -not, SSH client will display an error message (see Connecting) similar to this: -Agent admitted failure to sign using the key. 
-Permission denied (publickey,gssapi-keyex,gssapi-with-mic). -This could be fixed using the ssh-add command. You can include the new -private keys' identities in your keyring with: -ssh-add - tip - Without extra options ssh-add adds any key located at $HOME/.ssh - directory, but you can specify the private key location path as - argument, as example: ssh-add /path/to/my/id_rsa. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json deleted file mode 100644 index 2b3633d71e74..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/connecting" - }, - "previous_title": "account_paragraph_6", - "next_title": "account_linux_paragraph_7.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt deleted file mode 100644 index c227dbbb6e2f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2.txt +++ /dev/null @@ -1,8 +0,0 @@ -Check that your key is available from the keyring with: -ssh-add -l -After these changes the key agent will keep your SSH key to connect to -the clusters as usual. 
- tip - You should execute ssh-add command again if you generate a new SSH - key. -Visit for more information. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json deleted file mode 100644 index de9700c7a5b8..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_7.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_linux_paragraph_7.1", - "next_title": "account_paragraph_8", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt deleted file mode 100644 index 815c414e059b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1.txt +++ /dev/null @@ -1,7 +0,0 @@ -Applying for the account -After you log in using your UGent login and password, you will be asked to -upload the file that contains your public key, i.e., the file -"id_rsa.pub" which you have generated earlier. Make sure that your -public key is actually accepted for upload, because if it is in a wrong -format, wrong type or too short, then it will be refused. -This file has been stored in the directory "~/.ssh/". 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json deleted file mode 100644 index 31c14d853b39..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_9.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Applying-for-the-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "account", - "previous_title": "account_paragraph_8", - "next_title": "account_paragraph_10", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/account/#applying-for-the-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt deleted file mode 100644 index 1d9129245359..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1.txt +++ /dev/null @@ -1,37 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Note - If you try to set a non-supported locale, then it will be automatically - set to the default. Currently the default is en_US.UFT-8 or en_US, - depending on whether your originally (non-supported) locale was UTF-8 or not. - Open the .bashrc on your local machine with your favourite editor and - add the following lines: - - $ nano ~/.bashrc - ... 
- export LANGUAGE="en_US.UTF-8" - export LC_ALL="en_US.UTF-8" - export LC_CTYPE="en_US.UTF-8" - export LANG="en_US.UTF-8" - ... - - tip "tip: vi" - To start entering text in vi: move to the place you want to start - entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" - To exit vi without saving your changes, enter ""ESC":q!" - - - or alternatively (if you are not comfortable with the Linux editors), - again on your local machine: - - echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - - You can now log out, open a new terminal/shell on your local machine and - reconnect to the login node, and you should not get these warnings anymore. - \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json deleted file mode 100644 index ef14b084e5f0..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_11.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_10", - "next_title": "connecting_paragraph_12", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt deleted file mode 100644 index d872c89a0f83..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1.txt +++ /dev/null @@ -1,6 +0,0 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -The preferred way to transfer files is by using an scp or sftp via the -secure OpenSSH protocol. ships with an implementation of OpenSSH, so you -don't need to install any third-party software to use it. Just open a -terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json deleted file mode 100644 index 081156a5d163..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_12.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Transfer-Files-tofrom-the-HPC", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_11", - "next_title": "connecting_paragraph_13", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt deleted file mode 100644 index 8d0031fcca9f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1.txt +++ /dev/null @@ -1,12 +0,0 @@ -Transfer Files tofrom the HPC -Using scp -Secure copy or SCP is a tool (command) for securely transferring files between a local -host (= your computer) and a remote host (the HPC). It is based on the -Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., -copy) command, but can copy files to or from remote machines. -It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if -you have symlinks to them in your home directory. See -the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux - for how to do this. -Open an additional terminal window and check that you're working on your -local machine. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json deleted file mode 100644 index 6b70790e1e36..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" - }, - "previous_title": "connecting_paragraph_12", - "next_title": "connecting_linux_paragraph_13.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt deleted file mode 100644 index f1da0677a677..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2.txt +++ /dev/null @@ -1,17 +0,0 @@ -$ hostname - -If you're still using the terminal that is connected to the HPC, close the -connection by typing "exit" in the terminal window. -For example, we will copy the (local) file "localfile.txt" to your -home directory on the HPC cluster. We first generate a small dummy -"localfile.txt", which contains the word "Hello". Use your own VSC -account, which is something like "vsc40000". 
Don't forget the colon (:) at the -end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your -local filesystem. You can even specify where to save the file on the -remote filesystem by putting a path after the colon. -$ echo "Hello" > localfile.txt -$ ls -l -... --rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -$ scp localfile.txt vsc40000@login.hpc.ugent.be: -localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json deleted file mode 100644 index 43affa4e36c7..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_13.1", - "next_title": "connecting_linux_paragraph_13.3", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt deleted file mode 100644 index 9585900e3564..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3.txt +++ /dev/null @@ -1,22 +0,0 @@ -Connect to the HPC via another terminal, print the working directory (to -make sure you're in the home directory) and check whether the file has -arrived: -$ pwd -/user/home/gent/vsc400/vsc40000 -$ ls -l -total 
1536 -drwxrwxr-x 2 -drwxrwxr-x 2 -drwxrwxr-x 10 --rw-r--r-- 1 -$ cat localfile.txt -Hello -The scp command can also be used to copy files from the cluster to your -local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" -subdirectory on the cluster to your local computer. -First, we will confirm that the file is indeed in the "docs" -subdirectory. In the terminal on the login node, enter: -$ cd ~/docs -$ ls -l -total 1536 --rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json deleted file mode 100644 index ccc74bb5b940..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_13.2", - "next_title": "connecting_linux_paragraph_13.4", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt deleted file mode 100644 index d09b69552ef7..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4.txt +++ /dev/null @@ -1,14 +0,0 @@ -Now we will copy the file to the local machine. 
On the terminal on your -own local computer, enter: -$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . -intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 -$ ls -l -total 899 --rw-r--r-- 1 user staff 741995 Sep 18 09:53 --rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -The file has been copied from the HPC to your local computer. -It's also possible to copy entire directories (and their contents) with -the -r flag. For example, if we want to copy the local directory -dataset to $VSC_SCRATCH, we can use the following command (assuming -you've created the scratch symlink): -scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json deleted file mode 100644 index 9ffcc4121f41..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_13.3", - "next_title": "connecting_linux_paragraph_13.5", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt deleted file mode 100644 index 532d57bb4a58..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5.txt +++ /dev/null @@ -1,14 +0,0 @@ -If 
you don't use the -r option to copy a directory, you will run into -the following error: -$ scp dataset vsc40000@login.hpc.ugent.be:scratch -dataset: not a regular file -Using sftp -The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file -transfer and file management functionalities over any reliable data -stream. It was designed as an extension of the Secure Shell protocol -(SSH) version 2.0. This protocol assumes that it is run over a secure -channel, such as SSH, that the server has already authenticated the -client, and that the identity of the client user is available to the -protocol. -The sftp is an equivalent of the ftp command, with the difference that -it uses the secure ssh protocol to connect to the clusters. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json deleted file mode 100644 index 8e3b4056b6b0..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.5_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-sftp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_linux_paragraph_13.4", - "next_title": "connecting_linux_paragraph_13.6", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt deleted file mode 100644 index 
1ef13b80c6f0..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6.txt +++ /dev/null @@ -1,18 +0,0 @@ -One easy way of starting a sftp session is -sftp vsc40000@login.hpc.ugent.be -Typical and popular commands inside an sftp session are: -| | | -|:--------------------------|:-------------------------------------------------------------------------------------| -| cd ~/exmples/fibo | Move to the examples/fibo subdirectory on the (i.e., the HPC remote machine) | -| ls | Get a list of the files in the current directory on the HPC. | -| get fibo.py | Copy the file "fibo.py" from the HPC | -| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. | -| lcd test | Move to the "test" subdirectory on your local machine. | -| lcd .. | Move up one level in the local directory. | -| lls | Get local directory listing. | -| put test.py | Copy the local file test.py to the HPC. | -| put test1.py test2.py | Copy the local file test1.py to the and rename it to test2.py. | -| bye | Quit the sftp session | -| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | -| **mput *.h** | Copy all the local files with extension ".h" to the HPC. 
| -| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json deleted file mode 100644 index c7fe6bf6a44a..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_13.6_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-sftp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/", - "1": "https://docs.hpc.ugent.be/" - }, - "previous_title": "connecting_linux_paragraph_13.5", - "next_title": "connecting_linux_paragraph_13.7", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt deleted file mode 100644 index a0496edfb14b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1.txt +++ /dev/null @@ -1,10 +0,0 @@ -Transfer Files tofrom the HPC -Using a GUI -If you prefer a GUI to transfer files back and forth to the HPC, you can -use your file browser. Open your file browser and press -++"Ctrl"+"l"++ -This should open up a address bar where you can enter a URL. -Alternatively, look for the "connect to server" option in your file -browsers menu. -Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter. -You should now be able to browse files on the HPC in your file browser. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json deleted file mode 100644 index e3c48fe48297..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_14.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-a-GUI", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_paragraph_13", - "next_title": "connecting_paragraph_15", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt deleted file mode 100644 index 27ae3fb7bd45..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt +++ /dev/null @@ -1,12 +0,0 @@ -First Time connection to the HPC infrastructure -Connect -Open up a terminal and enter the following command to connect to the HPC. -ssh vsc40000@login.hpc.ugent.be -Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login -node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. -The first time you make a connection to the login node, you will be -asked to verify the authenticity of the login node. Please check -Warning message when first connecting to new host on how to do this. 
-A possible error message you can get if you previously saved your -private key somewhere else than the default location -($HOME/.ssh/id_rsa): diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json deleted file mode 100644 index 66c5dc4aeff7..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connect", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" - }, - "previous_title": "connecting_paragraph_4", - "next_title": "connecting_linux_paragraph_5.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt deleted file mode 100644 index be01e09bba0f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2.txt +++ /dev/null @@ -1,4 +0,0 @@ -Permission denied (publickey,gssapi-keyex,gssapi-with-mic). -In this case, use the -i option for the ssh command to specify the -location of your private key. 
For example: -ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json deleted file mode 100644 index 21b63518804c..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connect", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_linux_paragraph_5.1", - "next_title": "connecting_paragraph_6", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt deleted file mode 100644 index dfc592117923..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1.txt +++ /dev/null @@ -1,17 +0,0 @@ -Adding multiple SSH public keys (optional) -In case you are connecting from different computers to the login nodes, -it is advised to use separate SSH public keys to do so. You should -follow these steps. -1. Create a new public/private SSH key pair from the new computer. - Repeat the process described in - sectionĀ Generate a public/private key pair with OpenSSH. -2. Go to -3. Upload the new SSH public key using the Add public key section. Make sure that your - public key is actually saved, because a public key will be refused - if it is too short, wrong type, or in a wrong format. -4. 
(optional) If you lost your key, you can delete the old key on the - same page. You should keep at least one valid public SSH key in your - account. -5. Take into account that it will take some time before the new SSH - public key is active in your account on the system; waiting for - 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json deleted file mode 100644 index d9d3c33f876c..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_11.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Adding-multiple-SSH-public-keys-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Applying-for-the-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" - }, - "previous_title": "account_paragraph_10", - "next_title": "account_paragraph_12", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#adding-multiple-ssh-public-keys-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt deleted file mode 100644 index d96c80b42a2f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt +++ /dev/null @@ -1,15 +0,0 @@ -How do SSH keys work -To open a Terminal window in macOS, open the Finder and choose -*\>\> Applications \> Utilities \> Terminal* -Before requesting an account, you need to generate a pair of ssh keys. 
-One popular way to do this on is using the OpenSSH client included with , which you can then also use to log on to the clusters. -Test OpenSSH -Secure Shell (ssh) is a cryptographic network protocol for secure data -communication, remote command-line login, remote command execution, and -other secure network services between two networked computers. In short, -ssh provides a secure connection between 2 computers via insecure -channels (Network, Internet, telephone lines, ...). -"Secure" means that: -1. the User is authenticated to the System; and -2. the System is authenticated to the User; and -3. all data is encrypted during transfer. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json deleted file mode 100644 index 028d9d25f7fd..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Test-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_4", - "next_title": "account_macos_paragraph_5.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt deleted file mode 100644 index 318f913fba34..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2.txt +++ /dev/null @@ -1,14 +0,0 @@ -OpenSSH is a FREE implementation of the SSH connectivity 
protocol. comes -with its own implementation of OpenSSH, so you don't need to install any -third-party software to use it. Just open a terminal window and jump in! -On all popular Linux distributions, the OpenSSH software is readily -available, and most often installed by default. You can check whether -the OpenSSH software is installed by opening a terminal and typing: -$ ssh -V -OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 -To access the clusters and transfer your files, you will use the -following commands: -1. ssh-keygen: to generate the SSH key pair (public + private key); -2. ssh: to open a shell on a remote machine; -3. sftp: a secure equivalent of ftp; -4. scp: a secure equivalent of the remote copy command rcp. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json deleted file mode 100644 index dfec6f6fd5a8..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Test-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_5.1", - "next_title": "account_macos_paragraph_5.3", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#test-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt deleted file mode 100644 index 5df90a3dd7c0..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3.txt +++ /dev/null @@ -1,16 +0,0 @@ -Generate a public/private key pair with OpenSSH -A key pair might already be present in the default location inside your -home directory. Therefore, we first check if a key is available with the -"list short" ("ls") command: -ls ~/.ssh -If a key-pair is already available, you would normally get: -authorized_keys id_rsa id_rsa.pub known_hosts -Otherwise, the command will show: -ls: .ssh: No such file or directory -You can recognise a public/private key pair when a pair of files has the -same name except for the extension ".pub" added to one of them. In this -particular case, the private key is "id_rsa" and public key is -"id_rsa.pub". You may have multiple keys (not necessarily in the -directory "~/.ssh") if you or your operating system requires this. Be -aware that your existing key pair might be too short, or not the right -type. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json deleted file mode 100644 index 5a10e780b451..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_5.2", - "next_title": "account_macos_paragraph_5.4", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt deleted file mode 100644 index d29d61d27d98..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4.txt +++ /dev/null @@ -1,13 +0,0 @@ -You will need to generate a new key pair, when: -1. you don't have a key pair yet -2. you forgot the passphrase protecting your private key -3. your private key was compromised -4. your key pair is too short or not the right type -For extra security, the private key itself can be encrypted using a -"passphrase", to prevent anyone from using your private key even when -they manage to copy it. You have to "unlock" the private key by typing -the passphrase. Be sure to never give away your private key, it is -private and should stay private. You should not even copy it to one of -your other machines, instead, you should create a new public/private key -pair for each machine. 
-ssh-keygen -t rsa -b 4096 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json deleted file mode 100644 index 8da465c1f24e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_5.3", - "next_title": "account_macos_paragraph_5.5", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt deleted file mode 100644 index 78c142e82e00..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5.txt +++ /dev/null @@ -1,6 +0,0 @@ -This will ask you for a file name to store the private and public key, -and a passphrase to protect your private key. It needs to be emphasised -that you really should choose the passphrase wisely! The system will ask -you for it every time you want to use the private key that is every time -you want to access the cluster or transfer your files. -Without your key pair, you won't be able to apply for a personal VSC account. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json deleted file mode 100644 index 9d6f7b1a741a..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.5_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_5.4", - "next_title": "account_paragraph_6", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt deleted file mode 100644 index c3b395b52962..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt +++ /dev/null @@ -1 +0,0 @@ -Using an SSH agent (optional) \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json deleted file mode 100644 index 17a34a2f80b6..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional", - "source_file": "../../mkdocs/docs/HPC/account.md", - 
"title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_5", - "next_title": "account_paragraph_7", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt deleted file mode 100644 index 1069ebd9fbd3..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1.txt +++ /dev/null @@ -1,14 +0,0 @@ -Using an SSH agent (optional) -Most recent Unix derivatives include by default an SSH agent -to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into -the SSH manager keyring to be able to connect to the HPC cluster. If -not, SSH client will display an error message (see Connecting) similar to this: -Agent admitted failure to sign using the key. -Permission denied (publickey,gssapi-keyex,gssapi-with-mic). -This could be fixed using the ssh-add command. You can include the new -private keys' identities in your keyring with: -ssh-add - tip - Without extra options ssh-add adds any key located at $HOME/.ssh - directory, but you can specify the private key location path as - argument, as example: ssh-add /path/to/my/id_rsa. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json deleted file mode 100644 index 18b3b3675deb..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/connecting" - }, - "previous_title": "account_paragraph_6", - "next_title": "account_macos_paragraph_7.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt deleted file mode 100644 index c880ee4a228d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2.txt +++ /dev/null @@ -1,7 +0,0 @@ -Check that your key is available from the keyring with: -ssh-add -l -After these changes the key agent will keep your SSH key to connect to -the clusters as usual. - tip - You should execute ssh-add command again if you generate a new SSH - key. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json deleted file mode 100644 index 072a43cb3e43..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_7.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_macos_paragraph_7.1", - "next_title": "account_paragraph_8", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt deleted file mode 100644 index 5a5a52da0629..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1.txt +++ /dev/null @@ -1,12 +0,0 @@ -Applying for the account -After you log in using your UGent login and password, you will be asked to -upload the file that contains your public key, i.e., the file -"id_rsa.pub" which you have generated earlier. Make sure that your -public key is actually accepted for upload, because if it is in a wrong -format, wrong type or too short, then it will be refused. -This file has been stored in the directory "~/.ssh/". - tip - As ".ssh" is an invisible directory, the Finder will not show it by - default. 
The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++), - which will allow you to enter the name of a directory, which you would - like to open in Finder. Here, type "~/.ssh" and press enter. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json deleted file mode 100644 index 86c8c2048bfd..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_9.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Applying-for-the-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "account", - "previous_title": "account_paragraph_8", - "next_title": "account_paragraph_10", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/account/#applying-for-the-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt deleted file mode 100644 index 1d9129245359..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1.txt +++ /dev/null @@ -1,37 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Note - If you try to set a non-supported locale, then it will be automatically - set to the default. 
Currently the default is en_US.UFT-8 or en_US, - depending on whether your originally (non-supported) locale was UTF-8 or not. - Open the .bashrc on your local machine with your favourite editor and - add the following lines: - - $ nano ~/.bashrc - ... - export LANGUAGE="en_US.UTF-8" - export LC_ALL="en_US.UTF-8" - export LC_CTYPE="en_US.UTF-8" - export LANG="en_US.UTF-8" - ... - - tip "tip: vi" - To start entering text in vi: move to the place you want to start - entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" - To exit vi without saving your changes, enter ""ESC":q!" - - - or alternatively (if you are not comfortable with the Linux editors), - again on your local machine: - - echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile - echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile - echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile - - You can now log out, open a new terminal/shell on your local machine and - reconnect to the login node, and you should not get these warnings anymore. 
- \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json deleted file mode 100644 index 323292b910e3..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_11.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_10", - "next_title": "connecting_paragraph_12", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt deleted file mode 100644 index d872c89a0f83..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1.txt +++ /dev/null @@ -1,6 +0,0 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -The preferred way to transfer files is by using an scp or sftp via the -secure OpenSSH protocol. ships with an implementation of OpenSSH, so you -don't need to install any third-party software to use it. Just open a -terminal window and jump in! 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json deleted file mode 100644 index 8a420f36c2bd..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_12.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Transfer-Files-tofrom-the-HPC", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_11", - "next_title": "connecting_paragraph_13", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt deleted file mode 100644 index 8d0031fcca9f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1.txt +++ /dev/null @@ -1,12 +0,0 @@ -Transfer Files tofrom the HPC -Using scp -Secure copy or SCP is a tool (command) for securely transferring files between a local -host (= your computer) and a remote host (the HPC). It is based on the -Secure Shell (SSH) protocol. The scp command is the equivalent of the cp (i.e., -copy) command, but can copy files to or from remote machines. -It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if -you have symlinks to them in your home directory. See -the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux - for how to do this. 
-Open an additional terminal window and check that you're working on your -local machine. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json deleted file mode 100644 index 791570056009..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" - }, - "previous_title": "connecting_paragraph_12", - "next_title": "connecting_macos_paragraph_13.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt deleted file mode 100644 index f1da0677a677..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2.txt +++ /dev/null @@ -1,17 +0,0 @@ -$ hostname - -If you're still using the terminal that is connected to the HPC, close the -connection by typing "exit" in the terminal window. -For example, we will copy the (local) file "localfile.txt" to your -home directory on the HPC cluster. We first generate a small dummy -"localfile.txt", which contains the word "Hello". Use your own VSC -account, which is something like "vsc40000". 
Don't forget the colon (:) at the -end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your -local filesystem. You can even specify where to save the file on the -remote filesystem by putting a path after the colon. -$ echo "Hello" > localfile.txt -$ ls -l -... --rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -$ scp localfile.txt vsc40000@login.hpc.ugent.be: -localfile.txt 100% 6 0.0KB/s 00:00 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json deleted file mode 100644 index dc57de365bf1..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_13.1", - "next_title": "connecting_macos_paragraph_13.3", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt deleted file mode 100644 index 9585900e3564..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3.txt +++ /dev/null @@ -1,22 +0,0 @@ -Connect to the HPC via another terminal, print the working directory (to -make sure you're in the home directory) and check whether the file has -arrived: -$ pwd -/user/home/gent/vsc400/vsc40000 -$ ls -l -total 
1536 -drwxrwxr-x 2 -drwxrwxr-x 2 -drwxrwxr-x 10 --rw-r--r-- 1 -$ cat localfile.txt -Hello -The scp command can also be used to copy files from the cluster to your -local machine. Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" -subdirectory on the cluster to your local computer. -First, we will confirm that the file is indeed in the "docs" -subdirectory. In the terminal on the login node, enter: -$ cd ~/docs -$ ls -l -total 1536 --rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json deleted file mode 100644 index 5a4623c650db..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_13.2", - "next_title": "connecting_macos_paragraph_13.4", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt deleted file mode 100644 index d09b69552ef7..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4.txt +++ /dev/null @@ -1,14 +0,0 @@ -Now we will copy the file to the local machine. 
On the terminal on your -own local computer, enter: -$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . -intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 -$ ls -l -total 899 --rw-r--r-- 1 user staff 741995 Sep 18 09:53 --rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt -The file has been copied from the HPC to your local computer. -It's also possible to copy entire directories (and their contents) with -the -r flag. For example, if we want to copy the local directory -dataset to $VSC_SCRATCH, we can use the following command (assuming -you've created the scratch symlink): -scp -r dataset vsc40000@login.hpc.ugent.be:scratch diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json deleted file mode 100644 index 54b3fe19d58f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-scp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_13.3", - "next_title": "connecting_macos_paragraph_13.5", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt deleted file mode 100644 index 532d57bb4a58..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5.txt +++ /dev/null @@ -1,14 +0,0 @@ -If 
you don't use the -r option to copy a directory, you will run into -the following error: -$ scp dataset vsc40000@login.hpc.ugent.be:scratch -dataset: not a regular file -Using sftp -The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file -transfer and file management functionalities over any reliable data -stream. It was designed as an extension of the Secure Shell protocol -(SSH) version 2.0. This protocol assumes that it is run over a secure -channel, such as SSH, that the server has already authenticated the -client, and that the identity of the client user is available to the -protocol. -The sftp is an equivalent of the ftp command, with the difference that -it uses the secure ssh protocol to connect to the clusters. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json deleted file mode 100644 index 0b9ba08e3b11..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.5_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-sftp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_13.4", - "next_title": "connecting_macos_paragraph_13.6", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt deleted file mode 100644 index 
1ef13b80c6f0..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6.txt +++ /dev/null @@ -1,18 +0,0 @@ -One easy way of starting a sftp session is -sftp vsc40000@login.hpc.ugent.be -Typical and popular commands inside an sftp session are: -| | | -|:--------------------------|:-------------------------------------------------------------------------------------| -| cd ~/exmples/fibo | Move to the examples/fibo subdirectory on the (i.e., the HPC remote machine) | -| ls | Get a list of the files in the current directory on the HPC. | -| get fibo.py | Copy the file "fibo.py" from the HPC | -| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory. | -| lcd test | Move to the "test" subdirectory on your local machine. | -| lcd .. | Move up one level in the local directory. | -| lls | Get local directory listing. | -| put test.py | Copy the local file test.py to the HPC. | -| put test1.py test2.py | Copy the local file test1.py to the and rename it to test2.py. | -| bye | Quit the sftp session | -| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | -| **mput *.h** | Copy all the local files with extension ".h" to the HPC. 
| -| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json deleted file mode 100644 index 9b08fbde5498..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_13.6_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-sftp", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "links": { - "0": "https://docs.hpc.ugent.be/", - "1": "https://docs.hpc.ugent.be/" - }, - "previous_title": "connecting_macos_paragraph_13.5", - "next_title": "connecting_macos_paragraph_13.7", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt deleted file mode 100644 index 20a4acb40a80..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1.txt +++ /dev/null @@ -1,15 +0,0 @@ -Transfer Files tofrom the HPC -Using a GUI (Cyberduck) -Cyberduck is a graphical alternative to the scp command. It can be -installed from . -This is the one-time setup you will need to do before connecting: -1. After starting Cyberduck, the Bookmark tab will show up. To add a - new bookmark, click on the "+" sign on the bottom left of the - window. A new window will open. -2. In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)". -3. In the "Server" field, type in login.hpc.ugent.be. 
In the "Username" field, type in - your VSC account id (this looks like vsc40000). -4. Select the location of your SSH private key in the "SSH Private Key" field. -5. Finally, type in a name for the bookmark in the "Nickname" field and - close the window by pressing on the red circle in the top left - corner of the window. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json deleted file mode 100644 index 694b7682aa97..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-a-GUI-(Cyberduck)", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_paragraph_13", - "next_title": "connecting_macos_paragraph_14.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt deleted file mode 100644 index 1d20edf411f8..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2.txt +++ /dev/null @@ -1,3 +0,0 @@ -To open the connection, click on the "Bookmarks" icon (which -resembles an open book) and double-click on the bookmark you just -created. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json deleted file mode 100644 index e32b1ab4c58e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_14.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Using-a-GUI-(Cyberduck)", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "Transfer-Files-tofrom-the-HPC", - "previous_title": "connecting_macos_paragraph_14.1", - "next_title": "connecting_paragraph_15", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt deleted file mode 100644 index 1e22cfc8b1f5..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt +++ /dev/null @@ -1,10 +0,0 @@ -First Time connection to the HPC infrastructure -Connect -Open up a terminal and enter the following command to connect to the HPC. -You can open a terminal by navigation to Applications and then Utilities in the finder and open Terminal.app, or enter Terminal in Spotlight Search. -ssh vsc40000@login.hpc.ugent.be -Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login -node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. -The first time you make a connection to the login node, you will be -asked to verify the authenticity of the login node. 
Please check -Warning message when first connecting to new host on how to do this. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json deleted file mode 100644 index f928fbfcdd6e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connect", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" - }, - "previous_title": "connecting_paragraph_4", - "next_title": "connecting_macos_paragraph_5.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt deleted file mode 100644 index f3f5ac6e7754..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2.txt +++ /dev/null @@ -1,7 +0,0 @@ -A possible error message you can get if you previously saved your -private key somewhere else than the default location -($HOME/.ssh/id_rsa): -Permission denied (publickey,gssapi-keyex,gssapi-with-mic). -In this case, use the -i option for the ssh command to specify the -location of your private key. 
For example: -ssh -i /home/example/my_keys diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json deleted file mode 100644 index 047d58633612..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Connect", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_macos_paragraph_5.1", - "next_title": "connecting_paragraph_6", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt deleted file mode 100644 index ca00a8a0f651..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1.txt +++ /dev/null @@ -1,17 +0,0 @@ -Adding multiple SSH public keys (optional) -In case you are connecting from different computers to the login nodes, -it is advised to use separate SSH public keys to do so. You should -follow these steps. -1. Create a new public/private SSH key pair from Putty. Repeat the - process described in - sectionĀ Generate a public/private key pair. -2. Go to -3. Upload the new SSH public key using the Add public key section. Make sure that your - public key is actually saved, because a public key will be refused - if it is too short, wrong type, or in a wrong format. -4. 
(optional) If you lost your key, you can delete the old key on the - same page. You should keep at least one valid public SSH key in your - account. -5. Take into account that it will take some time before the new SSH - public key is active in your account on the system; waiting for - 15-30 minutes should be sufficient. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json deleted file mode 100644 index 4614c053f2ce..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_11.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Adding-multiple-SSH-public-keys-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Applying-for-the-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair" - }, - "previous_title": "account_paragraph_10", - "next_title": "account_paragraph_12", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#adding-multiple-ssh-public-keys-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt deleted file mode 100644 index 93ca7ac9da5f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt +++ /dev/null @@ -1,18 +0,0 @@ -How do SSH keys work -A typical Windows environment does not come with pre-installed software -to connect and run command-line executables on a HPC. 
Some tools need to be -installed on your Windows machine first, before we can start the actual -work. -Get PuTTY: A free telnet/SSH client -We recommend to use the PuTTY tools package, which is freely available. -You do not need to install PuTTY, you can download the PuTTY and -PuTTYgen executable and run it. This can be useful in situations where -you do not have the required permissions to install software on the -computer you are using. Alternatively, an installation package is also -available. -You can download PuTTY from the official address: -. You -probably want the 64-bits version. If you can install software on your -computer, you can use the "Package files", if not, you can download and -use putty.exe and puttygen.exe in the "Alternative binary files" -section. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json deleted file mode 100644 index e0024f40d556..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Get-PuTTY-A-free-telnetSSH-client", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_3", - "next_title": "account_windows_paragraph_4.2", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#get-putty-a-free-telnetssh-client" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt deleted file mode 100644 index 
cebd1da3bafe..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2.txt +++ /dev/null @@ -1,13 +0,0 @@ -The PuTTY package consists of several components, but we'll only use -two: -1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) -2. PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, - see Generate a public/private key pair) -Generating a public/private key pair -Before requesting a VSC account, you need to generate a pair of ssh -keys. You need 2 keys, a public and a private key. You can visualise the -public key as a lock to which only you have the key (your private key). -You can send a copy of your lock to anyone without any problems, because -only you can open it, as long as you keep your private key secure. To -generate a public/private key pair, you can use the PuTTYgen key -generator. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json deleted file mode 100644 index 534ebda0a1c3..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.2_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generating-a-publicprivate-key-pair", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/connecting/#open-a-terminal", - "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair" - }, - "previous_title": "account_windows_paragraph_4.1", - "next_title": "account_windows_paragraph_4.3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" 
-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt deleted file mode 100644 index 6e65300562da..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3.txt +++ /dev/null @@ -1,30 +0,0 @@ -Start PuTTYgen.exe it and follow these steps: -1. In "Parameters" (at the bottom of the window), choose "RSA" and set the number of - bits in the key to 4096. -2. Click on "Generate". To generate the key, you must move the mouse cursor over - the PuTTYgen window (this generates some random data that PuTTYgen - uses to generate the key pair). Once the key pair is generated, your - public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file". -3. Next, it is advised to fill in the "Key comment" field to make it easier - identifiable afterwards. -4. Next, you should specify a passphrase in the "Key passphrase" field and retype it in - the "Confirm passphrase" field. Remember, the passphrase protects the private key against - unauthorised use, so it is best to choose one that is not too easy - to guess but that you can still remember. Using a passphrase is not - required, but we recommend you to use a good passphrase unless you - are certain that your computer's hard disk is encrypted with a - decent password. (If you are not sure your disk is encrypted, it - probably isn't.) -5. Save both the public and private keys in a folder on your personal - computer (We recommend to create and put them in the folder - "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the - buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and - "id_rsa.ppk" for the private key. -6. 
Finally, save an "OpenSSH" version of your private key (in - particular for later "X2Go" usage, see x2go) by entering the - "Conversions" menu and selecting "Export OpenSSH key" (do not select the - "force new file format" variant). Save the file in the same location - as in the previous step with filename "id_rsa". (If there is no - "Conversions" menu, you must update your "puttygen" version. If you - want to do this conversion afterwards, you can start with loading an - existing "id_rsa.ppk" and only do this conversions export.) diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json deleted file mode 100644 index 4555638639d6..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.3_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generating-a-publicprivate-key-pair", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/" - }, - "previous_title": "account_windows_paragraph_4.2", - "next_title": "account_windows_paragraph_4.4", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt deleted file mode 100644 index d0425d6738f4..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4.txt +++ /dev/null @@ -1,2 +0,0 @@ -If you use another program to 
generate a key pair, please remember that -they need to be in the OpenSSH format to access the HPC clusters. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json deleted file mode 100644 index ebd55060657f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.4_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Generating-a-publicprivate-key-pair", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_windows_paragraph_4.3", - "next_title": "account_paragraph_5", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt deleted file mode 100644 index b8dba743c0a2..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt +++ /dev/null @@ -1,13 +0,0 @@ -Using an SSH agent (optional) -It is possible to setup a SSH agent in Windows. This is an optional -configuration to help you to keep all your SSH keys (if you have -several) stored in the same key ring to avoid to type the SSH key -password each time. The SSH agent is also necessary to enable SSH hops -with key forwarding from Windows. -Pageant is the SSH authentication agent used in windows. This agent should be -available from the PuTTY installation package - or as -stand alone binary package. 
-After the installation just start the Pageant application in Windows, -this will start the agent in background. The agent icon will be visible -from the Windows panel. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json deleted file mode 100644 index 5fd697066b62..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_paragraph_5", - "next_title": "account_windows_paragraph_6.2", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt deleted file mode 100644 index 62ac04dd9aa0..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2.txt +++ /dev/null @@ -1,11 +0,0 @@ -At this point the agent does not contain any private key. You should -include the private key(s) generated in the previous section Generating a public/private key pair. -1. Click on "Add key" -2. Select the private key file generated in Generating a public/private key pair ("id_rsa.ppk" by default). -3. Enter the same SSH key password used to generate the key. After this - step the new key will be included in Pageant to manage the SSH - connections. -4. 
You can see the SSH key(s) available in the key ring just clicking - on "View Keys". -5. You can change PuTTY setup to use the SSH agent. Open PuTTY and check - Connection > SSH > Auth > Allow agent forwarding. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json deleted file mode 100644 index 11c693380290..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.2_metadata.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "links": { - "0": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair", - "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair" - }, - "previous_title": "account_windows_paragraph_6.1", - "next_title": "account_windows_paragraph_6.3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt deleted file mode 100644 index 17c94975dec9..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3.txt +++ /dev/null @@ -1,5 +0,0 @@ -Now you can connect to the login nodes as usual. The SSH agent will know -which SSH key should be used and you do not have to type the SSH -passwords each time, this task is done by Pageant agent automatically. 
-It is also possible to use WinSCP with Pageant, see - for more details. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json deleted file mode 100644 index e33d002d2485..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Using-an-SSH-agent-(optional)", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 3, - "directory": "account", - "parent_title": "Getting-ready-to-request-an-account", - "previous_title": "account_windows_paragraph_6.2", - "next_title": "account_paragraph_7", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt deleted file mode 100644 index 9fd23612756d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1.txt +++ /dev/null @@ -1,8 +0,0 @@ -Applying for the account -After you log in using your UGent login and password, you will be asked to -upload the file that contains your public key, i.e., the file -"id_rsa.pub" which you have generated earlier. Make sure that your -public key is actually accepted for upload, because if it is in a wrong -format, wrong type or too short, then it will be refused. 
-This file should have been stored in the directory -"C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh" diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json deleted file mode 100644 index 87cda41283f4..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_9.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "account", - "subtitle": "Applying-for-the-account", - "source_file": "../../mkdocs/docs/HPC/account.md", - "title_depth": 2, - "directory": "account", - "parent_title": "account", - "previous_title": "account_paragraph_8", - "next_title": "account_paragraph_10", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/account/#applying-for-the-account" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt deleted file mode 100644 index 5aa8ca033740..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1.txt +++ /dev/null @@ -1,9 +0,0 @@ -First Time connection to the HPC infrastructure - A locale is a set of parameters that defines the user's language, country and - any special variant preferences that the user wants to see in their user - interface. Usually a locale identifier consists of at least a language - identifier and a region identifier. - Note - If you try to set a non-supported locale, then it will be automatically - set to the default. Currently the default is en_US.UFT-8 or en_US, - depending on whether your originally (non-supported) locale was UTF-8 or not. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json deleted file mode 100644 index d4b02dbc9fb4..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_11.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "First-Time-connection-to-the-HPC-infrastructure", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 2, - "directory": "connecting", - "parent_title": "Connecting-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_10", - "next_title": "connecting_paragraph_12", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt deleted file mode 100644 index 67e5e4548529..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1.txt +++ /dev/null @@ -1,22 +0,0 @@ -Transfer Files to/from the HPC -Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. -WinSCP -To transfer files to and from the cluster, we recommend the use of -WinSCP, a graphical file management tool which can transfer files using -secure protocols such as SFTP and SCP. WinSCP is freely available from -. -To transfer your files using WinSCP, -1. Open the program -2. 
The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" - 1. Click "New Site". - 2. Enter "login.hpc.ugent.be" in the "Host name" field. - 3. Enter your "vsc-account" in the "User name" field. - 4. Select "SCP" as the "file" protocol. - 5. Note that the password field remains empty. - - 6. Click "Advanced...". - 7. Click "SSH > Authentication". - 8. Select your private key in the field "Private key file". -3. Press the "Save" button, to save the session under "Session > Sites" for future access. -4. Finally, when clicking on "Login", you will be asked for your key passphrase. - diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json deleted file mode 100644 index a4bbaee0f598..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "WinSCP", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_paragraph_11", - "next_title": "connecting_windows_paragraph_12.2", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt deleted file mode 100644 index 82c71ac41299..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2.txt +++ /dev/null @@ -1,11 +0,0 @@ -The first time you make a connection to the login node, a Security -Alert will appear and you will be asked to verify the authenticity of the -login node. -Make sure the fingerprint in the alert matches one of the following: -- ssh-rsa 2048 10:2f:31:21:04:75:cb:ed:67:e0:d5:0c:a1:5a:f4:78 -- ssh-rsa 2048 SHA256:W8Wz0/FkkCR2ulN7+w8tNI9M0viRgFr2YlHrhKD2Dd0 -- ssh-ed25519 255 19:28:76:94:52:9d:ff:7d:fb:8b:27:b6:d7:69:42:eb -- ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA -- ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f -- ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ -If it does, press Yes, if it doesn't, please contact hpc@ugent.be. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json deleted file mode 100644 index 80a8ef763a1b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "WinSCP", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_windows_paragraph_12.1", - "next_title": "connecting_windows_paragraph_12.3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt deleted file mode 100644 index c0ffe6b46021..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3.txt +++ /dev/null @@ -1,6 +0,0 @@ -Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255 -rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using. -It is safe to ignore this 255 versus 256 difference, but the part after should be -identical. -Now, try out whether you can transfer an arbitrary file from your local -machine to the HPC and back. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json deleted file mode 100644 index 07760730d56f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_12.3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "WinSCP", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "previous_title": "connecting_windows_paragraph_12.2", - "next_title": "connecting_paragraph_13", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt deleted file mode 100644 index e45f4e63b85b..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1.txt +++ /dev/null @@ -1,42 +0,0 @@ -First Time connection to the HPC infrastructure -Open a Terminal -You've generated a public/private key pair with PuTTYgen and have an -approved account on the VSC clusters. The next step is to setup the -connection to (one of) the HPC. -In the screenshots, we show the setup for user -"vsc20167" -to the HPC cluster via the login node "login.hpc.ugent.be". -1. Start the PuTTY executable putty.exe in your directory - C:\Program Files (x86)\PuTTY and the configuration screen will pop - up. As you will often use the PuTTY tool, we recommend adding a - shortcut on your desktop. -2. Within the category , in the field , enter the name of the - login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to. - -3. In the category "Connection > Data", in the field "Auto-login username", put in , which is your VSC - username that you have received by e-mail after your request was - approved. - -4. In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key - (i.e., "id_rsa.ppk") that you generated and saved above. -5. In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox. -6. Now go back to , and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to - store the session information. - -7. Now pressing "Open", will open a terminal window and asks for you - passphrase. - -8. If this is your first time connecting, you will be asked to verify - the authenticity of the login node. Please see - sectionĀ Warning message when first connecting to new host - on how to do this. -9. After entering your correct passphrase, you will be connected to the - login-node of the HPC. -10. 
To check you can now "Print the Working Directory" (pwd) and check - the name of the computer, where you have logged in (hostname): - $ pwd - /user/home/gent/vsc400/vsc40000 - $ hostname -f - gligar07.gastly.os -11. For future PuTTY sessions, just select your saved session (i.e. "hpcugent") - from the list, "Load" it and press "Open". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json deleted file mode 100644 index d3b7d581c943..000000000000 --- a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_4.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "connecting", - "subtitle": "Open-a-Terminal", - "source_file": "../../mkdocs/docs/HPC/connecting.md", - "title_depth": 3, - "directory": "connecting", - "parent_title": "First-Time-connection-to-the-HPC-infrastructure", - "links": { - "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" - }, - "previous_title": "connecting_paragraph_3", - "next_title": "connecting_paragraph_5", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" -} \ No newline at end of file From 1ef1f10e6b05839f604fe65e2370599e580c2382 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 13:26:47 +0200 Subject: [PATCH 141/145] Changed paragraphs to decide length based on tokens instead of characters --- scripts/HPC_chatbot_preprocessor/README.md | 2 +- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md index 96a99498451f..6cfd9be82315 100644 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ 
b/scripts/HPC_chatbot_preprocessor/README.md @@ -36,7 +36,7 @@ Including this option will split the source files based on the titles and subtit #### `pl`/`min_paragraph_length` -This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 683 characters. This options only works if `split_on_titles` is not enabled. +This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 512 tokens. This options only works if `split_on_titles` is not enabled. #### `td`/`max_title_depth` diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index cff487f85893..7c3e63c01971 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -6,6 +6,7 @@ import os import re import shutil +import tiktoken import yaml from itertools import chain, tee, zip_longest from pathlib import Path @@ -615,7 +616,12 @@ def paragraph_long_enough(paragraph, options): :return: """ # TODO: change this into something that uses the tokenizer - return len(paragraph) >= options[MIN_PARAGRAPH_LENGTH] + encoding = tiktoken.get_encoding("cl100k_base") + token_amount = len(encoding.encode(paragraph)) + + print(token_amount) + + return token_amount >= options[MIN_PARAGRAPH_LENGTH] def write_metadata(main_title, subtitle, links, title_level, directory, source_file): @@ -1144,7 +1150,7 @@ def main(options): main_title = filename[:-3] # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for _ in range(5)] + curr_dirs = [filename[:-3] for _ in range(options[MAX_TITLE_DEPTH] + 1)] ################### 
actually parse the md file ################### @@ -1212,7 +1218,7 @@ def main(options): parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=683, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)") + parser.add_argument("-pl", "--min_paragraph_length", type=int, default=512, help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)") parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. 
Only works if split on titles is enabled") From 621c0a3f083966f2aaa097516767fdf2d4fdd559 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 13:27:57 +0200 Subject: [PATCH 142/145] Changed paragraphs to decide length based on tokens instead of characters --- scripts/HPC_chatbot_preprocessor/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt index 4d27d4624600..37137582aad6 100644 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ b/scripts/HPC_chatbot_preprocessor/requirements.txt @@ -1,2 +1,4 @@ PyYAML==6.0.2 -Jinja2==3.1.4 \ No newline at end of file +Jinja2==3.1.4 +tiktoken~=0.7.0 +pathlib~=1.0.1 \ No newline at end of file From adf364d1f897e433fbc2f0fcc80b8fdeb4f22a43 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 13:28:45 +0200 Subject: [PATCH 143/145] Changed paragraphs to decide length based on tokens instead of characters --- scripts/HPC_chatbot_preprocessor/chatbot_parser.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py index 7c3e63c01971..24e0b287a0a4 100644 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py @@ -615,12 +615,9 @@ def paragraph_long_enough(paragraph, options): :param options: dictionary containing the options given by the user :return: """ - # TODO: change this into something that uses the tokenizer encoding = tiktoken.get_encoding("cl100k_base") token_amount = len(encoding.encode(paragraph)) - print(token_amount) - return token_amount >= options[MIN_PARAGRAPH_LENGTH] From 32f884d13f8d79420c4fae3725da169fbe2e74fe Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 13:47:10 +0200 Subject: [PATCH 144/145] Added output of chatbot_parser script part 1 --- .../generic/FAQ/FAQ_paragraph_1.txt | 33 
++++++++++ .../generic/FAQ/FAQ_paragraph_1_metadata.json | 19 ++++++ .../generic/FAQ/FAQ_paragraph_2.txt | 34 ++++++++++ .../generic/FAQ/FAQ_paragraph_2_metadata.json | 20 ++++++ .../generic/FAQ/FAQ_paragraph_3.txt | 28 ++++++++ .../generic/FAQ/FAQ_paragraph_3_metadata.json | 18 ++++++ .../generic/FAQ/FAQ_paragraph_4.txt | 29 +++++++++ .../generic/FAQ/FAQ_paragraph_4_metadata.json | 20 ++++++ .../generic/FAQ/FAQ_paragraph_5.txt | 36 +++++++++++ .../generic/FAQ/FAQ_paragraph_5_metadata.json | 19 ++++++ .../generic/FAQ/FAQ_paragraph_6.txt | 37 +++++++++++ .../generic/FAQ/FAQ_paragraph_6_metadata.json | 17 +++++ .../generic/FAQ/FAQ_paragraph_7.txt | 25 ++++++++ .../generic/FAQ/FAQ_paragraph_7_metadata.json | 19 ++++++ .../generic/FAQ/FAQ_paragraph_8.txt | 20 ++++++ .../generic/FAQ/FAQ_paragraph_8_metadata.json | 18 ++++++ .../generic/HOD/HOD_paragraph_1.txt | 43 +++++++++++++ .../generic/HOD/HOD_paragraph_1_metadata.json | 16 +++++ .../generic/HOD/HOD_paragraph_2.txt | 34 ++++++++++ .../generic/HOD/HOD_paragraph_2_metadata.json | 12 ++++ .../generic/HOD/HOD_paragraph_3.txt | 9 +++ .../generic/HOD/HOD_paragraph_3_metadata.json | 12 ++++ .../generic/account/account_paragraph_1.txt | 36 +++++++++++ .../account/account_paragraph_1_metadata.json | 18 ++++++ .../alphafold/alphafold_paragraph_1.txt | 32 ++++++++++ .../alphafold_paragraph_1_metadata.json | 17 +++++ .../alphafold/alphafold_paragraph_2.txt | 32 ++++++++++ .../alphafold_paragraph_2_metadata.json | 18 ++++++ .../alphafold/alphafold_paragraph_3.txt | 26 ++++++++ .../alphafold_paragraph_3_metadata.json | 19 ++++++ .../alphafold/alphafold_paragraph_4.txt | 33 ++++++++++ .../alphafold_paragraph_4_metadata.json | 21 ++++++ .../alphafold/alphafold_paragraph_5.txt | 33 ++++++++++ .../alphafold_paragraph_5_metadata.json | 12 ++++ .../apptainer/apptainer_paragraph_1.txt | 42 ++++++++++++ .../apptainer_paragraph_1_metadata.json | 12 ++++ .../apptainer/apptainer_paragraph_2.txt | 46 +++++++++++++ 
.../apptainer_paragraph_2_metadata.json | 12 ++++ .../apptainer/apptainer_paragraph_3.txt | 34 ++++++++++ .../apptainer_paragraph_3_metadata.json | 12 ++++ .../best_practices_paragraph_1.txt | 39 +++++++++++ .../best_practices_paragraph_1_metadata.json | 15 +++++ .../compiling_your_software_paragraph_1.txt | 39 +++++++++++ ...ng_your_software_paragraph_1_metadata.json | 12 ++++ .../compiling_your_software_paragraph_2.txt | 42 ++++++++++++ ...ng_your_software_paragraph_2_metadata.json | 15 +++++ .../compiling_your_software_paragraph_3.txt | 50 +++++++++++++++ ...ng_your_software_paragraph_3_metadata.json | 12 ++++ .../compiling_your_software_paragraph_4.txt | 52 +++++++++++++++ ...ng_your_software_paragraph_4_metadata.json | 12 ++++ .../compiling_your_software_paragraph_5.txt | 28 ++++++++ ...ng_your_software_paragraph_5_metadata.json | 12 ++++ .../connecting/connecting_paragraph_1.txt | 39 +++++++++++ .../connecting_paragraph_1_metadata.json | 15 +++++ .../connecting/connecting_paragraph_4.txt | 35 ++++++++++ .../connecting_paragraph_4_metadata.json | 15 +++++ .../connecting/connecting_paragraph_9.txt | 11 ++++ .../connecting_paragraph_9_metadata.json | 16 +++++ .../generic/crontab/crontab_paragraph_1.txt | 28 ++++++++ .../crontab/crontab_paragraph_1_metadata.json | 15 +++++ .../easybuild/easybuild_paragraph_1.txt | 44 +++++++++++++ .../easybuild_paragraph_1_metadata.json | 15 +++++ .../easybuild/easybuild_paragraph_2.txt | 33 ++++++++++ .../easybuild_paragraph_2_metadata.json | 16 +++++ .../easybuild/easybuild_paragraph_3.txt | 18 ++++++ .../easybuild_paragraph_3_metadata.json | 12 ++++ ..._tuning_job_specifications_paragraph_1.txt | 42 ++++++++++++ ...b_specifications_paragraph_1_metadata.json | 12 ++++ ..._tuning_job_specifications_paragraph_2.txt | 44 +++++++++++++ ...b_specifications_paragraph_2_metadata.json | 12 ++++ ..._tuning_job_specifications_paragraph_3.txt | 49 ++++++++++++++ ...b_specifications_paragraph_3_metadata.json | 12 ++++ 
..._tuning_job_specifications_paragraph_4.txt | 60 +++++++++++++++++ ...b_specifications_paragraph_4_metadata.json | 15 +++++ ..._tuning_job_specifications_paragraph_5.txt | 42 ++++++++++++ ...b_specifications_paragraph_5_metadata.json | 12 ++++ ..._tuning_job_specifications_paragraph_6.txt | 38 +++++++++++ ...b_specifications_paragraph_6_metadata.json | 12 ++++ ..._tuning_job_specifications_paragraph_7.txt | 42 ++++++++++++ ...b_specifications_paragraph_7_metadata.json | 12 ++++ ..._tuning_job_specifications_paragraph_8.txt | 33 ++++++++++ ...b_specifications_paragraph_8_metadata.json | 16 +++++ .../getting_started_paragraph_3.txt | 37 +++++++++++ .../getting_started_paragraph_3_metadata.json | 17 +++++ .../getting_started_paragraph_4.txt | 32 ++++++++++ .../getting_started_paragraph_4_metadata.json | 16 +++++ .../getting_started_paragraph_5.txt | 10 +++ .../getting_started_paragraph_5_metadata.json | 21 ++++++ .../generic/gpu/gpu_paragraph_1.txt | 48 ++++++++++++++ .../generic/gpu/gpu_paragraph_1_metadata.json | 16 +++++ .../generic/gpu/gpu_paragraph_2.txt | 35 ++++++++++ .../generic/gpu/gpu_paragraph_2_metadata.json | 12 ++++ .../generic/gpu/gpu_paragraph_3.txt | 40 ++++++++++++ .../generic/gpu/gpu_paragraph_3_metadata.json | 15 +++++ .../generic/gpu/gpu_paragraph_4.txt | 11 ++++ .../generic/gpu/gpu_paragraph_4_metadata.json | 12 ++++ .../account/account_linux_paragraph_2.1.txt | 4 ++ .../account_linux_paragraph_2.1_metadata.json | 15 +++++ .../account/account_linux_paragraph_3.1.txt | 44 +++++++++++++ .../account_linux_paragraph_3.1_metadata.json | 12 ++++ .../account/account_linux_paragraph_3.2.txt | 19 ++++++ .../account_linux_paragraph_3.2_metadata.json | 12 ++++ .../account/account_linux_paragraph_4.1.txt | 22 +++++++ .../account_linux_paragraph_4.1_metadata.json | 15 +++++ .../account/account_linux_paragraph_5.1.txt | 37 +++++++++++ .../account_linux_paragraph_5.1_metadata.json | 12 ++++ .../account/account_linux_paragraph_6.1.txt | 34 ++++++++++ 
.../account_linux_paragraph_6.1_metadata.json | 15 +++++ .../connecting_linux_paragraph_2.1.txt | 7 ++ ...nnecting_linux_paragraph_2.1_metadata.json | 16 +++++ .../connecting_linux_paragraph_3.1.txt | 47 ++++++++++++++ ...nnecting_linux_paragraph_3.1_metadata.json | 15 +++++ .../connecting_linux_paragraph_5.1.txt | 61 ++++++++++++++++++ ...nnecting_linux_paragraph_5.1_metadata.json | 12 ++++ .../connecting_linux_paragraph_6.1.txt | 6 ++ ...nnecting_linux_paragraph_6.1_metadata.json | 12 ++++ .../connecting_linux_paragraph_7.1.txt | 42 ++++++++++++ ...nnecting_linux_paragraph_7.1_metadata.json | 15 +++++ .../connecting_linux_paragraph_7.2.txt | 37 +++++++++++ ...nnecting_linux_paragraph_7.2_metadata.json | 12 ++++ .../connecting_linux_paragraph_7.3.txt | 18 ++++++ ...nnecting_linux_paragraph_7.3_metadata.json | 16 +++++ .../connecting_linux_paragraph_8.1.txt | 17 +++++ ...nnecting_linux_paragraph_8.1_metadata.json | 15 +++++ .../getting_started_linux_paragraph_1.1.txt | 30 +++++++++ ..._started_linux_paragraph_1.1_metadata.json | 23 +++++++ .../getting_started_linux_paragraph_1.2.txt | 2 + ..._started_linux_paragraph_1.2_metadata.json | 15 +++++ .../getting_started_linux_paragraph_2.1.txt | 27 ++++++++ ..._started_linux_paragraph_2.1_metadata.json | 19 ++++++ .../account/account_macos_paragraph_2.1.txt | 4 ++ .../account_macos_paragraph_2.1_metadata.json | 15 +++++ .../account/account_macos_paragraph_3.1.txt | 45 +++++++++++++ .../account_macos_paragraph_3.1_metadata.json | 12 ++++ .../account/account_macos_paragraph_3.2.txt | 19 ++++++ .../account_macos_paragraph_3.2_metadata.json | 12 ++++ .../account/account_macos_paragraph_4.1.txt | 21 ++++++ .../account_macos_paragraph_4.1_metadata.json | 15 +++++ .../account/account_macos_paragraph_5.1.txt | 42 ++++++++++++ .../account_macos_paragraph_5.1_metadata.json | 12 ++++ .../account/account_macos_paragraph_6.1.txt | 34 ++++++++++ .../account_macos_paragraph_6.1_metadata.json | 15 +++++ 
.../connecting_macos_paragraph_2.1.txt | 7 ++ ...nnecting_macos_paragraph_2.1_metadata.json | 16 +++++ .../connecting_macos_paragraph_3.1.txt | 48 ++++++++++++++ ...nnecting_macos_paragraph_3.1_metadata.json | 15 +++++ .../connecting_macos_paragraph_5.1.txt | 61 ++++++++++++++++++ ...nnecting_macos_paragraph_5.1_metadata.json | 12 ++++ .../connecting_macos_paragraph_6.1.txt | 6 ++ ...nnecting_macos_paragraph_6.1_metadata.json | 12 ++++ .../connecting_macos_paragraph_7.1.txt | 42 ++++++++++++ ...nnecting_macos_paragraph_7.1_metadata.json | 15 +++++ .../connecting_macos_paragraph_7.2.txt | 37 +++++++++++ ...nnecting_macos_paragraph_7.2_metadata.json | 12 ++++ .../connecting_macos_paragraph_7.3.txt | 18 ++++++ ...nnecting_macos_paragraph_7.3_metadata.json | 16 +++++ .../connecting_macos_paragraph_8.1.txt | 25 ++++++++ ...nnecting_macos_paragraph_8.1_metadata.json | 15 +++++ .../getting_started_macos_paragraph_1.1.txt | 27 ++++++++ ..._started_macos_paragraph_1.1_metadata.json | 25 ++++++++ .../getting_started_macos_paragraph_1.2.txt | 1 + ..._started_macos_paragraph_1.2_metadata.json | 12 ++++ .../getting_started_macos_paragraph_2.1.txt | 27 ++++++++ ..._started_macos_paragraph_2.1_metadata.json | 19 ++++++ .../account/account_windows_paragraph_2.1.txt | 64 +++++++++++++++++++ ...ccount_windows_paragraph_2.1_metadata.json | 18 ++++++ .../account/account_windows_paragraph_2.2.txt | 2 + ...ccount_windows_paragraph_2.2_metadata.json | 12 ++++ .../account/account_windows_paragraph_4.1.txt | 29 +++++++++ ...ccount_windows_paragraph_4.1_metadata.json | 16 +++++ .../account/account_windows_paragraph_5.1.txt | 38 +++++++++++ ...ccount_windows_paragraph_5.1_metadata.json | 12 ++++ .../account/account_windows_paragraph_6.1.txt | 34 ++++++++++ ...ccount_windows_paragraph_6.1_metadata.json | 15 +++++ .../connecting_windows_paragraph_2.1.txt | 48 ++++++++++++++ ...ecting_windows_paragraph_2.1_metadata.json | 17 +++++ .../connecting_windows_paragraph_3.1.txt | 32 ++++++++++ 
...ecting_windows_paragraph_3.1_metadata.json | 12 ++++ .../connecting_windows_paragraph_5.1.txt | 33 ++++++++++ ...ecting_windows_paragraph_5.1_metadata.json | 12 ++++ .../connecting_windows_paragraph_6.1.txt | 32 ++++++++++ ...ecting_windows_paragraph_6.1_metadata.json | 12 ++++ .../connecting_windows_paragraph_6.2.txt | 7 ++ ...ecting_windows_paragraph_6.2_metadata.json | 12 ++++ .../connecting_windows_paragraph_8.1.txt | 8 +++ ...ecting_windows_paragraph_8.1_metadata.json | 15 +++++ .../getting_started_windows_paragraph_1.1.txt | 27 ++++++++ ...tarted_windows_paragraph_1.1_metadata.json | 25 ++++++++ .../getting_started_windows_paragraph_1.2.txt | 1 + ...tarted_windows_paragraph_1.2_metadata.json | 12 ++++ .../getting_started_windows_paragraph_2.1.txt | 21 ++++++ ...tarted_windows_paragraph_2.1_metadata.json | 21 ++++++ 192 files changed, 4394 insertions(+) create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_6.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_7.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_7_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_8.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_8_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_3_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/best_practices/best_practices_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/best_practices/best_practices_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/crontab/crontab_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/crontab/crontab_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_1_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_5_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_6.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_6_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_7.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_7_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_8.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_8_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_5.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_5_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_2.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_4.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_4_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json 
create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_3.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_3.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_6.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_8.1.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_8.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_3.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_3.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_6.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.2.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.3.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.3_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_8.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_8.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.2.txt create mode 100644 
scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_2.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_3.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_3.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_5.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_5.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.1_metadata.json create mode 
100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_8.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_8.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.1_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.2.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.2_metadata.json create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_2.1.txt create mode 100644 scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_2.1_metadata.json diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_1.txt new file mode 100644 index 000000000000..9ead695e3881 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_1.txt @@ -0,0 +1,33 @@ +Frequently Asked Questions (FAQ) +New users should consult the Introduction to HPC +to get started, which is a great resource for learning the basics, troubleshooting, and looking up specifics. 
+If you want to use software that's not yet installed on the HPC, send us a +software installation request. +Overview of HPC-UGent Tier-2 infrastructure +Composing a job +How many cores/nodes should I request? +An important factor in this question is how well your task is being parallelized: +does it actually run faster with more resources? You can test this yourself: +start with 4 cores, then 8, then 16... The execution time should each time be reduced to +around half of what it was before. You can also try this with full nodes: 1 node, 2 nodes. +A rule of thumb is that you're around the limit when you double the resources but the +execution time is still ~60-70% of what it was before. That's a signal to stop increasing the core count. +See also: Running batch jobs. +Which packages are available? +When connected to the HPC, use the commands module avail [search_text] and module spider [module] +to find installed modules and get information on them. +Among others, many packages for both Python and R are readily available on the HPC. +These aren't always easy to find, though, as we've bundled them together. +Specifically, the module SciPy-bundle includes numpy, pandas, scipy and a few others. +For R, the normal R module has many libraries included. The bundle R-bundle-Bioconductor +contains more libraries. +Use the command module spider [module] to find the specifics on these bundles. +If the package or library you want is not available, send us a +software installation request. +How do I choose the job modules? +Modules each come with a suffix that describes the toolchain used to install them. 
+Examples: + AlphaFold/2.2.2-foss-2021a* + tqdm/4.61.2-GCCcore-10.3.0* + Python/3.9.5-GCCcore-10.3.0* + matplotlib/3.4.2-foss-2021a* diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_1_metadata.json new file mode 100644 index 000000000000..63406afa3fdb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_1_metadata.json @@ -0,0 +1,19 @@ +{ + "main_title": "FAQ", + "subtitle": "How-do-I-choose-the-job-modules", + "source_file": "../../mkdocs/docs/HPC/FAQ.md", + "title_depth": 3, + "directory": "FAQ", + "links": { + "0": "https://www.ugent.be/hpc/en/training/2023/introhpcugent", + "1": "https://www.ugent.be/hpc/en/support/software-installation-request", + "2": "https://www.ugent.be/hpc/en/infrastructure", + "3": "https://docs.hpc.ugent.be/running_batch_jobs", + "4": "https://www.ugent.be/hpc/en/support/software-installation-request" + }, + "parent_title": "", + "previous_title": null, + "next_title": "FAQ_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/FAQ/#how-do-i-choose-the-job-modules" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_2.txt new file mode 100644 index 000000000000..0218d9926f1e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_2.txt @@ -0,0 +1,34 @@ +Modules from the same toolchain always work together, and modules from a +\*different version of the same toolchain\* never work together. +The above set of modules works together: an overview of compatible toolchains can be found here: +https://docs.easybuild.io/en/latest/Common-toolchains.html#overview-of-common-toolchains. +You can use module avail [search_text] to see which versions on which toolchains are available to use. 
+If you need something that's not available yet, you can request it through a +software installation request. +It is possible to use the modules without specifying a version or toolchain. However, +this will probably cause incompatible modules to be loaded. Don't do it if you use multiple modules. +Even if it works now, as more modules get installed on the HPC, your job can suddenly break. +Troubleshooting +My modules don't work together +When incompatible modules are loaded, you might encounter an error like this: +Lmod has detected the following error: A different version of the 'GCC' module +is already loaded (see output of 'ml'). +You should load another foss module for that is compatible with the currently +loaded version of GCC. +Use ml spider foss to get an overview of the available versions. +Modules from the same toolchain always work together, and modules from a +_different version of the same toolchain_ never work together. +An overview of compatible toolchains can be found here: +https://docs.easybuild.io/en/latest/Common-toolchains.html#overview-of-common-toolchains. +See also: How do I choose the job modules? +My job takes longer than 72 hours +The 72 hour walltime limit will not be extended. However, you can work around this barrier: +* Check that all available resources are being used. See also: + * How many cores/nodes should I request?. + * My job is slow. + * My job isn't using any GPUs. +* Use a faster cluster. +* Divide the job into more parallel processes. +* Divide the job into shorter processes, which you can submit as separate jobs. +* Use the built-in checkpointing of your software. 
+Job failed: SEGV Segmentation fault diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_2_metadata.json new file mode 100644 index 000000000000..747a0de47bfc --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_2_metadata.json @@ -0,0 +1,20 @@ +{ + "main_title": "FAQ", + "subtitle": "Job-failed-SEGV-Segmentation-fault", + "source_file": "../../mkdocs/docs/HPC/FAQ.md", + "title_depth": 3, + "directory": "FAQ", + "links": { + "0": "https://www.ugent.be/hpc/en/support/software-installation-request", + "1": "https://docs.hpc.ugent.be/FAQ/#how-do-i-choose-the-job-modules", + "2": "https://docs.hpc.ugent.be/FAQ/#how-many-coresnodes-should-i-request", + "3": "https://docs.hpc.ugent.be/FAQ/#my-job-runs-slower-than-i-expected", + "4": "https://docs.hpc.ugent.be/FAQ/#my-job-isnt-using-any-gpus", + "5": "https://www.ugent.be/hpc/en/infrastructure" + }, + "parent_title": "", + "previous_title": "FAQ_paragraph_1", + "next_title": "FAQ_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/FAQ/#job-failed-segv-segmentation-fault" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_3.txt new file mode 100644 index 000000000000..9db33be16c56 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_3.txt @@ -0,0 +1,28 @@ +Any error mentioning SEGV or Segmentation fault/violation has something to do with a memory error. +If you weren't messing around with memory-unsafe applications or programming, your job probably hit its memory limit. +When there's no memory amount specified in a job script, your job will get access to a proportional +share of the total memory on the node: If you request a full node, all memory will be available. 
+If you request 8 cores on a cluster where nodes have 2x18 cores, you will get 8/36 = 2/9 +of the total memory on the node. +Try requesting a bit more memory than your proportional share, and see if that solves the issue. +See also: Specifying memory requirements. +My compilation/command fails on login node +When logging in, you are using a connection to the login nodes. There are somewhat strict +limitations on what you can do in those sessions: check out the output of ulimit -a. +Specifically, the memory and the amount of processes you can use may present an issue. +This is common with MATLAB compilation and Nextflow. An error caused by the login session +limitations can look like this: Aborted (core dumped). +It's easy to get around these limitations: start an interactive session on one of the clusters. +Then, you are acting as a node on that cluster instead of a login node. Notably, the +debug/interactive cluster will grant such a session immediately, while other clusters might make you wait a bit. +Example command: ml swap cluster/donphan && qsub -I -l nodes=1:ppn=8 +See also: Running interactive jobs. +My job isn't using any GPUs +Only two clusters have GPUs. Check out the infrastructure overview, +to see which one suits your needs. Make sure that you manually switch to the GPU cluster before you submit +the job. Inside the job script, you need to explicitly request the GPUs: +#PBS -l nodes=1:ppn=24:gpus=2 +Some software modules don't have GPU support, even when running on the GPU cluster. For example, +when running module avail alphafold on the joltik cluster, you will find versions on both +the foss toolchain and the fossCUDA toolchain. Of these, only the CUDA versions will +use GPU power. When in doubt, CUDA means GPU support. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_3_metadata.json new file mode 100644 index 000000000000..5a55993674aa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_3_metadata.json @@ -0,0 +1,18 @@ +{ + "main_title": "FAQ", + "subtitle": "My-job-isn't-using-any-GPUs", + "source_file": "../../mkdocs/docs/HPC/FAQ.md", + "title_depth": 3, + "directory": "FAQ", + "links": { + "0": "https://docs.hpc.ugent.be/fine_tuning_job_specifications/#specifying-memory-requirements", + "1": "https://docs.hpc.ugent.be/interactive_debug", + "2": "https://docs.hpc.ugent.be/running_interactive_jobs", + "3": "https://www.ugent.be/hpc/en/infrastructure" + }, + "parent_title": "", + "previous_title": "FAQ_paragraph_2", + "next_title": "FAQ_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/FAQ/#my-job-isnt-using-any-gpus" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_4.txt new file mode 100644 index 000000000000..93694817d0d0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_4.txt @@ -0,0 +1,29 @@ +See also: HPC-UGent GPU clusters. +My job runs slower than I expected +There are a few possible causes why a job can perform worse than expected. +Is your job using all the available cores you've requested? You can test this by increasing and +decreasing the core amount: If the execution time stays the same, the job was not using all cores. +Some workloads just don't scale well with more cores. If you expect the job to be very parallelizable +and you encounter this problem, maybe you missed some settings that enable multicore execution. +See also: How many cores/nodes should i request? 
+Does your job have access to the GPUs you requested? +See also: My job isn't using any GPUs +Not all file locations perform the same. In particular, the $VSC_HOME and $VSC_DATA +directories are, relatively, very slow to access. Your jobs should rather use the +$VSC_SCRATCH directory, or other fast locations (depending on your needs), described +in Where to store your data on the HPC. +As an example how to do this: The job can copy the input to the scratch directory, then execute +the computations, and lastly copy the output back to the data directory. +Using the home and data directories is especially a problem when UGent isn't your home institution: +your files may be stored, for example, in Leuven while you're running a job in Ghent. +My MPI job fails +Use mympirun in your job script instead of mpirun. It is a tool that makes sure everything +gets set up correctly for the HPC infrastructure. You need to load it as a module in your +job script: module load vsc-mympirun. +To submit the job, use the qsub command rather than sbatch. Although both will submit a job, +qsub will correctly interpret the #PBS parameters inside the job script. sbatch might not +set the job environment up correctly for mympirun/OpenMPI. +See also: Multi core jobs/Parallel Computing +and Mympirun. 
+mympirun seems to ignore its arguments +For example, we have a simple script (./hello.sh): diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_4_metadata.json new file mode 100644 index 000000000000..609a3ca303f0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_4_metadata.json @@ -0,0 +1,20 @@ +{ + "main_title": "FAQ", + "subtitle": "`mympirun`-seems-to-ignore-its-arguments", + "source_file": "../../mkdocs/docs/HPC/FAQ.md", + "title_depth": 3, + "directory": "FAQ", + "links": { + "0": "https://docs.hpc.ugent.be/gpu_gent", + "1": "https://docs.hpc.ugent.be/FAQ/#how-many-coresnodes-should-i-request", + "2": "https://docs.hpc.ugent.be/FAQ/#my-job-isnt-using-any-gpus", + "3": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#where-to-store-your-data-on-the-hpc", + "4": "https://docs.hpc.ugent.be/multi_core_jobs", + "5": "https://docs.hpc.ugent.be/mympirun" + }, + "parent_title": "", + "previous_title": "FAQ_paragraph_3", + "next_title": "FAQ_paragraph_5", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/FAQ/#mympirun-seems-to-ignore-its-arguments" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_5.txt new file mode 100644 index 000000000000..ac6b8aa113ac --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_5.txt @@ -0,0 +1,36 @@ +#!/bin/bash +echo "hello world" +And we run it like mympirun ./hello.sh --output output.txt. +To our surprise, this doesn't output to the file output.txt, but to +standard out! This is because mympirun expects the program name and +the arguments of the program to be its last arguments. Here, the +--output output.txt arguments are passed to ./hello.sh instead of to +mympirun. 
The correct way to run it is: +mympirun --output output.txt ./hello.sh +When will my job start? +See the explanation about how jobs get prioritized in When will my job start. +Why do I get a "No space left on device" error, while I still have storage space left? +When trying to create files, errors like this can occur: +No space left on device +The error "No space left on device" can mean two different things: +- all available storage quota on the file system in question has been used; +- the inode limit has been reached on that file system. +An inode can be seen as a "file slot", meaning that when the limit is reached, no more additional files can be created. +There is a standard inode limit in place that will be increased if needed. +The number of inodes used per file system can be checked on the VSC account page. +Possible solutions to this problem include cleaning up unused files and directories or +compressing directories with a lot of files into zip- or tar-files. +If the problem persists, feel free to contact support. +Other +Can I share my account with someone else? +NO. You are not allowed to share your VSC account with anyone else, it is +strictly personal. +See +https://helpdesk.ugent.be/account/en/regels.php. +If you want to share data, there are alternatives (like shared directories in VO +space, see Virtual organisations). +Can I share my data with other HPC users? +Yes, you can use the chmod or setfacl commands to change permissions +of files so other users can access the data. For example, the following +command will enable a user named "otheruser" to read the file named +dataset.txt.
See diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_5_metadata.json new file mode 100644 index 000000000000..3235cd89ff48 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_5_metadata.json @@ -0,0 +1,19 @@ +{ + "main_title": "FAQ", + "subtitle": "Can-I-share-my-data-with-other-HPC-users", + "source_file": "../../mkdocs/docs/HPC/FAQ.md", + "title_depth": 3, + "directory": "FAQ", + "links": { + "0": "https://docs.hpc.ugent.be/running_batch_jobs/#when-will-my-job-start", + "1": "https://account.vscentrum.be", + "2": "https://docs.hpc.ugent.be/linux-tutorial/manipulating_files_and_directories/#zipping-gzipgunzip-zipunzip", + "3": "https://docs.hpc.ugent.be/FAQ/#i-have-another-questionproblem", + "4": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#virtual-organisations" + }, + "parent_title": "", + "previous_title": "FAQ_paragraph_4", + "next_title": "FAQ_paragraph_6", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/FAQ/#can-i-share-my-data-with-other-hpc-users" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_6.txt new file mode 100644 index 000000000000..011ad0ce4638 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_6.txt @@ -0,0 +1,37 @@ +$ setfacl -m u:otheruser:r dataset.txt +$ ls -l dataset.txt +-rwxr-x---+ 2 vsc40000 mygroup 40 Apr 12 15:00 dataset.txt +For more information about chmod or setfacl, see +Linux tutorial. +Can I use multiple different SSH key pairs to connect to my VSC account? +Yes, and this is recommended when working from different computers. +Please see Adding multiple SSH public keys on how to do this. 
+I want to use software that is not available on the clusters yet +Please fill out the details about the software and why you need it in +this form: +https://www.ugent.be/hpc/en/support/software-installation-request. +When submitting the form, a mail will be sent to hpc@ugent.be containing all the +provided information. The HPC team will look into your request as soon +as possible and contact you when the installation is done or if +further information is required. +Is my connection compromised? Remote host identification has changed +On Monday 25 April 2022, the login nodes received an update to RHEL8. +This means that the host keys of those servers also changed. As a result, +you could encounter the following warnings. +MacOS & Linux (on Windows, only the second part is shown): +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@ WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED! @ +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +IT IS POSSIBLE THAT SOMEONE IS DOING SOMETHING NASTY! +Someone could be eavesdropping on you right now (man-in-the-middle attack)! +It is also possible that a host key has just been changed. +The fingerprint for the RSA key sent by the remote host is +xx:xx:xx. +Please contact your system administrator. +Add correct host key in /home/hostname/.ssh/known_hosts to get rid of this message. +Offending RSA key in /var/lib/sss/pubconf/known_hosts:1 +RSA host key for user has changed and you have requested strict checking. +Host key verification failed. +Please follow the instructions at migration to RHEL8 +to ensure it really is not a hacking attempt - you will find the correct host key to compare. +You will also find how to hide the warning.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_6_metadata.json new file mode 100644 index 000000000000..6b646e83475b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_6_metadata.json @@ -0,0 +1,17 @@ +{ + "main_title": "FAQ", + "subtitle": "Is-my-connection-compromised-Remote-host-identification-has-changed", + "source_file": "../../mkdocs/docs/HPC/FAQ.md", + "title_depth": 3, + "directory": "FAQ", + "links": { + "0": "https://docs.hpc.ugent.be/linux-tutorial/manipulating_files_and_directories/#changing-permissions-chmod", + "1": "https://docs.hpc.ugent.be/account/#adding-multiple-ssh-public-keys-optional", + "2": "https://www.ugent.be/hpc/en/infrastructure/migration_to_rhel8" + }, + "parent_title": "", + "previous_title": "FAQ_paragraph_5", + "next_title": "FAQ_paragraph_7", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/FAQ/#is-my-connection-compromised-remote-host-identification-has-changed" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_7.txt new file mode 100644 index 000000000000..8228e874f4b4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_7.txt @@ -0,0 +1,25 @@ +VO: how does it work? +A Virtual Organisation consists of a number of members and moderators. A moderator can: +* Manage the VO members (but can't access/remove their data on the system). +* See how much storage each member has used, and set limits per member. +* Request additional storage for the VO. +One person can only be part of one VO, be it as a member or moderator. +It's possible to leave a VO and join another one. However, it's not +recommended to keep switching between VO's (to supervise groups, for example). 
+See also: Virtual Organisations. +My UGent shared drives don't show up +After mounting the UGent shared drives with kinit your_email@ugent.be, +you might not see an entry with your username when listing ls /UGent. +This is normal: try ls /UGent/your_username or cd /UGent/your_username, and you should be able to access the drives. +Be sure to use your UGent username and not your VSC username here. +See also: Your UGent home drive and shares. +My home directory is (almost) full, and I don't know why +Your home directory might be full without looking like it due to hidden files. +Hidden files and subdirectories have a name starting with a dot and do not show up when running ls. +If you want to check where the storage in your home directory is used, you can make use of the du command to find out what the largest files and subdirectories are: +du -h --max-depth 1 $VSC_HOME | egrep '[0-9]{3}M|[0-9]G' +The du command returns the size of every file and subdirectory in the $VSC_HOME directory. This output is then piped into an egrep to filter the lines to the ones that matter the most. +The egrep command will only let entries that match with the specified regular expression [0-9]{3}M|[0-9]G through, which corresponds with files that consume more than 100 MB. +How can I get more storage space? +By default you get 3 GB of storage space for your home directory and 25 GB in your personal directories on both the data ($VSC_DATA) and scratch ($VSC_SCRATCH) filesystems. +It is not possible to expand the storage quota for these personal directories. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_7_metadata.json new file mode 100644 index 000000000000..e3f76810c3d1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_7_metadata.json @@ -0,0 +1,19 @@ +{ + "main_title": "FAQ", + "subtitle": "How-can-I-get-more-storage-space", + "source_file": "../../mkdocs/docs/HPC/FAQ.md", + "title_depth": 3, + "directory": "FAQ", + "links": { + "0": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#virtual-organisations", + "1": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#your-ugent-home-drive-and-shares", + "2": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#check-your-quota", + "3": "https://docs.hpc.ugent.be/linux-tutorial/beyond_the_basics/#searching-file-contents-grep", + "4": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#quota" + }, + "parent_title": "", + "previous_title": "FAQ_paragraph_6", + "next_title": "FAQ_paragraph_8", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/FAQ/#how-can-i-get-more-storage-space" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_8.txt new file mode 100644 index 000000000000..7819acaf2e67 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_8.txt @@ -0,0 +1,20 @@ +You can get more storage space through a Virtual Organisation (VO), +which will give you access to the additional directories in a subdirectory specific to that VO ($VSC_DATA_VO and $VSC_SCRATCH_VO). +The moderators of a VO can request more storage for their VO. +Why can't I use the sudo command? +When you attempt to use sudo, you will be prompted for a password. 
+However, you cannot enter a valid password because this feature is reserved exclusively for HPC administrators. +sudo is used to execute a command with administrator rights, which would allow you to make system-wide changes. +You are only able to run commands that make changes to the directories that your VSC account has access to, +like your home directory, your personal directories like $VSC_DATA and $VSC_SCRATCH, +or shared VO/group directories like $VSC_DATA_VO and $VSC_SCRATCH_VO. +A lot of tasks can be performed without sudo, including installing software in your own account. +Installing software +- If you know how to install the software without using sudo, you are welcome to proceed with the installation. +- If you are unsure how to install the software, you can submit a software installation request, and the HPC-UGent support team will handle the installation for you. +I have another question/problem +Who can I contact? +* General questions regarding HPC-UGent and VSC: +* HPC-UGent Tier-2: +* VSC Tier-1 compute: +* VSC Tier-1 cloud: \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_8_metadata.json new file mode 100644 index 000000000000..f290413808ba --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/FAQ/FAQ_paragraph_8_metadata.json @@ -0,0 +1,18 @@ +{ + "main_title": "FAQ", + "subtitle": "I-have-another-questionproblem", + "source_file": "../../mkdocs/docs/HPC/FAQ.md", + "title_depth": 3, + "directory": "FAQ", + "links": { + "0": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#virtual-organisations", + "1": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#vo-directories", + "2": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#requesting-more-storage-space", + "3": "https://www.ugent.be/hpc/en/support/software-installation-request" + }, + 
"parent_title": "", + "previous_title": "FAQ_paragraph_7", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/FAQ/#i-have-another-questionproblem" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_1.txt new file mode 100644 index 000000000000..5cb90b96d22e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_1.txt @@ -0,0 +1,43 @@ +Hanythingondemand (HOD) +Hanythingondemand (or HOD for short) is a tool to run a Hadoop (Yarn) +cluster on a traditional HPC system. +Documentation +The official documentation for HOD version 3.0.0 and newer is available +at https://hod.readthedocs.org/en/latest/. The slides of the 2016 HOD +training session are available at +http://users.ugent.be/~kehoste/hod_20161024.pdf. +Using HOD +Before using HOD, you first need to load the hod module. We don't +specify a version here (this is an exception, for most other modules you +should, see Using explicit version numbers) because newer versions might include important bug fixes. +module load hod +Compatibility with login nodes +The hod modules are constructed such that they can be used on the HPC-UGent infrastructure +login nodes, regardless of which cluster module is loaded (this is not +the case for software installed via modules in general, see Running software that is incompatible with host). +As such, you should experience no problems if you swap to a different +cluster module before loading the hod module and subsequently running +|hod|. 
+For example, this will work as expected: +$ module swap cluster/donphan +$ module load hod +$ hod +hanythingondemand - Run services within an HPC cluster +usage: hod [subcommand options] +Available subcommands (one of these must be specified!): + batch Submit a job to spawn a cluster on a PBS job controller, run a job script, and tear down the cluster when it's done + clean Remove stale cluster info. +... +Note that also modules named hanythingondemand/* are available. These +should however not be used directly, since they may not be compatible +with the login nodes (depending on which cluster they were installed +for). +Standard HOD configuration +The hod module will also put a basic configuration in place for HOD, +by defining a couple of $HOD_* environment variables: +$ module load hod +$ env | grep HOD | sort +HOD_BATCH_HOD_MODULE=hanythingondemand/3.2.2-intel-2016b-Python-2.7.12 +HOD_BATCH_WORKDIR=$VSC_SCRATCH/hod +HOD_CREATE_HOD_MODULE=hanythingondemand/3.2.2-intel-2016b-Python-2.7.12 +HOD_CREATE_WORKDIR=$VSC_SCRATCH/hod diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_1_metadata.json new file mode 100644 index 000000000000..e451d3afb80b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_1_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "HOD", + "subtitle": "Standard-HOD-configuration", + "source_file": "../../mkdocs/docs/HPC/HOD.md", + "title_depth": 3, + "directory": "HOD", + "links": { + "0": "https://docs.hpc.ugent.be/running_batch_jobs/#using-explicit-version-numbers", + "1": "https://docs.hpc.ugent.be/troubleshooting/#running-software-that-is-incompatible-with-host" + }, + "parent_title": "", + "previous_title": null, + "next_title": "HOD_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/HOD/#standard-hod-configuration" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_2.txt new file mode 100644 index 000000000000..45df37178713 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_2.txt @@ -0,0 +1,34 @@ +By defining these environment variables, we avoid that you have to +specify --hod-module and --workdir when using hod batch or +hod create, since they are strictly required. +If you want to use a different parent working directory for HOD, it +suffices to either redefine $HOD_BATCH_WORKDIR and +$HOD_CREATE_WORKDIR, or to specify --workdir (which will override +the corresponding environment variable). +Changing the HOD module that is used by the HOD backend (i.e., using +--hod-module or redefining $HOD_*_HOD_MODULE) is strongly +discouraged. +Cleaning up +After HOD clusters terminate, their local working directory and cluster +information is typically not cleaned up automatically (for example, +because the job hosting an interactive HOD cluster submitted via +hod create runs out of walltime). +These HOD clusters will still show up in the output of hod list, and +will be marked as <job-not-found>.
+You should occasionally clean this up using hod clean: +$ module list +Currently Loaded Modulefiles: + 1) cluster/doduo(default) 2) pbs_python/4.6.0 3) vsc-base/2.4.2 4) hod/3.0.0-cli +$ hod list +Cluster label Job ID State Hosts +example1 123456 <job-not-found> <none> +$ hod clean +Removed cluster localworkdir directory /user/scratch/gent/vsc400/vsc40000/hod/hod/123456 for cluster labeled example1 +Removed cluster info directory /user/home/gent/vsc400/vsc40000/.config/hod.d/wordcount for cluster labeled example1 +$ module swap cluster/donphan +Cluster label Job ID State Hosts +example2 98765.master19.donphan.gent.vsc <job-not-found> <none> +$ hod clean +Removed cluster localworkdir directory /user/scratch/gent/vsc400/vsc40000/hod/hod/98765.master19.donphan.gent.vsc for cluster labeled example2 +Removed cluster info directory /user/home/gent/vsc400/vsc40000/.config/hod.d/wordcount for cluster labeled example2 +Note that only HOD clusters that were submitted to the currently loaded cluster module will be cleaned up. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_2_metadata.json new file mode 100644 index 000000000000..480afb3ceb67 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "HOD", + "subtitle": "Cleaning-up", + "source_file": "../../mkdocs/docs/HPC/HOD.md", + "title_depth": 3, + "directory": "HOD", + "parent_title": "", + "previous_title": "HOD_paragraph_1", + "next_title": "HOD_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/HOD/#cleaning-up" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_3.txt new file mode 100644 index 000000000000..d95838f55e27 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_3.txt @@ -0,0 +1,9 @@ +Getting help +If you have any questions, or are experiencing problems using HOD, you +have a couple of options: +- Subscribe to the HOD mailing list via + https://lists.ugent.be/wws/info/hod, and contact the HOD users and + developers at hod@lists.ugent.be. +- Contact the HPC-UGent team via hpc@ugent.be +- Open an issue in the hanythingondemand GitHub repository, via + https://github.com/hpcugent/hanythingondemand/issues. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_3_metadata.json new file mode 100644 index 000000000000..1486348f570b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/HOD/HOD_paragraph_3_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "HOD", + "subtitle": "Getting-help", + "source_file": "../../mkdocs/docs/HPC/HOD.md", + "title_depth": 2, + "directory": "HOD", + "parent_title": "", + "previous_title": "HOD_paragraph_2", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/HOD/#getting-help" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt new file mode 100644 index 000000000000..d7f8bf11a93d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1.txt @@ -0,0 +1,36 @@ +Getting an HPC Account +Getting ready to request an account +All users of AUGent can request +an +account on the HPC, which is part of the Flemish Supercomputing Centre (VSC). +See HPC policies for more information on who is entitled to an account. +The VSC, abbreviation of Flemish Supercomputer Centre, is a virtual +supercomputer centre. It is a partnership between the five Flemish +associations: the Association KU Leuven, Ghent University Association, +Brussels University Association, Antwerp University Association and the +University Colleges-Limburg. The VSC is funded by the Flemish +Government. +There are two methods for connecting to HPC-UGent infrastructure: +- Using a terminal to connect via SSH.
+- Using the web portal +The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). +If you would like to use a terminal with SSH as this gives you more flexibility, continue reading. +However if you prefer to use the web portal, you can skip ahead to the following section: Applying for the account. +Once you have successfully obtained an account, you can then delve into the details of utilizing the HPC-UGent web portal by reading Using the HPC-UGent web portal. +The HPC-UGent infrastructure clusters use public/private key pairs for user authentication +(rather than passwords). Technically, the private key is stored on your +local computer and always stays there; the public key is stored on the HPC. +Access to the HPC is granted to anyone who can prove to have access to the +corresponding private key on his local computer. +How do SSH keys work? +- an SSH public/private key pair can be seen as a lock and a key +- the SSH public key is equivalent with a lock: you give it to the + VSC and they put it on the door that gives access to your account. +- the SSH private key is like a physical key: you don't hand it out + to other people. +- anyone who has the key (and the optional password) can unlock the + door and log in to the account. +- the door to your VSC account is special: it can have multiple + locks (SSH public keys) attached to it, and you only need to open + one lock with the corresponding key (SSH private key) to open + the door (log in to the account).
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json new file mode 100644 index 000000000000..812fbbba4c27 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/account/account_paragraph_1_metadata.json @@ -0,0 +1,18 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "links": { + "0": "https://docs.hpc.ugent.be/sites/hpc_policies", + "1": "https://docs.hpc.ugent.be/web_portal", + "2": "https://docs.hpc.ugent.be/account/#applying-for-the-account", + "3": "https://docs.hpc.ugent.be/web_portal" + }, + "parent_title": "", + "previous_title": null, + "next_title": "account_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_1.txt new file mode 100644 index 000000000000..ec6f7d85f906 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_1.txt @@ -0,0 +1,32 @@ +What is AlphaFold? +AlphaFold is an AI system developed by DeepMind that predicts a protein’s 3D structure from its amino acid sequence. +It aims to achieve accuracy competitive with experimental methods. +See https://www.vscentrum.be/alphafold for more information and there you can also find a getting started video recording if you prefer that. +Documentation & extra material +This chapter focuses specifically on the use of AlphaFold on the HPC-UGent infrastructure. +It is intented to augment the existing AlphaFold documentation rather than replace it. 
+It is therefore recommended to first familiarize yourself with AlphaFold. The following resources can be helpful: +- AlphaFold website: https://alphafold.com/ +- AlphaFold repository: https://github.com/deepmind/alphafold/tree/main +- AlphaFold FAQ: https://alphafold.com/faq +- VSC webpage about AlphaFold: https://www.vscentrum.be/alphafold +- Introductory course on AlphaFold by VIB: https://elearning.vib.be/courses/alphafold +- "Getting Started with AlphaFold" presentation by Kenneth Hoste (HPC-UGent) + - recording available on YouTube + - slides available here (PDF) + - see also https://www.vscentrum.be/alphafold +Using AlphaFold on HPC-UGent infrastructure +Several different versions of AlphaFold are installed on both the CPU and GPU HPC-UGent Tier-2 clusters, see the output of module avail AlphaFold. +If you run this command on a GPU cluster, additional CUDA modules will show up: +$ module avail AlphaFold +------------ /apps/gent/RHEL8/cascadelake-volta-ib/modules/all ------------- + AlphaFold/2.0.0-fosscuda-2020b + AlphaFold/2.1.1-fosscuda-2020b + AlphaFold/2.1.2-foss-2021a-CUDA-11.3.1 + AlphaFold/2.2.2-foss-2021a-CUDA-11.3.1 + AlphaFold/2.3.0-foss-2021b-CUDA-11.4.1 + AlphaFold/2.3.1-foss-2022a-CUDA-11.7.0 +--------------- /apps/gent/RHEL8/cascadelake-ib/modules/all ---------------- + AlphaFold/2.0.0-foss-2020b AlphaFold/2.3.1-foss-2022a + AlphaFold/2.1.2-foss-2021a AlphaFold/2.3.4-foss-2022a-ColabFold (D) + AlphaFold/2.2.2-foss-2021a diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_1_metadata.json new file mode 100644 index 000000000000..c407aed2e0fd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_1_metadata.json @@ -0,0 +1,17 @@ +{ + "main_title": "alphafold", + "subtitle": "Using-AlphaFold-on-HPC-UGent-infrastructure", + "source_file": 
"../../mkdocs/docs/HPC/alphafold.md", + "title_depth": 2, + "directory": "alphafold", + "links": { + "0": "https://www.youtube.com/watch?v=jP9Qg1yBGcs", + "1": "https://www.vscentrum.be/_files/ugd/5446c2_f19a8723f7f7460ebe990c28a53e56a2.pdf?index=true", + "2": "https://docs.hpc.ugent.be/gpu" + }, + "parent_title": "", + "previous_title": null, + "next_title": "alphafold_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/alphafold/#using-alphafold-on-hpc-ugent-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_2.txt new file mode 100644 index 000000000000..dc47707fe273 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_2.txt @@ -0,0 +1,32 @@ +To use AlphaFold, you should load a particular module, for example: +module load AlphaFold/2.3.1-foss-2022a-CUDA-11.7.0 + Tip "We strongly advise loading a specific version of an AlphaFold module, so you know exactly which version is being used." + Warning + + When using AlphaFold, you should submit jobs to a GPU cluster for better performance, see GPU clusters. + Later in this chapter, you will find a comparison between running AlphaFold on CPUs or GPUs. +Multiple revisions of the large database (~2.5TB) that is also required to run AlphaFold have been +made available on the HPC-UGent infrastructure in a central location (/arcanine/scratch/gent/apps/AlphaFold), +so you do not have to download it yourself. +$ ls /arcanine/scratch/gent/apps/AlphaFold +20210812 20211201 20220701 20230310 +The directories located there indicate when the data was downloaded, so that this leaves room for providing updated datasets later. +As of writing this documentation the latest version is 20230310. 
+ Info + + The arcanine scratch shared filesystem is powered by fast SSD disks, + which is recommended for the AlphaFold data, because of random access I/O patterns. + See Pre-defined user directories to get more info about the arcanine filesystem. +The AlphaFold installations we provide have been modified a bit to facilitate the usage on HPC-UGent infrastructure. +Setting up the environment +The location to the AlphaFold data can be specified via the $ALPHAFOLD_DATA_DIR environment variable, so you should define this variable in your AlphaFold job script: +export ALPHAFOLD_DATA_DIR=/arcanine/scratch/gent/apps/AlphaFold/20230310 + Warning "Use newest version" + + Do not forget to replace 20230310 with a more up to date version if available. +Running AlphaFold +AlphaFold provides a script called run_alphafold.py +A symbolic link named alphafold that points to this script is included, +so you can just use alphafold instead of run_alphafold.py or python run_alphafold.py after loading the AlphaFold module. +The run_alphafold.py script has also been slightly modified such that defining the $ALPHAFOLD_DATA_DIR (see above) is sufficient to pick up all the data provided in that location, +so you don't need to use options like --data_dir to specify the location of the data. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_2_metadata.json new file mode 100644 index 000000000000..a134bec40f47 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_2_metadata.json @@ -0,0 +1,18 @@ +{ + "main_title": "alphafold", + "subtitle": "Running-AlphaFold", + "source_file": "../../mkdocs/docs/HPC/alphafold.md", + "title_depth": 3, + "directory": "alphafold", + "links": { + "0": "https://docs.hpc.ugent.be/gpu", + "1": "http://localhost:8000/HPC/Gent/running_jobs_with_input_output_data/#pre-defined-user-directories", + "2": "https://raw.githubusercontent.com/deepmind/alphafold/main/run_alphafold.py", + "3": "https://docs.hpc.ugent.be/alphafold/#setting-up-the-environment" + }, + "parent_title": "", + "previous_title": "alphafold_paragraph_1", + "next_title": "alphafold_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/alphafold/#running-alphafold" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_3.txt new file mode 100644 index 000000000000..8347a4e27fd5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_3.txt @@ -0,0 +1,26 @@ +Similarly, the script was also tweaked such that the location to commands like hhblits,hhsearch,jackhmmer,kalign are already correctly set, so options like --hhblits_binary_path are not required. +For more information about the script and options see this section in the official README. + WARNING "READ README" + It is strongly advised to read the official README provided by DeepMind before continuing. 
+ +Controlling core count for hhblits and jackhmmer +The Python scripts that are used to run hhblits and jackhmmer have been tweaked so you can control how many cores are used for these tools, +rather than hardcoding it to 4 and 8 cores, respectively. +Using the $ALPHAFOLD_HHBLITS_N_CPU environment variable, you can specify how many cores should be used for running hhblits; +the default of 4 cores will be used if $ALPHAFOLD_HHBLITS_N_CPU is not defined. +Likewise for jackhmmer, the core count can be controlled via $ALPHAFOLD_JACKHMMER_N_CPU. + Info + Tweaking this might not yield significant benefits, + as we have noticed that these tools may exhibit slower performance when utilizing more than 4/8 cores (though this behavior could vary based on the workload). +CPU/GPU comparison +The provided timings were obtained by executing the T1050.fasta example, as outlined in the Alphafold README. +For the corresponding jobscripts, they are available here. +Using --db_preset=full_dbs, the following runtime data was collected: +* CPU-only, on doduo, using 24 cores (1 node): 9h 9min +* CPU-only, on doduo, using 96 cores (1 full node): 12h 22min +* GPU on joltik, using 1 V100 GPU + 8 cores: 2h 20min +* GPU on joltik, using 2 V100 GPUs + 16 cores: 2h 16min +This highlights a couple of important attention points: +* Running AlphaFold on GPU is significantly faster than CPU-only (close to 4x faster for this particular example). + Using more CPU cores may lead to longer* runtimes, so be careful with using full nodes when running AlphaFold CPU-only. +* Using multiple GPUs results in barely any speedup (for this particular T1050.fasta example). 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_3_metadata.json new file mode 100644 index 000000000000..2a0105633e74 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_3_metadata.json @@ -0,0 +1,19 @@ +{ + "main_title": "alphafold", + "subtitle": "CPUGPU-comparison", + "source_file": "../../mkdocs/docs/HPC/alphafold.md", + "title_depth": 3, + "directory": "alphafold", + "links": { + "0": "https://github.com/deepmind/alphafold/blob/main/README.md#running-alphafold", + "1": "https://github.com/deepmind/alphafold/blob/main/README.md", + "2": "https://github.com/deepmind/alphafold/blob/main/README.md", + "3": "https://github.com/deepmind/alphafold/blob/main/README.md", + "4": "https://docs.hpc.ugent.be/example-jobscripts" + }, + "parent_title": "", + "previous_title": "alphafold_paragraph_2", + "next_title": "alphafold_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/alphafold/#cpugpu-comparison" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_4.txt new file mode 100644 index 000000000000..521975f2e2aa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_4.txt @@ -0,0 +1,33 @@ +With --db_preset=casp14, it is clearly more demanding: +* On doduo, with 24 cores (1 node): still running after 48h... +* On joltik, 1 V100 GPU + 8 cores: 4h 48min +This highlights the difference between CPU and GPU performance even more. +Example scenario +The following example comes from the official Examples section in the Alphafold README. +The run command is slightly different (see above: Running AlphaFold). 
+Do not forget to set up the environment (see above: Setting up the environment). +Folding a monomer +Say we have a monomer with the sequence . +Create a file monomer.fasta with the following content: +>sequence_name + +Then run the following command in the same directory: +alphafold + --fasta_paths=monomer.fasta \ + --max_template_date=2021-11-01 \ + --model_preset=monomer \ + --output_dir=. +See AlphaFold output, for information about the outputs. + Info + + For more scenarios see the example section in the official README. + +Example jobscripts +The following two example job scripts can be used as a starting point for running AlphaFold. +The main difference between using a GPU or CPU in a job script is what module to load. +For running AlphaFold on GPU, use an AlphaFold module that mentions CUDA (or cuda), +for example AlphaFold/2.3.1-foss-2022a-CUDA-11.7.0. +To run the job scripts you need to create a file named T1050.fasta with the following content: +>T1050 A7LXT1, Bacteroides Ovatus, 779 residues| +MASQSYLFKHLEVSDGLSNNSVNTIYKDRDGFMWFGTTTGLNRYDGYTFKIYQHAENEPGSLPDNYITDIVEMPDGRFWINTARGYVLFDKERDYFITDVTGFMKNLESWGVPEQVFVDREGNTWLSVAGEGCYRYKEGGKRLFFSYTEHSLPEYGVTQMAECSDGILLIYNTGLLVCLDRATLAIKWQSDEIKKYIPGGKTIELSLFVDRDNCIWAYSLMGIWAYDCGTKSWRTDLTGIWSSRPDVIIHAVAQDIEGRIWVGKDYDGIDVLEKETGKVTSLVAHDDNGRSLPHNTIYDLYADRDGVMWVGTYKKGVSYYSESIFKFNMYEWGDITCIEQADEDRLWLGTNDHGILLWNRSTGKAEPFWRDAEGQLPNPVVSMLKSKDGKLWVGTFNGGLYCMNGSQVRSYKEGTGNALASNNVWALVEDDKGRIWIASLGGGLQCLEPLSGTFETYTSNNSALLENNVTSLCWVDDNTLFFGTASQGVGTMDMRTREIKKIQGQSDSMKLSNDAVNHVYKDSRGLVWIATREGLNVYDTRRHMFLDLFPVVEAKGNFIAAITEDQERNMWVSTSRKVIRVTVASDGKGSYLFDSRAYNSEDGLQNCDFNQRSIKTLHNGIIAIGGLYGVNIFAPDHIRYNKMLPNVMFTGLSLFDEAVKVGQSYGGRVLIEKELNDVENVEFDYKQNIFSVSFASDNYNLPEKTQYMYKLEGFNNDWLTLPVGVHNVTFTNLAPGKYVLRVKAINSDGYVGIKEATLGIVVNPPFKLAAALQHHHHHH +source: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_4_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_4_metadata.json new file mode 100644 index 000000000000..e5ddf8f1e012 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_4_metadata.json @@ -0,0 +1,21 @@ +{ + "main_title": "alphafold", + "subtitle": "Example-jobscripts", + "source_file": "../../mkdocs/docs/HPC/alphafold.md", + "title_depth": 2, + "directory": "alphafold", + "links": { + "0": "https://github.com/deepmind/alphafold/blob/main/README.md#examples", + "1": "https://github.com/deepmind/alphafold/blob/main/README.md", + "2": "https://docs.hpc.ugent.be/running-alphafold", + "3": "https://docs.hpc.ugent.be/setting-up-the-environment", + "4": "https://github.com/deepmind/alphafold/blob/main/README.md#alphafold-output", + "5": "https://github.com/deepmind/alphafold/blob/main/README.md#examples", + "6": "https://github.com/deepmind/alphafold/blob/main/README.md" + }, + "parent_title": "", + "previous_title": "alphafold_paragraph_3", + "next_title": "alphafold_paragraph_5", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/alphafold/#example-jobscripts" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_5.txt new file mode 100644 index 000000000000..d529a666c788 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_5.txt @@ -0,0 +1,33 @@ +Job script for running AlphaFold on GPU +Job script that runs AlphaFold on GPU using 1 V100 GPU + 8 cores. 
+Swap to the joltik GPU before submitting it: +module swap cluster/joltik +#!/bin/bash +#PBS -N AlphaFold-gpu-joltik +#PBS -l nodes=1:ppn=8,gpus=1 +#PBS -l walltime=10:0:0 +module load AlphaFold/2.3.1-foss-2022a-CUDA-11.7.0 +export ALPHAFOLD_DATA_DIR=/arcanine/scratch/gent/apps/AlphaFold/20230310 +WORKDIR=$VSC_SCRATCH/$PBS_JOBNAME-$PBS_JOBID +mkdir -p $WORKDIR +# download T1050.fasta via https://www.predictioncenter.org/casp14/target.cgi?target=T1050&view=sequence +cp -a $PBS_O_WORKDIR/T1050.fasta $WORKDIR/ +cd $WORKDIR +alphafold --fasta_paths=T1050.fasta --max_template_date=2020-05-14 --db_preset=full_dbs --output_dir=$PWD +echo "Output available in $WORKDIR" +Job script for running AlphaFold CPU-only +Jobscript that runs AlphaFold on CPU using 24 cores on one node. +#!/bin/bash +#PBS -N AlphaFold-cpu-doduo +#PBS -l nodes=1:ppn=24 +#PBS -l walltime=72:0:0 +module load AlphaFold/2.3.1-foss-2022a +export ALPHAFOLD_DATA_DIR=/arcanine/scratch/gent/apps/AlphaFold/20230310 +WORKDIR=$VSC_SCRATCH/$PBS_JOBNAME-$PBS_JOBID +mkdir -p $WORKDIR +# download T1050.fasta via https://www.predictioncenter.org/casp14/target.cgi?target=T1050&view=sequence +cp -a $PBS_O_WORKDIR/T1050.fasta $WORKDIR/ +cd $WORKDIR +alphafold --fasta_paths=T1050.fasta --max_template_date=2020-05-14 --db_preset=full_dbs --output_dir=$PWD +echo "Output available in $WORKDIR" +In case of problems or questions, don't hesitate to contact us at hpc@ugent.be. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_5_metadata.json new file mode 100644 index 000000000000..a44fef384f08 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/alphafold/alphafold_paragraph_5_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "alphafold", + "subtitle": "Job-script-for-running-AlphaFold-CPU-only", + "source_file": "../../mkdocs/docs/HPC/alphafold.md", + "title_depth": 3, + "directory": "alphafold", + "parent_title": "", + "previous_title": "alphafold_paragraph_4", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/alphafold/#job-script-for-running-alphafold-cpu-only" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_1.txt new file mode 100644 index 000000000000..0b035d5ab3b9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_1.txt @@ -0,0 +1,42 @@ +Apptainer (formerly known as Singularity) +What is Apptainer? +Apptainer is an open-source computer program that performs +operating-system-level virtualization (also known as containerisation). +One of the main uses of Apptainer is to bring containers and +reproducibility to scientific computing and the high-performance +computing (HPC) world. Using Apptainer/Singularity containers, +developers can work in reproducible environments of their choosing and +design, and these complete environments can easily be copied and +executed on other platforms. +For more general information about the use of Apptainer, please see the +official documentation at https://apptainer.org/docs/. 
+This documentation only covers aspects of using Apptainer on the +HPC-UGent infrastructure. +Restrictions on image location +Some restrictions have been put in place on the use of Apptainer. This +is mainly done for performance reasons and to avoid that the use of +Apptainer impacts other users on the system. +The Apptainer/Singularity image file must be located on either one of +the scratch filesystems, the local disk of the workernode you are using +or /dev/shm. The centrally provided apptainer command will refuse to +run using images that are located elsewhere, in particular on the +$VSC_HOME, /apps or $VSC_DATA filesystems. +In addition, this implies that running container images provided via a +URL (e.g., shub://... or docker://...) will not work. +If these limitations are a problem for you, please let us know via hpc@ugent.be. +Available filesystems +All HPC-UGent shared filesystems will be readily available in an +Apptainer/Singularity container, including the home, data and scratch +filesystems, and they will be accessible via the familiar $VSC_HOME, +$VSC_DATA* and $VSC_SCRATCH* environment variables. +Apptainer/Singularity Images +Creating Apptainer/Singularity images +Creating new Apptainer/Singularity images or converting Docker images, +by default, requires admin privileges, which is obviously not available +on the HPC-UGent infrastructure. However, if you use the --fakeroot option, you +can make new Apptainer/Singularity images or convert Docker images. +Due to the nature of --fakeroot option, we recommend to write your +Apptainer/Singularity image to a globally writable location, like +/tmp, or /local directories. Once the image is created, you should +move it to your desired destination. 
An example to make an +Apptainer/Singularity container image: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_1_metadata.json new file mode 100644 index 000000000000..39678562b8b7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "apptainer", + "subtitle": "Creating-ApptainerSingularity-images", + "source_file": "../../mkdocs/docs/HPC/apptainer.md", + "title_depth": 3, + "directory": "apptainer", + "parent_title": "", + "previous_title": null, + "next_title": "apptainer_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/apptainer/#creating-apptainersingularity-images" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_2.txt new file mode 100644 index 000000000000..ecfb6d41eb17 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_2.txt @@ -0,0 +1,46 @@ +# avoid that Apptainer uses $HOME/.cache +export APPTAINER_CACHEDIR=/tmp/$USER/apptainer/cache +# instruct Apptainer to use temp dir on local filesystem +export APPTAINER_TMPDIR=/tmp/$USER/apptainer/tmpdir +# specified temp dir must exist, so create it +mkdir -p $APPTAINER_TMPDIR +# convert Docker container to Apptainer container image +apptainer build --fakeroot /tmp/$USER/tf.sif docker://nvcr.io/nvidia/tensorflow:21.10-tf1-py3 +# mv container image to $VSC_SCRATCH +mv /tmp/$USER/tf.sif $VSC_SCRATCH/tf.sif +Converting Docker images +For more information on converting existing Docker images to +Apptainer/Singularity images, see +https://apptainer.org/docs/user/main/docker_and_oci.html. 
+We strongly recommend the use of Docker Hub, see +https://hub.docker.com/ for more information. +Execute our own script within our container +Copy testing image from /apps/gent/tutorials/Singularity to +$VSC_SCRATCH: +cp /apps/gent/tutorials/Singularity/CentOS7_EasyBuild.img $VSC_SCRATCH/ +Create a job script like: +#!/bin/sh +#PBS -o apptainer.output +#PBS -e apptainer.error +#PBS -l nodes=1:ppn=1 +#PBS -l walltime=12:00:00 +apptainer exec $VSC_SCRATCH/CentOS7_EasyBuild.img ~/my_script.sh +Create an example myscript.sh: +#!/bin/bash +# prime factors +factor 1234567 +Tensorflow example +We already have a Tensorflow example image, but you can also convert the +Docker image (see https://hub.docker.com/r/tensorflow/tensorflow) to a +Apptainer/Singularity image yourself +Copy testing image from /apps/gent/tutorials to $VSC_SCRATCH: +cp /apps/gent/tutorials/Singularity/Ubuntu14.04_tensorflow.img $VSC_SCRATCH/ +#!/bin/sh +# +# +#PBS -o tensorflow.output +#PBS -e tensorflow.error +#PBS -l nodes=1:ppn=4 +#PBS -l walltime=12:00:00 +# +apptainer exec $VSC_SCRATCH/Ubuntu14.04_tensorflow.img python ~/linear_regression.py diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_2_metadata.json new file mode 100644 index 000000000000..ec6df5485370 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "apptainer", + "subtitle": "Tensorflow-example", + "source_file": "../../mkdocs/docs/HPC/apptainer.md", + "title_depth": 2, + "directory": "apptainer", + "parent_title": "", + "previous_title": "apptainer_paragraph_1", + "next_title": "apptainer_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/apptainer/#tensorflow-example" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_3.txt new file mode 100644 index 000000000000..73860639fe23 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_3.txt @@ -0,0 +1,34 @@ +You can download linear_regression.py from [the official Tensorflow +repository](https://github.com/tensorflow/tensorflow/blob/r1.12/tensorflow/examples/get_started/regression/linear_regression.py). +MPI example +It is also possible to execute MPI jobs within a container, but the +following requirements apply: +- Mellanox IB libraries must be available from the container (install + the infiniband-diags, libmlx5-1 and libmlx4-1 OS packages) +- Use modules within the container (install the environment-modules + or lmod package in your container) +- Load the required module(s) before apptainer execution. +- Set C_INCLUDE_PATH variable in your container if it is required + during compilation time + (export C_INCLUDE_PATH=/usr/include/x86_64-linux-gnu/:$C_INCLUDE_PATH + for Debian flavours) +Copy the testing image from /apps/gent/tutorials/Singularity to +$VSC_SCRATCH +cp /apps/gent/tutorials/Singularity/Debian8_UGentMPI.img $VSC_SCRATCH/ +For example to compile an [MPI +example](https://github.com/open-mpi/ompi/blob/master/examples/ring_c.c): +module load intel +apptainer shell $VSC_SCRATCH/Debian8_UGentMPI.img +export LANG=C +export C_INCLUDE_PATH=/usr/include/x86_64-linux-gnu/:$C_INCLUDE_PATH +mpiicc ompi/examples/ring_c.c -o ring_debian +exit +Example MPI job script: +#!/bin/sh +#PBS -N mpi +#PBS -o apptainermpi.output +#PBS -e apptainermpi.error +#PBS -l nodes=2:ppn=15 +#PBS -l walltime=12:00:00 +module load intel vsc-mympirun +mympirun --impi-fallback apptainer exec $VSC_SCRATCH/Debian8_UGentMPI.img ~/ring_debian diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_3_metadata.json new file mode 100644 index 000000000000..0db8305a6618 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/apptainer/apptainer_paragraph_3_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "apptainer", + "subtitle": "MPI-example", + "source_file": "../../mkdocs/docs/HPC/apptainer.md", + "title_depth": 2, + "directory": "apptainer", + "parent_title": "", + "previous_title": "apptainer_paragraph_2", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/apptainer/#mpi-example" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/best_practices/best_practices_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/best_practices/best_practices_paragraph_1.txt new file mode 100644 index 000000000000..c096d42572b9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/best_practices/best_practices_paragraph_1.txt @@ -0,0 +1,39 @@ +Best Practices { #ch:best-practices} +General Best Practices { #sec:general-best-practices} +1. Before starting, you should always check: + - Are there any errors in the script? + - Are the required modules loaded? + - Is the correct executable used? +2. Check your computer requirements upfront, and request the correct + resources in your batch job script. + - Number of requested cores + - Amount of requested memory + - Requested network type +3. Check your jobs at runtime. You could login to the node and check + the proper execution of your jobs with, e.g., top or vmstat. + Alternatively you could run an interactive job (qsub -I). +4. Try to benchmark the software for scaling issues when using MPI or + for I/O issues. +5. 
Use the scratch file system ($VSC_SCRATCH_NODE, which is mapped to + the local /tmp) whenever possible. Local disk I/O is always much + faster as it does not have to use the network. +6. When your job starts, it will log on to the compute node(s) and + start executing the commands in the job script. It will start in + your home directory $VSC_HOME, so going to the current directory + with cd $PBS_O_WORKDIR is the first thing which needs to be done. + You will have your default environment, so don't forget to load the + software with module load. +[//]: # (Do not worry, it will render with correct numbering in all cases.) +7. In case your job is not running, use "checkjob". It will show why your + job is not yet running. Sometimes commands might timeout with an + overloaded scheduler. +8. Submit your job and wait (be patient) ... +9. Submit small jobs by grouping them together. See chapter Multi-job submission for + how this is done. +10. The runtime is limited by the maximum walltime of the queues. +11. Requesting many processors could imply long queue times. It's + advised to only request the resources you'll be able to use. +12. For all multi-node jobs, please use a cluster that has an + "InfiniBand" interconnect network. +13. And above all, do not hesitate to contact the HPC staff at hpc@ugent.be. We're here + to help you. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/best_practices/best_practices_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/best_practices/best_practices_paragraph_1_metadata.json new file mode 100644 index 000000000000..72bc17a2ef1a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/best_practices/best_practices_paragraph_1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "best_practices", + "subtitle": "General-Best-Practices", + "source_file": "../../mkdocs/docs/HPC/best_practices.md", + "title_depth": 2, + "directory": "best_practices", + "links": { + "0": "https://docs.hpc.ugent.be/multi_job_submission" + }, + "parent_title": "", + "previous_title": null, + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/best_practices/#general-best-practices" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt new file mode 100644 index 000000000000..5e9c1fc81b47 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1.txt @@ -0,0 +1,39 @@ +Compiling and testing your software on the HPC +All nodes in the HPC cluster are running the "RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty)" +Operating system, which is a specific version of Red Hat Enterprise Linux. This means that all the +software programs +(executable) that the end-user wants to run on the HPC first must be +compiled for RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). It also means that you first have to install all the +required external software packages on the HPC. 
+Most commonly used compilers are already pre-installed on the HPC and can be +used straight away. Also, many popular external software packages, which +are regularly used in the scientific community, are also pre-installed. +Check the pre-installed software on the HPC +In order to check all the available modules and their version numbers, +which are pre-installed on the HPC enter: +When your required application is not available on the HPC please contact +any HPC member. Be aware of potential "License Costs". "Open Source" +software is often preferred. +Porting your code +To port a software-program is to translate it from the operating system in +which it was developed (e.g., Windows 7) to another operating system +(e.g., Red Hat Enterprise Linux on our HPC) so that it can be used there. Porting implies some +degree of effort, but not nearly as much as redeveloping the program in +the new environment. It all depends on how "portable" you wrote your +code. +In the simplest case the file or files may simply be copied from one +machine to the other. However, in many cases the software is installed +on a computer in a way, which depends upon its detailed hardware, +software, and setup, with device drivers for particular devices, using +installed operating system and supporting software components, and using +different directories. +In some cases software, usually described as "portable software" is +specifically designed to run on different computers with compatible +operating systems and processors without any machine-dependent +installation; it is sufficient to transfer specified directories and +their contents. Hardware- and software-specific information is often +stored in configuration files in specified locations (e.g., the registry +on machines running MS Windows). +Software, which is not portable in this sense, will have to be +transferred with modifications to support the environment on the +destination machine. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json new file mode 100644 index 000000000000..6ffb0d3529f1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Porting-your-code", + "source_file": "../../mkdocs/docs/HPC/compiling_your_software.md", + "title_depth": 2, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": null, + "next_title": "compiling_your_software_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#porting-your-code" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt new file mode 100644 index 000000000000..224e61402ed3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2.txt @@ -0,0 +1,42 @@ +Whilst programming, it would be wise to stick to certain standards +(e.g., ISO/ANSI/POSIX). This will ease the porting of your code to other +platforms. +Porting your code to the RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty) platform is the responsibility of the end-user. +Compiling and building on the HPC +Compiling refers to the process of translating code written in some +programming language, e.g., Fortran, C, or C++, to machine code. +Building is similar, but includes gluing together the machine code +resulting from different source files into an executable (or library). 
+The text below guides you through some basic problems typical for small +software projects. For larger projects it is more appropriate to use +makefiles or even an advanced build system like CMake. +All the HPC nodes run the same version of the Operating System, i.e. RHEL 8.8 (accelgor, doduo, donphan, gallade, joltik, skitty). So, +it is sufficient to compile your program on any compute node. Once you +have generated an executable with your compiler, this executable should +be able to run on any other compute-node. +A typical process looks like: +1. Copy your software to the login-node of the HPC +2. Start an interactive session on a compute node; +3. Compile it; +4. Test it locally; +5. Generate your job scripts; +6. Test it on the HPC +7. Run it (in parallel); +We assume you've copied your software to the HPC. The next step is to request +your private compute node. +$ qsub -I +qsub: waiting for job 123456 to start +Compiling a sequential program in C +Go to the examples for chapter +Compiling and testing your software on the HPC and load the +foss module: +cd ~/examples/Compiling-and-testing-your-software-on-the-HPC +module load foss +We now list the directory and explore the contents of the "hello.c" +program: +$ ls -l +total 512 +-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c +-rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* +-rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c +-rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json new file mode 100644 index 000000000000..4984907716ea --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_2_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "compiling_your_software", + 
"subtitle": "Compiling-a-sequential-program-in-C", + "source_file": "../../mkdocs/docs/HPC/compiling_your_software.md", + "title_depth": 3, + "directory": "compiling_your_software", + "links": { + "0": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-and-building-on-the-hpc" + }, + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_1", + "next_title": "compiling_your_software_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt new file mode 100644 index 000000000000..4a667fca1d75 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3.txt @@ -0,0 +1,50 @@ +/* + * VSC : Flemish Supercomputing Centre + * Tutorial : Introduction to HPC + * Description: Print 500 numbers, whilst waiting 1 second in between + */ +#include "stdio.h" +int main( int argc, char *argv[] ) +{ + int i; + for (i=0; i<500; i++) + { + printf("Hello #%d\n", i); + fflush(stdout); + sleep(1); + } +} +The "hello.c" program is a simple source file, written in C. It'll print +500 times "Hello #<num>", and waits one second between 2 printouts. +We first need to compile this C-file into an executable with the +gcc-compiler. +First, check the command line options for *"gcc" (GNU C-Compiler)*, then +we compile. the O2 option enables a moderate level of optimization when compiling the code. +It instructs the compiler to optimize the code for better performance without significantly increasing compilation time. 
+Finally, list the contents of the directory again: +$ gcc --help +$ gcc -O2 -o hello hello.c +$ ls -l +total 512 +-rwxrwxr-x 1 vsc40000 7116 Sep 16 11:43 hello* +-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c +-rwxr-xr-x 1 vsc40000 130 Sep 16 11:39 hello.pbs* +A new file "hello" has been created. Note that this file has "execute" +rights, i.e., it is an executable. More often than not, calling gcc -- +or any other compiler for that matter -- will provide you with a list of +errors and warnings referring to mistakes the programmer made, such as +typos, syntax errors. You will have to correct them first in order to +make the code compile. Warnings pinpoint less crucial issues that may +relate to performance problems, using unsafe or obsolete language +features, etc. It is good practice to remove all warnings from a +compilation process, even if they seem unimportant so that a code change +that produces a warning does not go unnoticed. +Let's test this program on the local compute node, which is at your +disposal after the qsub -I command: +$ ./hello +Hello #0 +Hello #1 +Hello #2 +Hello #3 +Hello #4 +... 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json new file mode 100644 index 000000000000..8209e6c4ae4b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_3_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-sequential-program-in-C", + "source_file": "../../mkdocs/docs/HPC/compiling_your_software.md", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_2", + "next_title": "compiling_your_software_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-sequential-program-in-c" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt new file mode 100644 index 000000000000..dfbb87a2e209 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4.txt @@ -0,0 +1,52 @@ +It seems to work, now run it on the HPC +qsub hello.pbs +Compiling a parallel program in C/MPI +cd ~/examples/Compiling-and-testing-your-software-on-the-HPC +List the directory and explore the contents of the "mpihello.c" +program: +$ ls -l +total 512 +total 512 +-rw-r--r-- 1 vsc40000 214 Sep 16 09:42 hello.c +-rw-r--r-- 1 vsc40000 130 Sep 16 11:39 hello.pbs* +-rw-r--r-- 1 vsc40000 359 Sep 16 13:55 mpihello.c +-rw-r--r-- 1 vsc40000 304 Sep 16 13:55 mpihello.pbs +/* + * VSC : Flemish Supercomputing Centre + * Tutorial : 
Introduction to HPC + * Description: Example program, to compile with MPI + */ +#include <stdio.h> +#include <mpi.h> +main(int argc, char **argv) +{ + int node, i, j; + float f; + MPI_Init(&argc,&argv); + MPI_Comm_rank(MPI_COMM_WORLD, &node); + + printf("Hello World from Node %d.\n", node); + for (i=0; i<=100000; i++) + f=i*2.718281828*i+i+i*3.141592654; + MPI_Finalize(); +} +The "mpihello.c" program is a simple source file, written in C with MPI +library calls. +Then, check the command line options for *"mpicc" (GNU C-Compiler with +MPI extensions)*, then we compile and list the contents of the directory +again: +mpicc --help +mpicc -o mpihello mpihello.c +ls -l +A new file "mpihello" has been created. Note that this program has +"execute" rights. +Let's test this program on the "login" node first: +$ ./mpihello +Hello World from Node 0. +It seems to work, now run it on the HPC. +qsub mpihello.pbs +Compiling a parallel program in Intel Parallel Studio Cluster Edition +We will now compile the same program, but using the Intel Parallel +Studio Cluster Edition compilers. 
We stay in the examples directory for +this chapter: +cd ~/examples/Compiling-and-testing-your-software-on-the-HPC diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json new file mode 100644 index 000000000000..d5416e5b7bc0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_4_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", + "source_file": "../../mkdocs/docs/HPC/compiling_your_software.md", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_3", + "next_title": "compiling_your_software_paragraph_5", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt new file mode 100644 index 000000000000..a94d986ff58a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5.txt @@ -0,0 +1,28 @@ +We will compile this C/MPI -file into an executable with the Intel +Parallel Studio Cluster Edition. First, clear the modules (purge) and +then load the latest "intel" module: +module purge +module load intel +Then, compile and list the contents of the directory again. The Intel +equivalent of mpicc is mpiicc. 
+mpiicc -o mpihello mpihello.c +ls -l +Note that the old "mpihello" file has been overwritten. Let's test this +program on the "login" node first: +$ ./mpihello +Hello World from Node 0. +It seems to work, now run it on the HPC. +qsub mpihello.pbs +Note: The AUGent only has a license for the Intel Parallel Studio Cluster +Edition for a fixed number of users. As such, it might happen that you +have to wait a few minutes before a floating license becomes available +for your use. +Note: The Intel Parallel Studio Cluster Edition contains equivalent +compilers for all GNU compilers. Hereafter the overview for C, C++ and +Fortran compilers. +| | Sequential Program | | **Parallel Program (with MPI)** | | +|-------------|------------------------|-----------|---------------------------------|-----------| +| | GNU | Intel | GNU | Intel | +| C | gcc | icc | mpicc | mpiicc | +| **C++** | g++ | icpc | mpicxx | mpiicpc | +| Fortran | gfortran | ifort | mpif90 | mpiifort | \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json new file mode 100644 index 000000000000..6ba11883f8be --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/compiling_your_software/compiling_your_software_paragraph_5_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "compiling_your_software", + "subtitle": "Compiling-a-parallel-program-in-Intel-Parallel-Studio-Cluster-Edition", + "source_file": "../../mkdocs/docs/HPC/compiling_your_software.md", + "title_depth": 3, + "directory": "compiling_your_software", + "parent_title": "", + "previous_title": "compiling_your_software_paragraph_4", + "next_title": null, + "OS": "generic", + "reference_link": 
"https://docs.hpc.ugent.be/compiling_your_software/#compiling-a-parallel-program-in-intel-parallel-studio-cluster-edition" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt new file mode 100644 index 000000000000..53bee49de3fb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1.txt @@ -0,0 +1,39 @@ +Connecting to the HPC infrastructure +Before you can really start using the HPC clusters, there are several things +you need to do or know: +1. You need to log on to the cluster using an SSH client to one of + the login nodes or by using the HPC web portal. + This will give you command-line access. + A standard web browser like Firefox or Chrome for the web portal will suffice. +2. Before you can do some work, you'll have to transfer the files + that you need from your desktop computer to the cluster. At the end + of a job, you might want to transfer some files back. +3. Optionally, if you wish to use programs with a **graphical user + interface**, you will need an X-server on your client system and log + in to the login nodes with X-forwarding enabled. +4. Often several versions of software packages and libraries are + installed, so you need to select the ones you need. To manage + different versions efficiently, the VSC clusters use so-called + modules, so you will need to select and load the modules that + you need. +Connection restrictions +Since March 20th 2020, restrictions are in place that limit from where +you can connect to the VSC HPC infrastructure, in response to security +incidents involving several European HPC centres. +VSC login nodes are only directly accessible from within university +networks, and from (most) Belgian commercial internet providers. +All other IP domains are blocked by default. 
If you are connecting from +an IP address that is not allowed direct access, you have the following +options to get access to VSC login nodes: +- Use a VPN connection to connect to the UGent network (recommended). See https://helpdesk.ugent.be/vpn/en/ for more information. +- Whitelist your IP address automatically by accessing + https://firewall.vscentrum.be and log in with your UGent account. + - While this web connection is active new SSH sessions can be + started. + - Active SSH sessions will remain active even when this web page + is closed. +- Contact your HPC support team (via hpc@ugent.be) and ask them to whitelist your + IP range (e.g., for industry access, automated processes). +Trying to establish an SSH connection from an IP address that does not +adhere to these restrictions will result in an immediate failure to +connect, with an error message like: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json new file mode 100644 index 000000000000..450a04e1aa8a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Connection-restrictions", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal" + }, + "parent_title": "", + "previous_title": null, + "next_title": "connecting_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#connection-restrictions" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_4.txt new file mode 100644 index 000000000000..4c910d86d35f 
--- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_4.txt @@ -0,0 +1,35 @@ +This directory contains: +1. This HPC Tutorial (in either a Mac, Linux or Windows version). +2. An examples subdirectory, containing all the examples that you need in this + Tutorial, as well as examples that might be useful for your specific applications. +cd examples + tip + Typing cd ex followed by tab (the Tab-key) will generate the cd examples + command. Command-line completion (also tab completion) is a common feature of the bash command + line interpreter, in which the program automatically fills in partially + typed commands. + tip + For more exhaustive tutorials about Linux usage, see Appendix Useful Linux Commands +The first action is to copy the contents of the HPC examples directory to +your home directory, so that you have your own personal copy and that +you can start using the examples. The "-r" option of the copy command +will also copy the contents of the sub-directories "recursively". +cp -r /apps/gent/tutorials/Intro-HPC/examples ~/ +Go to your home directory, check your own private examples directory, ... and start working. +cd +ls -l +Upon connecting you will see a login message containing your last login time stamp and a basic overview of the current cluster utilisation. 
+Last login: Thu Mar 18 13:15:09 2021 from gligarha02.gastly.os + STEVIN HPC-UGent infrastructure status on Mon, 19 Feb 2024 10:00:01 + cluster - full - free - part - total - running - queued + nodes nodes free nodes jobs jobs + ------------------------------------------------------------------------- + skitty 39 0 26 68 1839 5588 + joltik 6 0 1 10 29 18 + doduo 22 0 75 128 1397 11933 + accelgor 4 3 2 9 18 1 + donphan 0 0 16 16 16 13 + gallade 2 0 5 16 19 136 +For a full view of the current loads and queues see: +https://hpc.ugent.be/clusterstate/ +Updates on current system status and planned maintenance can be found on https://www.ugent.be/hpc/en/infrastructure/status diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_4_metadata.json new file mode 100644 index 000000000000..faaf9fdf9c2e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_4_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/useful_linux_commands" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_3", + "next_title": "connecting_paragraph_5", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt new file mode 100644 index 000000000000..dd4f3269fb56 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9.txt @@ -0,0 
+1,11 @@ +If you want to find out which login host you are connected to, you can use the hostname command. +$ hostname +gligar07.gastly.os +$ ssh gligar08.gastly.os +$ hostname +gligar08.gastly.os +Rather than always starting a new session on the HPC, you can also use a terminal multiplexer like screen or tmux. +These can make sessions that 'survive' across disconnects. +You can find more information on how to use these tools here (or on other online sources): +- screen +- tmux \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json new file mode 100644 index 000000000000..7aafa76a65f4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/connecting/connecting_paragraph_9_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "connecting", + "subtitle": "Changing-login-nodes", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "links": { + "0": "https://www.howtogeek.com/662422/how-to-use-linuxs-screen-command/", + "1": "https://www.howtogeek.com/671422/how-to-use-tmux-on-linux-and-why-its-better-than-screen/" + }, + "parent_title": "", + "previous_title": "connecting_paragraph_8", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/connecting/#changing-login-nodes" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/crontab/crontab_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/crontab/crontab_paragraph_1.txt new file mode 100644 index 000000000000..12d31c1677bc --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/crontab/crontab_paragraph_1.txt @@ -0,0 +1,28 @@ +Cron scripts +Cron scripts configuration +It is possible to run automated cron scripts as a regular user on the +UGent login nodes. 
Due to the high availability setup users should add +their cron scripts on the same login node to avoid any cron job script +duplication. +In order to create a new cron script first login to HPC-UGent login node +as usual with your vsc user's account (see section +Connecting). +Check if any cron script is already set in the current login node with: +crontab -l +At this point you can add/edit (with vi editor) any cron script +running the command: +crontab -e +Example cron job script + 15 5 * * * ~/runscript.sh >& ~/job.out +where runscript.sh has these lines in this example: +#!/bin/bash +module swap cluster/donphan +export SLURM_CLUSTERS="donphan" +/usr/libexec/jobcli/qsub ~/job_scripts/test.sh >& ~/job.out +In the previous example a cron script was set to be executed every day +at 5:15 am. More information about crontab and cron scheduling format at +https://www.redhat.com/sysadmin/automate-linux-tasks-cron. +Please note that you should login into the same login node to edit your +previously generated crontab tasks. 
If that is not the case you can +always jump from one login node to another with: +ssh gligar07 # or gligar08 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/crontab/crontab_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/crontab/crontab_paragraph_1_metadata.json new file mode 100644 index 000000000000..e55974cb0c4c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/crontab/crontab_paragraph_1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "crontab", + "subtitle": "Example-cron-job-script", + "source_file": "../../mkdocs/docs/HPC/crontab.md", + "title_depth": 4, + "directory": "crontab", + "links": { + "0": "https://docs.hpc.ugent.be/connecting/#connecting-to-the-hpc-infrastructure" + }, + "parent_title": "", + "previous_title": null, + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/crontab/#example-cron-job-script" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_1.txt new file mode 100644 index 000000000000..60ac3607af20 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_1.txt @@ -0,0 +1,44 @@ +Easybuild +What is Easybuild? +You can use EasyBuild to build and install supported software in your +own VSC account, rather than requesting a central installation by the +HPC support team. +EasyBuild (https://easybuilders.github.io/easybuild) is the software +build and installation framework that was created by the HPC-UGent team, +and has recently been picked up by HPC sites around the world. It allows +you to manage (scientific) software on High Performance Computing (HPC) +systems in an efficient way. +When should I use Easybuild? 
+For general software installation requests, please see I want to use software that is not available on the clusters yet. However, there +might be reasons to install the software yourself: +- applying custom patches to the software that only you or your group + are using +- evaluating new software versions prior to requesting a central + software installation +- installing (very) old software versions that are no longer eligible + for central installation (on new clusters) +Configuring EasyBuild +Before you use EasyBuild, you need to configure it: +Path to sources +This is where EasyBuild can find software sources: +EASYBUILD_SOURCEPATH=$VSC_DATA/easybuild/sources:/apps/gent/source +- the first directory $VSC_DATA/easybuild/sources is where EasyBuild + will (try to) automatically download sources if they're not + available yet +- /apps/gent/source is the central "cache" for already downloaded + sources, and will be considered by EasyBuild before downloading + anything +Build directory +This directory is where EasyBuild will build software in. To have good +performance, this needs to be on a fast filesystem. +export EASYBUILD_BUILDPATH=${TMPDIR:-/tmp/$USER} +On cluster nodes, you can use the fast, in-memory /dev/shm/$USER +location as a build directory. +Software install location +This is where EasyBuild will install the software (and accompanying +modules) to. +For example, to let it use $VSC_DATA/easybuild, use: +export EASYBUILD_INSTALLPATH=$VSC_DATA/easybuild/$VSC_OS_LOCAL/$VSC_ARCH_LOCAL$VSC_ARCH_SUFFIX +Using the $VSC_OS_LOCAL, $VSC_ARCH_LOCAL and $VSC_ARCH_SUFFIX +environment variables ensures that you install software to a location +that is specific to the cluster you are building for. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_1_metadata.json new file mode 100644 index 000000000000..fee2b54bb4b5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "easybuild", + "subtitle": "Software-install-location", + "source_file": "../../mkdocs/docs/HPC/easybuild.md", + "title_depth": 3, + "directory": "easybuild", + "links": { + "0": "https://docs.hpc.ugent.be/FAQ/#i-want-to-use-software-that-is-not-available-on-the-clusters-yet" + }, + "parent_title": "", + "previous_title": null, + "next_title": "easybuild_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/easybuild/#software-install-location" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_2.txt new file mode 100644 index 000000000000..9136e5638918 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_2.txt @@ -0,0 +1,33 @@ +Make sure you do not build software on the login nodes, since the loaded cluster module determines the +location of the installed software. Software built on the login nodes +may not work on the cluster you want to use the software on (see also Running software that is incompatible with host). +To share custom software installations with members of your VO, replace +$VSC_DATA with $VSC_DATA_VO in the example above. +Using EasyBuild +Before using EasyBuild, you first need to load the EasyBuild module. +We don't specify a version here (this is an exception, for most other +modules you should see Using explicit version numbers) because newer versions might include important +bug fixes. 
+module load EasyBuild +Installing supported software +EasyBuild provides a large collection of readily available software +versions, combined with a particular toolchain version. Use the +--search (or -S) functionality to see which different 'easyconfigs' +(build recipes, see +http://easybuild.readthedocs.org/en/latest/Concepts_and_Terminology.html#easyconfig-files) are available: +$ eb -S example-1.2 +CFGS1=/apps/gent/CO7/sandybridge/software/EasyBuild/3.6.2/lib/python2.7/site-packages/easybuild_easyconfigs-3.6.2-py2.7.egg/easybuild/easyconfigs + * $CFGS1/e/example/example-1.2.1-foss-a.eb + * $CFGS1/e/example/example-1.2.3-foss-b.eb + * $CFGS1/e/example/example-1.2.5-intel-a.eb +For readily available easyconfigs, just specify the name of the +easyconfig file to build and install the corresponding software package: +eb example-1.2.1-foss-2024a.eb --robot +Installing variants on supported software +To install small variants on supported software, e.g., a different +software version, or using a different compiler toolchain, use the +corresponding --try-X options: +To try to install example v1.2.6, based on the easyconfig file for +example v1.2.5: +eb example-1.2.5-intel-a.eb --try-software-version=1.2.6 +To try to install example v1.2.5 with a different compiler toolchain: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_2_metadata.json new file mode 100644 index 000000000000..5927768b87d1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_2_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "easybuild", + "subtitle": "Installing-variants-on-supported-software", + "source_file": "../../mkdocs/docs/HPC/easybuild.md", + "title_depth": 3, + "directory": "easybuild", + "links": { + "0": 
"https://docs.hpc.ugent.be/troubleshooting/#running-software-that-is-incompatible-with-host", + "1": "https://docs.hpc.ugent.be/running_batch_jobs/#using-explicit-version-numbers" + }, + "parent_title": "", + "previous_title": "easybuild_paragraph_1", + "next_title": "easybuild_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/easybuild/#installing-variants-on-supported-software" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_3.txt new file mode 100644 index 000000000000..148b07535086 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_3.txt @@ -0,0 +1,18 @@ +eb example-1.2.5-intel-a.eb --robot --try-toolchain=intel,b +Install other software +To install other, not yet supported, software, you will need to provide +the required easyconfig files yourself. See +https://easybuild.readthedocs.org/en/latest/Writing_easyconfig_files.html +for more information. +Using the installed modules +To use the modules you installed with EasyBuild, extend $MODULEPATH to +make them accessible for loading: +module use $EASYBUILD_INSTALLPATH/modules/all +It makes sense to put this module use command and all export +commands in your .bashrc login script. +That way, you don't have to type +these commands every time you want to use EasyBuild or you want to load +modules generated with EasyBuild. 
+See also [the section on .bashrc in +the "Beyond the basics" chapter of the intro to +Linux](../linux-tutorial/beyond_the_basics/#bashrc-login-script) diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_3_metadata.json new file mode 100644 index 000000000000..36db9a90c13c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/easybuild/easybuild_paragraph_3_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "easybuild", + "subtitle": "Using-the-installed-modules", + "source_file": "../../mkdocs/docs/HPC/easybuild.md", + "title_depth": 2, + "directory": "easybuild", + "parent_title": "", + "previous_title": "easybuild_paragraph_2", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/easybuild/#using-the-installed-modules" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_1.txt new file mode 100644 index 000000000000..3cebfd9b5717 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_1.txt @@ -0,0 +1,42 @@ +Fine-tuning Job Specifications +[//]: # (ch:fine-tuning-job-specifications) +As HPC system administrators, we often observe that the HPC resources are not +optimally (or wisely) used. For example, we regularly notice that +several cores on a computing node are not utilised, due to the fact that +one sequential program uses only one core on the node. Or users run I/O +intensive applications on nodes with "slow" network connections. +Users often tend to run their jobs without specifying specific PBS Job +parameters. 
As such, their job will automatically use the default +parameters, which are not necessarily (or rarely) the optimal ones. This +can slow down the run time of your application, but also block HPC resources +for other users. +Specifying the "optimal" Job Parameters requires some knowledge of your +application (e.g., how many parallel threads does my application use, +is there a lot of inter-process communication, how much memory does my +application need) and also some knowledge about the HPC infrastructure +(e.g., what kind of multi-core processors are available, which nodes +have InfiniBand). +There are plenty of monitoring tools on Linux available to the user, +which are useful to analyse your individual application. The HPC environment +as a whole often requires different techniques, metrics and time goals, +which are not discussed here. We will focus on tools that can help to +optimise your Job Specifications. +Determining the optimal computer resource specifications can be broken +down into different parts. The first is actually determining which +metrics are needed and then collecting that data from the hosts. Some of +the most commonly tracked metrics are CPU usage, memory consumption, +network bandwidth, and disk I/O stats. These provide different +indications of how well a system is performing, and may indicate where +there are potential problems or performance bottlenecks. Once the data +have actually been acquired, the second task is analysing the data and +adapting your PBS Job Specifications. +Another different task is to monitor the behaviour of an application at +run time and detect anomalies or unexpected behaviour. Linux provides a +large number of utilities to monitor the performance of its components. +This chapter shows you how to measure: +1. Walltime +2. Memory usage +3. CPU usage +4. Disk (storage) needs +5. 
Network bottlenecks +First, we allocate a compute node and move to our relevant directory: diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_1_metadata.json new file mode 100644 index 000000000000..ace4c7c97143 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "fine_tuning_job_specifications", + "subtitle": "Fine-tuning-Job-Specifications", + "source_file": "../../mkdocs/docs/HPC/fine_tuning_job_specifications.md", + "title_depth": 1, + "directory": "fine_tuning_job_specifications", + "parent_title": "", + "previous_title": null, + "next_title": "fine_tuning_job_specifications_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/fine_tuning_job_specifications/#fine-tuning-job-specifications" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_2.txt new file mode 100644 index 000000000000..c34e4227576d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_2.txt @@ -0,0 +1,44 @@ +qsub -I +cd ~/examples/Fine-tuning-Job-Specifications +Specifying Walltime +[//]: # (sec:specifying-walltime-requirements) +One of the most important and also easiest parameters to measure is the +duration of your program. This information is needed to specify the +walltime. +The time utility executes and times your application. 
You can just add the time +command in front of your normal command line, including your command +line options. After your executable has finished, time writes the total time +elapsed, the time consumed by system overhead, and the time used to +execute your executable to the standard error stream. The calculated +times are reported in seconds. +Test the time command: +$ time sleep 75 +real 1m15.005s +user 0m0.001s +sys 0m0.002s +It is a good practice to correctly estimate and specify the run time +(duration) of an application. Of course, a margin of 10% to 20% can be +taken to be on the safe side. +It is also wise to check the walltime on different compute nodes or to +select the "slowest" compute node for your walltime tests. Your estimate +should be appropriate in case your application will run on the "slowest" +(oldest) compute nodes. +The walltime can be specified in a job script as: +#PBS -l walltime=3:00:00:00 +or on the command line +qsub -l walltime=3:00:00:00 +It is recommended to always specify the walltime for a job. +Specifying memory requirements +[//]: # (sec:specifying-memory-requirements) +In many situations, it is useful to monitor the amount of memory an +application is using. You need this information to determine the +characteristics of the required compute node on which that application +should run. Estimating the amount of memory an application will use +during execution is often non-trivial, especially when one uses +third-party software. +Available Memory on the machine +The first point is to be aware of the available free memory in your +computer. The "free" command displays the total amount of free and +used physical and swap memory in the system, as well as the buffers used +by the kernel. We also use the option "-m" to see the results expressed +in Mega-Bytes and the "-t" option to get totals.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_2_metadata.json new file mode 100644 index 000000000000..39460947d0d3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "fine_tuning_job_specifications", + "subtitle": "Available-Memory-on-the-machine", + "source_file": "../../mkdocs/docs/HPC/fine_tuning_job_specifications.md", + "title_depth": 3, + "directory": "fine_tuning_job_specifications", + "parent_title": "", + "previous_title": "fine_tuning_job_specifications_paragraph_1", + "next_title": "fine_tuning_job_specifications_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/fine_tuning_job_specifications/#available-memory-on-the-machine" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_3.txt new file mode 100644 index 000000000000..6ca183af09a0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_3.txt @@ -0,0 +1,49 @@ +$ free -m -t + total used free shared buffers cached +Mem: 16049 4772 11277 0 107 161 +-/+ buffers/cache: 4503 11546 +Swap: 16002 4185 11816 +Total: 32052 8957 23094 +Important is to note the total amount of memory available in the machine +(i.e., 16 GB in this example) and the amount of used and free memory +(i.e., 4.7 GB is used and another 11.2 GB is free here). 
+It is not a good practice to use swap-space for your computational +applications. A lot of "swapping" can increase the execution time of +your application tremendously. +On the UGent clusters, there is no swap space available for jobs; you +can only use physical memory, even though "free" will show swap. +Checking the memory consumption +To monitor the memory consumption of a running application, you can use the "top" or the "htop" command. +top +: provides an ongoing look at processor activity in real time. It + displays a listing of the most CPU-intensive tasks on the system, + and can provide an interactive interface for manipulating processes. + It can sort the tasks by memory usage, CPU usage and run time. +htop +: is similar to top, but shows the CPU-utilisation for all the CPUs in + the machine and allows you to scroll the list vertically and + horizontally to see all processes and their full command lines. +Setting the memory parameter {: #pbs_mem } +Once you have a good idea of the overall memory consumption of your +application, you can define it in your job script. It is wise to foresee +a margin of about 10%. +The maximum amount of physical memory used by the job per node can be +specified in a job script as: +#PBS -l mem=4gb +or on the command line +qsub -l mem=4gb +Specifying processors requirements +Users are encouraged to fully utilise all the available cores on a +certain compute node. Once the required numbers of cores and nodes are +decently specified, it is also good practice to monitor the CPU +utilisation on these cores and to make sure that all the assigned nodes +are working at full load. +Number of processors +The number of cores and nodes that a user shall request fully depends on +the architecture of the application. Developers design their +applications with a strategy for parallelization in mind. The +application can be designed for a certain fixed number or for a +configurable number of nodes and cores.
It is wise to target a specific +set of compute nodes (e.g., Westmere, Harpertown) for your computing +work and then to configure your software to nicely fill up all +processors on these compute nodes. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_3_metadata.json new file mode 100644 index 000000000000..6eca439dceab --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_3_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "fine_tuning_job_specifications", + "subtitle": "Number-of-processors", + "source_file": "../../mkdocs/docs/HPC/fine_tuning_job_specifications.md", + "title_depth": 3, + "directory": "fine_tuning_job_specifications", + "parent_title": "", + "previous_title": "fine_tuning_job_specifications_paragraph_2", + "next_title": "fine_tuning_job_specifications_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/fine_tuning_job_specifications/#number-of-processors" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_4.txt new file mode 100644 index 000000000000..7fdb9a8f6a7b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_4.txt @@ -0,0 +1,60 @@ +The /proc/cpuinfo stores info about your CPU architecture like number +of CPUs, threads, cores, information about CPU caches, CPU family, model +and much more. 
So, if you want to detect how many cores are available on +a specific machine: +$ less /proc/cpuinfo +processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 23 +model name : Intel(R) Xeon(R) CPU E5420 @ 2.50GHz +stepping : 10 +cpu MHz : 2500.088 +cache size : 6144 KB +... +Or if you want to see it in a more readable format, execute: +$ grep processor /proc/cpuinfo +processor : 0 +processor : 1 +processor : 2 +processor : 3 +processor : 4 +processor : 5 +processor : 6 +processor : 7 + note + Unless you want information of the login nodes, you'll have to issue + these commands on one of the workernodes. This is most easily achieved + in an interactive job, see the chapter on Running interactive jobs. +In order to specify the number of nodes and the number of processors per +node in your job script, use: +#PBS -l nodes=N:ppn=M +or with equivalent parameters on the command line +qsub -l nodes=N:ppn=M +This specifies the number of nodes (nodes=N) and the number of +processors per node (ppn=M) that the job should use. PBS treats a +processor core as a processor, so a system with eight cores per compute +node can have ppn=8 as its maximum ppn request. +You can also use this statement in your job script: +#PBS -l nodes=N:ppn=all +to request all cores of a node, or +#PBS -l nodes=N:ppn=half +to request half of them. +Note that unless a job has some inherent parallelism of its own through +something like MPI or OpenMP, requesting more than a single processor on +a single node is usually wasteful and can impact the job start time. 
+Monitoring the CPU-utilisation +This could also be monitored with the htop command: +htop +Example output: + 1 [||| 11.0%] 5 [|| 3.0%] 9 [|| 3.0%] 13 [ 0.0%] + 2 [|||||100.0%] 6 [ 0.0%] 10 [ 0.0%] 14 [ 0.0%] + 3 [|| 4.9%] 7 [|| 9.1%] 11 [ 0.0%] 15 [ 0.0%] + 4 [|| 1.8%] 8 [ 0.0%] 12 [ 0.0%] 16 [ 0.0%] + Mem[|||||||||||||||||59211/64512MB] Tasks: 323, 932 thr; 2 running + Swp[|||||||||||| 7943/20479MB] Load average: 1.48 1.46 1.27 + Uptime: 211 days(!), 22:12:58 + PID USER PRI NI VIRT RES SHR S CPU% MEM% TIME+ Command +22350 vsc00000 20 0 1729M 1071M 704 R 98.0 1.7 27:15.59 bwa index + 7703 root 0 -20 10.1G 1289M 70156 S 11.0 2.0 36h10:11 /usr/lpp/mmfs/bin +27905 vsc00000 20 0 123M 2800 1556 R 7.0 0.0 0:17.51 htop diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_4_metadata.json new file mode 100644 index 000000000000..fa856402fed1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_4_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "fine_tuning_job_specifications", + "subtitle": "Monitoring-the-CPU-utilisation", + "source_file": "../../mkdocs/docs/HPC/fine_tuning_job_specifications.md", + "title_depth": 3, + "directory": "fine_tuning_job_specifications", + "links": { + "0": "https://docs.hpc.ugent.be/running_interactive_jobs" + }, + "parent_title": "", + "previous_title": "fine_tuning_job_specifications_paragraph_3", + "next_title": "fine_tuning_job_specifications_paragraph_5", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/fine_tuning_job_specifications/#monitoring-the-cpu-utilisation" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_5.txt new file mode 100644 index 000000000000..20c96d20fc70 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_5.txt @@ -0,0 +1,42 @@ +The advantage of htop is that it shows you the cpu utilisation for all +processors as well as the details per application. A nice exercise is to +start 4 instances of the "cpu_eat" program in 4 different terminals, and +inspect the cpu utilisation per processor with monitor and htop. +If htop reports that your program is taking 75% CPU on a certain processor, +it means that 75% of the samples taken by top found your process active +on the CPU. The rest of the time your application was in a wait. (It is +important to remember that a CPU is a discrete state machine. It really +can be at only 100%, executing an instruction, or at 0%, waiting for +something to do. There is no such thing as using 45% of a CPU. The CPU +percentage is a function of time.) However, it is likely that your +application's rest periods include waiting to be dispatched on a CPU and +not on external devices. That part of the wait percentage is then very +relevant to understanding your overall CPU usage pattern. +Fine-tuning your executable and/or job script +It is good practice to perform a number of run time stress tests, and to +check the CPU utilisation of your nodes. We (and all other users of the +HPC) would appreciate that you use the maximum of the CPU resources that +are assigned to you and make sure that there are no CPUs in your node +who are not utilised without reasons. +But how can you maximise? +1. Configure your software. (e.g., to exactly use the available amount + of processors in a node) +2. 
Develop your parallel program in a smart way. +3. Demand a specific type of compute node (e.g., Harpertown, Westmere), + which have a specific number of cores. +4. Correct your request for CPUs in your job script. +The system load +On top of the CPU utilisation, it is also important to check the system load. +The system load is a measure of the amount of computational work that a computer +system performs. +The system load is the number of applications running or waiting to run +on the compute node. In a system with for example four CPUs, a load +average of 3.61 would indicate that there were, on average, 3.61 +processes ready to run, and each one could be scheduled into a CPU. +The load averages differ from CPU percentage in two significant ways: +1. "load averages" measure the trend of processes waiting to be run + (and not only an instantaneous snapshot, as does CPU percentage); + and +2. "load averages" include all demand for all resources, e.g., CPU + and also I/O and network (and not only how much was active at the + time of measurement). 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_5_metadata.json new file mode 100644 index 000000000000..b0f65667858f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_5_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "fine_tuning_job_specifications", + "subtitle": "The-system-load", + "source_file": "../../mkdocs/docs/HPC/fine_tuning_job_specifications.md", + "title_depth": 2, + "directory": "fine_tuning_job_specifications", + "parent_title": "", + "previous_title": "fine_tuning_job_specifications_paragraph_4", + "next_title": "fine_tuning_job_specifications_paragraph_6", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/fine_tuning_job_specifications/#the-system-load" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_6.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_6.txt new file mode 100644 index 000000000000..b38d226db361 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_6.txt @@ -0,0 +1,38 @@ +Optimal load +What is the "optimal load" rule of thumb? +The load averages tell us whether our physical CPUs are over- or +under-utilised. The point of perfect utilisation, meaning that the CPUs are always busy and, yet, no +process ever waits for one, is **the +average matching the number of CPUs**. Your load should not exceed the number +of cores available. 
E.g., if there are four CPUs on a machine and the +reported one-minute load average is 4.00, the machine has been utilising +its processors perfectly for the last 60 seconds. The "100% utilisation" +mark is 1.0 on a single-core system, 2.0 on a dual-core, 4.0 on a +quad-core, etc. The optimal load shall be between 0.7 and 1.0 per +processor. +In general, the intuitive idea of load averages is the higher they rise +above the number of processors, the more processes are waiting and doing +nothing, and the lower they fall below the number of processors, the +more untapped CPU capacity there is. +Load averages do include any processes or threads waiting on I/O, +networking, databases or anything else not demanding the CPU. This means +that the optimal number of applications running on a system at the +same time, might be more than one per processor. +The "optimal number of applications" running on one machine at the same time depends on the type of +the applications that you are running. +1. When you are running computational intensive applications, one application per processor will generate + the optimal load. +2. For I/O intensive applications (e.g., applications which perform a lot of disk-I/O), a higher + number of applications can generate the optimal load. While some + applications are reading or writing data on disks, the processors + can serve other applications. +The optimal number of applications on a machine could be empirically +calculated by performing a number of stress tests, whilst checking the +highest throughput. There is however no manner in the HPC at the moment to +specify the maximum number of applications that shall run per core +dynamically. The HPC scheduler will not launch more than one process per +core. +The manner how the cores are spread out over CPUs does not matter for +what regards the load. Two quad-cores perform similar to four +dual-cores, and again perform similar to eight single-cores. It's all +eight cores for these purposes. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_6_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_6_metadata.json new file mode 100644 index 000000000000..4a3b26ca3c25 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_6_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "fine_tuning_job_specifications", + "subtitle": "Optimal-load", + "source_file": "../../mkdocs/docs/HPC/fine_tuning_job_specifications.md", + "title_depth": 3, + "directory": "fine_tuning_job_specifications", + "parent_title": "", + "previous_title": "fine_tuning_job_specifications_paragraph_5", + "next_title": "fine_tuning_job_specifications_paragraph_7", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/fine_tuning_job_specifications/#optimal-load" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_7.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_7.txt new file mode 100644 index 000000000000..6959f61d299a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_7.txt @@ -0,0 +1,42 @@ +Monitoring the load +The load average represents the average system load over a period of time. It +conventionally appears in the form of three numbers, which represent the +system load during the last one-, five-, and fifteen-minute periods. 
+The uptime command will show us the average load +$ uptime +10:14:05 up 86 days, 12:01, 11 users, load average: 0.60, 0.41, 0.41 +Now, compile and start a few instances of the "eat_cpu" program in the background, +and check the effect on the load again: +$ gcc -O2 eat_cpu.c -o eat_cpu +$ ./eat_cpu& +$ ./eat_cpu& +$ ./eat_cpu& +$ uptime +10:14:42 up 86 days, 12:02, 11 users, load average: 2.60, 0.93, 0.58 +You can also read it in the htop command. +Fine-tuning your executable and/or job script +It is good practice to perform a number of run time stress tests, and to +check the system load of your nodes. We (and all other users of the HPC) +would appreciate that you use the maximum of the CPU resources that are +assigned to you and make sure that there are no CPUs in your node who +are not utilised without reasons. +But how can you maximise? +1. Profile your software to improve its performance. +2. Configure your software (e.g., to exactly use the available amount + of processors in a node). +3. Develop your parallel program in a smart way, so that it fully + utilises the available processors. +4. Demand a specific type of compute node (e.g., Harpertown, Westmere), + which have a specific number of cores. +5. Correct your request for CPUs in your job script. +And then check again. +Checking File sizes & Disk I/O +Monitoring File sizes during execution +Some programs generate intermediate or output files, the size of which +may also be a useful metric. +Remember that your available disk space on the HPC online storage is +limited, and that you have environment variables which point to these +directories available (i.e., *$VSC_DATA*, *$VSC_SCRATCH* and +*$VSC_DATA*). On top of those, you can also access some temporary +storage (i.e., the /tmp directory) on the compute node, which is defined +by the *$VSC_SCRATCH_NODE* environment variable. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_7_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_7_metadata.json new file mode 100644 index 000000000000..f5295cfe3ce0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_7_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "fine_tuning_job_specifications", + "subtitle": "Monitoring-File-sizes-during-execution", + "source_file": "../../mkdocs/docs/HPC/fine_tuning_job_specifications.md", + "title_depth": 3, + "directory": "fine_tuning_job_specifications", + "parent_title": "", + "previous_title": "fine_tuning_job_specifications_paragraph_6", + "next_title": "fine_tuning_job_specifications_paragraph_8", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/fine_tuning_job_specifications/#monitoring-file-sizes-during-execution" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_8.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_8.txt new file mode 100644 index 000000000000..2d28c8b96a01 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_8.txt @@ -0,0 +1,33 @@ +It is important to be aware of the sizes of the file that will be +generated, as the available disk space for each user is limited. We +refer to section +How much disk space do I get? on Quotas to check your +quota and tools to find which files consumed the "quota". +Several actions can be taken, to avoid storage problems: +1. Be aware of all the files that are generated by your program. 
Also + check out the hidden files. +2. Check your quota consumption regularly. +3. Clean up your files regularly. +4. First work (i.e., read and write) with your big files in the local + /tmp directory. Once finished, you can move your files once to the + VSC_DATA directories. +5. Make sure your programs clean up their temporary files after + execution. +6. Move your output results to your own computer regularly. +7. Anyone can request more disk space to the HPC staff, but you will have + to duly justify your request. +Specifying network requirements +Users can examine their network activities with the htop command. When +your processors are 100% busy, but you see a lot of red bars and only +limited green bars in the htop screen, it is mostly an indication that +they lose a lot of time with inter-process communication. +Whenever your application utilises a lot of inter-process communication +(as is the case in most parallel programs), we strongly recommend to +request nodes with an "InfiniBand" network. The InfiniBand is a +specialised high bandwidth, low latency network that enables large +parallel jobs to run as efficiently as possible. 
+The parameter to add in your job script would be: +#PBS -l ib +If for some other reasons, a user is fine with the gigabit Ethernet +network, he can specify: +#PBS -l gbe diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_8_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_8_metadata.json new file mode 100644 index 000000000000..601614c5cccb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/fine_tuning_job_specifications/fine_tuning_job_specifications_paragraph_8_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "fine_tuning_job_specifications", + "subtitle": "Specifying-network-requirements", + "source_file": "../../mkdocs/docs/HPC/fine_tuning_job_specifications.md", + "title_depth": 2, + "directory": "fine_tuning_job_specifications", + "links": { + "0": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#how-much-disk-space-do-i-get", + "1": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data/#quota" + }, + "parent_title": "", + "previous_title": "fine_tuning_job_specifications_paragraph_7", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/fine_tuning_job_specifications/#specifying-network-requirements" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_3.txt new file mode 100644 index 000000000000..57beab61703c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_3.txt @@ -0,0 +1,37 @@ +Our job script looks like this: +#!/bin/bash +module load TensorFlow/2.11.0-foss-2022a +python tensorflow_mnist.py +As you can see this job script will run the Python script 
named tensorflow_mnist.py. +The jobs you submit are per default executed on cluster/doduo, you can swap to another cluster by issuing the following command. +module swap cluster/donphan + Tip + + When submitting jobs with a limited amount of resources, it is recommended to use the debug/interactive cluster: donphan. + To get a list of all clusters and their hardware, see https://www.ugent.be/hpc/en/infrastructure. +This job script can now be submitted to the cluster's job system for execution, using the qsub (queue submit) command: +$ qsub run.sh +123456 +This command returns a job identifier (123456) on the HPC cluster. This is a unique identifier for the job which can be used to monitor and manage your job. + Warning "Make sure you understand what the module command does" + + Note that the module commands only modify environment variables. For instance, running module swap cluster/donphan will update your shell environment so that qsub submits a job to the donphan cluster, + but our active shell session is still running on the login node. + + It is important to understand that while module commands affect your session environment, they do not change where the commands you are running are being executed: they will still be run on the login node you are on. + + When you submit a job script however, the commands in the job script will be run on a workernode of the cluster the job was submitted to (like donphan). +For detailed information about module commands, read the running batch jobs chapter. +Wait for job to be executed +Your job is put into a queue before being executed, so it may take a while before it actually starts. +(see when will my job start? for scheduling policy). 
+You can get an overview of the active jobs using the qstat command: +$ qstat +Job ID Name User Time Use S Queue +---------- ---------------- --------------- -------- - ------- +123456 run.sh vsc40000 0:00:00 Q donphan +Eventually, after entering qstat again you should see that your job has started running: +$ qstat +Job ID Name User Time Use S Queue +---------- ---------------- --------------- -------- - ------- +123456 run.sh vsc40000 0:00:01 R donphan diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_3_metadata.json new file mode 100644 index 000000000000..7b6b1c90dbdf --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_3_metadata.json @@ -0,0 +1,17 @@ +{ + "main_title": "getting_started", + "subtitle": "Wait-for-job-to-be-executed", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "links": { + "0": "https://docs.hpc.ugent.be/interactive_debug/#interactive-and-debug-cluster", + "1": "https://docs.hpc.ugent.be/running_batch_jobs", + "2": "https://docs.hpc.ugent.be/running_batch_jobs/#when-will-my-job-start" + }, + "parent_title": "", + "previous_title": "getting_started_paragraph_2", + "next_title": "getting_started_paragraph_4", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/getting_started/#wait-for-job-to-be-executed" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_4.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_4.txt new file mode 100644 index 000000000000..24177a95b150 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_4.txt @@ -0,0 +1,32 @@ 
+If you don't see your job in the output of the qstat command anymore, your job has likely completed. +Read this section on how to interpret the output. +Inspect your results +When your job finishes it generates 2 output files: +- One for normal output messages (stdout output channel). +- One for warning and error messages (stderr output channel). +By default located in the directory where you issued qsub. + Info + For more information about the stdout and stderr output channels, see this section. +In our example when running ls in the current directory you should see 2 new files: + +- run.sh.o123456, containing normal output messages produced by job 123456; +- run.sh.e123456, containing errors and warnings produced by job 123456. + Info + + run.sh.e123456 should be empty (no errors or warnings). + Warning "Use your own job ID" + Replace 123456 with the jobid you got from the qstat command (see above) or simply look for added files in your current directory by running ls. +When examining the contents of run.sh.o123456 you will see something like this: +Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz +11493376/11490434 [==============================] - 1s 0us/step +Epoch 1/5 +1875/1875 [==============================] - 2s 823us/step - loss: 0.2960 - accuracy: 0.9133 +Epoch 2/5 +1875/1875 [==============================] - 1s 771us/step - loss: 0.1427 - accuracy: 0.9571 +Epoch 3/5 +1875/1875 [==============================] - 1s 767us/step - loss: 0.1070 - accuracy: 0.9675 +Epoch 4/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0881 - accuracy: 0.9727 +Epoch 5/5 +1875/1875 [==============================] - 1s 764us/step - loss: 0.0741 - accuracy: 0.9768 +313/313 - 0s - loss: 0.0782 - accuracy: 0.9764 diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_4_metadata.json 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_4_metadata.json new file mode 100644 index 000000000000..22175edefccb --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_4_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "getting_started", + "subtitle": "Inspect-your-results", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "links": { + "0": "https://docs.hpc.ugent.be/running_batch_jobs/#monitoring-and-managing-your-jobs", + "1": "https://docs.hpc.ugent.be/linux-tutorial/beyond_the_basics/#inputoutput" + }, + "parent_title": "", + "previous_title": "getting_started_paragraph_3", + "next_title": "getting_started_paragraph_5", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/getting_started/#inspect-your-results" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_5.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_5.txt new file mode 100644 index 000000000000..b8999963f1d1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_5.txt @@ -0,0 +1,10 @@ +Hurray 🎉, we trained a deep learning model and achieved 97.64 percent accuracy. + Warning + When using TensorFlow specifically, you should actually submit jobs to a GPU cluster for better performance, see GPU clusters. + For the purpose of this example, we are running a very small TensorFlow workload on a CPU-only cluster. 
+Next steps +- Running interactive jobs +- Running jobs with input/output data +- Multi core jobs/Parallel Computing +- Interactive and debug cluster +For more examples see Program examples and Job script examples \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_5_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_5_metadata.json new file mode 100644 index 000000000000..fe8ab2642fd6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/getting_started/getting_started_paragraph_5_metadata.json @@ -0,0 +1,21 @@ +{ + "main_title": "getting_started", + "subtitle": "Next-steps", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "links": { + "0": "https://docs.hpc.ugent.be/gpu", + "1": "https://docs.hpc.ugent.be/running_interactive_jobs", + "2": "https://docs.hpc.ugent.be/running_jobs_with_input_output_data", + "3": "https://docs.hpc.ugent.be/multi_core_jobs", + "4": "https://docs.hpc.ugent.be/interactive_debug/#interactive-and-debug-cluster", + "5": "https://docs.hpc.ugent.be/program_examples", + "6": "https://docs.hpc.ugent.be/jobscript_examples" + }, + "parent_title": "", + "previous_title": "getting_started_paragraph_4", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/getting_started/#next-steps" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_1.txt new file mode 100644 index 000000000000..0a4b02b08dd4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_1.txt @@ -0,0 +1,48 @@ +GPU clusters +Submitting jobs +To submit jobs to the joltik GPU cluster, where each node provides 4 +NVIDIA V100 GPUs (each with 32GB of GPU memory), 
use: +module swap cluster/joltik +To submit to the accelgor GPU cluster, where each node provides 4 +NVIDIA A100 GPUs (each with 80GB GPU memory), use: +module swap cluster/accelgor +Then use the familiar qsub, qstat, etc. commands, taking into +account the guidelines outlined in +section Requesting (GPU) resources. +Interactive jobs +To interactively experiment with GPUs, you can submit an interactive job +using qsub -I (and request one or more GPUs, see +section Requesting (GPU) resources). +Note that due to a bug in Slurm you will currently not be +able to interactively use MPI software that requires access to the GPUs. +If you need this, please contact us via hpc@ugent.be. +Hardware +See https://www.ugent.be/hpc/en/infrastructure. +Requesting (GPU) resources +There are 2 main ways to ask for GPUs as part of a job: +- Either as a node property (similar to the number of cores per node + specified via ppn) using -l nodes=X:ppn=Y:gpus=Z (where the + ppn=Y is optional), or as a separate resource request (similar to + the amount of memory) via -l gpus=Z. Both notations give exactly + the same result. The -l gpus=Z is convenient if you only need one + node and you are fine with the default number of cores per GPU. The + -l nodes=...:gpus=Z notation is required if you want to run with + full control or in multinode cases like MPI jobs. If you do not + specify the number of GPUs by just using -l gpus, you get by + default 1 GPU. +- As a resource of its own, via --gpus X. In this case however, you + are not guaranteed that the GPUs are on the same node, so your + script or code must be able to deal with this. 
+ diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_1_metadata.json new file mode 100644 index 000000000000..f426ff058e68 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_1_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "gpu", + "subtitle": "Requesting-(GPU)-resources", + "source_file": "../../mkdocs/docs/HPC/gpu.md", + "title_depth": 2, + "directory": "gpu", + "links": { + "0": "https://docs.hpc.ugent.be/gpu/#requesting-gpu-resources", + "1": "https://docs.hpc.ugent.be/gpu/#requesting-gpu-resources" + }, + "parent_title": "", + "previous_title": null, + "next_title": "gpu_paragraph_2", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/gpu/#requesting-gpu-resources" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_2.txt new file mode 100644 index 000000000000..240700d58b9e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_2.txt @@ -0,0 +1,35 @@ +Some background: +- The GPUs are constrained to the jobs (like the CPU cores), but do + not run in so-called "exclusive" mode. +- The GPUs run with the so-called "persistence daemon", so the GPUs are + not re-initialised between jobs. + + +Attention points +Some important attention points: +- For MPI jobs, we recommend the (new) wrapper mypmirun from the + vsc-mympirun module (pmi is the background mechanism to start + the MPI tasks, and is different from the usual mpirun that is used + by the mympirun wrapper). At some later point, we might promote + the mypmirun tool or rename it, to avoid the confusion in the + naming. +- Sharing GPUs requires MPS. The Slurm built-in MPS does not really do + what you want, so we will provide integration with mypmirun and + wurker. 
+- For parallel work, we are working on a wurker wrapper from the + vsc-mympirun module that supports GPU placement and MPS, without + any limitations wrt the requested resources (i.e. also support the + case where GPUs are spread heterogeneous over nodes from using the + --gpus Z option). +- Both mypmirun and wurker will try to do the most optimised + placement of cores and tasks, and will provide 1 (optimal) GPU per + task/MPI rank, and set one so-called visible device (i.e. + CUDA_VISIBLE_DEVICES only has 1 ID). The actual devices are not + constrained to the ranks, so you can access all devices requested in + the job. We know that at this moment, this is not working properly, but we are working on this. We advise against trying to fix this yourself. + + diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_2_metadata.json new file mode 100644 index 000000000000..b0d70feecff9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "gpu", + "subtitle": "Attention-points", + "source_file": "../../mkdocs/docs/HPC/gpu.md", + "title_depth": 2, + "directory": "gpu", + "parent_title": "", + "previous_title": "gpu_paragraph_1", + "next_title": "gpu_paragraph_3", + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/gpu/#attention-points" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_3.txt new file mode 100644 index 000000000000..1b92c087db3e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_3.txt @@ -0,0 +1,40 @@ +Software with GPU support +Use module avail to check for centrally installed software. 
+The subsections below only cover a couple of installed software +packages, more are available. +GROMACS +Please consult module avail GROMACS for a list of installed versions. +Horovod +Horovod can be used for (multi-node) multi-GPU TensorFlow/PyTorch +calculations. +Please consult module avail Horovod for a list of installed versions. +Horovod supports TensorFlow, Keras, PyTorch and MxNet (see +https://github.com/horovod/horovod#id9), but should be run as an MPI +application with mypmirun. (Horovod also provides its own wrapper +horovodrun, not sure if it handles placement and others correctly). +At least for simple TensorFlow benchmarks, it looks like Horovod is a +bit faster than usual autodetect multi-GPU TensorFlow without horovod, +but it comes at the cost of the code modifications to use horovod. + +PyTorch +Please consult module avail PyTorch for a list of installed versions. +TensorFlow +Please consult module avail TensorFlow for a list of installed +versions. +Note: for running TensorFlow calculations on multiple GPUs and/or on more than one workernode, use Horovod, see section Horovod. +Example TensorFlow job script +#!/bin/bash +#PBS -l walltime=5:0:0 +#PBS -l nodes=1:ppn=quarter:gpus=1 +module load TensorFlow/2.6.0-foss-2021a-CUDA-11.3.1 +cd $PBS_O_WORKDIR +python example.py + +AlphaFold +Please consult module avail AlphaFold for a list of installed +versions. +For more information on using AlphaFold, we strongly recommend the +VIB-UGent course available at +https://elearning.bits.vib.be/courses/alphafold. +Getting help +In case of questions or problems, please contact the HPC-UGent team via hpc@ugent.be, and clearly +indicate that your question relates to the joltik cluster by adding +[joltik] in the email subject. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_4_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_4_metadata.json new file mode 100644 index 000000000000..5a48827bb3b8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/generic/gpu/gpu_paragraph_4_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "gpu", + "subtitle": "Getting-help", + "source_file": "../../mkdocs/docs/HPC/gpu.md", + "title_depth": 2, + "directory": "gpu", + "parent_title": "", + "previous_title": "gpu_paragraph_3", + "next_title": null, + "OS": "generic", + "reference_link": "https://docs.hpc.ugent.be/gpu/#getting-help" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_2.1.txt new file mode 100644 index 000000000000..3a282a73a15d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_2.1.txt @@ -0,0 +1,4 @@ +How do SSH keys work +Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..518f856303f0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_2.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "links": { + "0": "https://docs.hpc.ugent.be/linux-tutorial" + }, + "previous_title": "account_paragraph_1", + "next_title": "account_paragraph_3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.1.txt new file mode 100644 index 000000000000..a48d77380037 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.1.txt @@ -0,0 +1,44 @@ +How do SSH keys work +Launch a terminal from your desktop's application menu and you will see +the bash shell. There are other shells, but most Linux distributions use +bash by default. +Test OpenSSH +Secure Shell (ssh) is a cryptographic network protocol for secure data +communication, remote command-line login, remote command execution, and +other secure network services between two networked computers. In short, +ssh provides a secure connection between 2 computers via insecure +channels (Network, Internet, telephone lines, ...). +"Secure" means that: +1. the User is authenticated to the System; and +2. 
the System is authenticated to the User; and +3. all data is encrypted during transfer. +OpenSSH is a FREE implementation of the SSH connectivity protocol. Linux comes +with its own implementation of OpenSSH, so you don't need to install any +third-party software to use it. Just open a terminal window and jump in! +On all popular Linux distributions, the OpenSSH software is readily +available, and most often installed by default. You can check whether +the OpenSSH software is installed by opening a terminal and typing: +$ ssh -V +OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 +To access the clusters and transfer your files, you will use the +following commands: +1. ssh-keygen: to generate the SSH key pair (public + private key); +2. ssh: to open a shell on a remote machine; +3. sftp: a secure equivalent of ftp; +4. scp: a secure equivalent of the remote copy command rcp. +Generate a public/private key pair with OpenSSH +A key pair might already be present in the default location inside your +home directory. Therefore, we first check if a key is available with the +"list short" ("ls") command: +ls ~/.ssh +If a key-pair is already available, you would normally get: +authorized_keys id_rsa id_rsa.pub known_hosts +Otherwise, the command will show: +ls: .ssh: No such file or directory +You can recognise a public/private key pair when a pair of files has the +same name except for the extension ".pub" added to one of them. In this +particular case, the private key is "id_rsa" and public key is +"id_rsa.pub". You may have multiple keys (not necessarily in the +directory "~/.ssh") if you or your operating system requires this. Be +aware that your existing key pair might be too short, or not the right +type. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.1_metadata.json new file mode 100644 index 000000000000..d3ea56673f0a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_2", + "next_title": "account_linux_paragraph_3.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.2.txt new file mode 100644 index 000000000000..ae6c282a95d5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.2.txt @@ -0,0 +1,19 @@ +You will need to generate a new key pair, when: +1. you don't have a key pair yet +2. you forgot the passphrase protecting your private key +3. your private key was compromised +4. your key pair is too short or not the right type +For extra security, the private key itself can be encrypted using a +"passphrase", to prevent anyone from using your private key even when +they manage to copy it. You have to "unlock" the private key by typing +the passphrase. Be sure to never give away your private key, it is +private and should stay private. 
You should not even copy it to one of +your other machines, instead, you should create a new public/private key +pair for each machine. +ssh-keygen -t rsa -b 4096 +This will ask you for a file name to store the private and public key, +and a passphrase to protect your private key. It needs to be emphasised +that you really should choose the passphrase wisely! The system will ask +you for it every time you want to use the private key that is every time +you want to access the cluster or transfer your files. +Without your key pair, you won't be able to apply for a personal VSC account. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.2_metadata.json new file mode 100644 index 000000000000..118b5b02f2c7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_3.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "previous_title": "account_linux_paragraph_3.1", + "next_title": "account_paragraph_4", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt new file mode 100644 index 000000000000..5072be0d3035 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1.txt @@ -0,0 +1,22 @@ +Using an SSH agent (optional) +Most recent Unix derivatives include by default an 
SSH agent ("gnome-keyring-daemon" in most cases) +to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into +the SSH manager keyring to be able to connect to the HPC cluster. If +not, SSH client will display an error message (see Connecting) similar to this: +Agent admitted failure to sign using the key. +Permission denied (publickey,gssapi-keyex,gssapi-with-mic). +This could be fixed using the ssh-add command. You can include the new +private keys' identities in your keyring with: +ssh-add + tip + Without extra options ssh-add adds any key located at $HOME/.ssh + directory, but you can specify the private key location path as + argument, as example: ssh-add /path/to/my/id_rsa. +Check that your key is available from the keyring with: +ssh-add -l +After these changes the key agent will keep your SSH key to connect to +the clusters as usual. + tip + You should execute ssh-add command again if you generate a new SSH + key. +Visit https://wiki.gnome.org/Projects/GnomeKeyring/Ssh for more information. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json new file mode 100644 index 000000000000..67ea12b9f7e2 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_4.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "links": { + "0": "https://docs.hpc.ugent.be/connecting" + }, + "previous_title": "account_paragraph_3", + "next_title": "account_paragraph_5", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt new file mode 100644 index 000000000000..aa5284241c10 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1.txt @@ -0,0 +1,37 @@ +Welcome e mail +Applying for the account +Visit https://account.vscentrum.be/ +You will be redirected to our WAYF (Where Are You From) service where +you have to select your "Home Organisation". +Select "UGent" in the dropdown box and optionally select "Save my preference" +and "permanently". +Click "Confirm" +You will now be taken to the authentication page of your institute. +You will now have to log in with CAS using your UGent account. +You either have a login name of maximum 8 characters, or a (non-UGent) +email address if you are an external user. In case of problems with your +UGent password, please visit: https://password.ugent.be/. 
After +logging in, you may be requested to share your information. Click "Yes, +continue". +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. +This file has been stored in the directory "~/.ssh/". +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. +Welcome e-mail +Within one day, you should receive a Welcome e-mail with your VSC +account details. +Dear (Username), +Your VSC-account has been approved by an administrator. +Your vsc-username is vsc40000 +Your account should be fully active within one hour. +To check or update your account information please visit +https://account.vscentrum.be/ +For further info please visit https://www.vscentrum.be/user-portal +Kind regards, +-- The VSC administrators \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..3cd42cf6af19 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_5.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "account", + "subtitle": "Welcome-e-mail", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_4", + "next_title": "account_paragraph_6", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#welcome-e-mail" 
+} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt new file mode 100644 index 000000000000..204ca799a10f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1.txt @@ -0,0 +1,34 @@ +Computation Workflow on the HPC +Now, you can start using the HPC. You can always look up your VSC id later +by visiting https://account.vscentrum.be. +Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. +1. Create a new public/private SSH key pair from the new computer. + Repeat the process described in + section Generate a public/private key pair with OpenSSH. +2. Go to https://account.vscentrum.be/django/account/edit +3. Upload the new SSH public key using the Add public key section. Make sure that your + public key is actually saved, because a public key will be refused + if it is too short, wrong type, or in a wrong format. +4. (optional) If you lost your key, you can delete the old key on the + same page. You should keep at least one valid public SSH key in your + account. +5. Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. +Computation Workflow on the HPC +A typical Computation workflow will be: +1. Connect to the HPC +2. Transfer your files to the HPC +3. Compile your code and test it +4. Create a job script +5. Submit your job +6. Wait while + 1. your job gets into the queue + 2. your job gets executed + 3. your job finishes +7. Move your results +We'll take you through the different tasks one by one in the following +chapters.
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json new file mode 100644 index 000000000000..e0293f7bc86d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/account/account_linux_paragraph_6.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "Computation-Workflow-on-the-HPC", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 2, + "directory": "account", + "parent_title": "account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" + }, + "previous_title": "account_paragraph_5", + "next_title": null, + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/account/#computation-workflow-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_2.1.txt new file mode 100644 index 000000000000..773d03f06893 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_2.1.txt @@ -0,0 +1,7 @@ +First Time connection to the HPC infrastructure +ssh_exchange_identification: read: Connection reset by peer +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. +If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..a4f07157426e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_2.1_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "parent_title": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/troubleshooting/#issues-connecting-to-login-node" + }, + "previous_title": "connecting_paragraph_1", + "next_title": "connecting_paragraph_3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_3.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_3.1.txt new file mode 100644 index 000000000000..edcf3dcf60d7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_3.1.txt @@ -0,0 +1,47 @@ +First Time connection to the HPC infrastructure +Connect +Open up a terminal and enter the following command to connect to the HPC. +ssh vsc40000@login.hpc.ugent.be +Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login +node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. 
+The first time you make a connection to the login node, you will be +asked to verify the authenticity of the login node. Please check +Warning message when first connecting to new host on how to do this. +A possible error message you can get if you previously saved your +private key somewhere else than the default location +($HOME/.ssh/id_rsa): +Permission denied (publickey,gssapi-keyex,gssapi-with-mic). +In this case, use the -i option for the ssh command to specify the +location of your private key. For example: +ssh -i /home/example/my_keys +Congratulations, you're on the HPC infrastructure now! +To find out where you have landed you can print the current working directory: +$ pwd +/user/home/gent/vsc400/vsc40000 +Your new private home directory is "/user/home/gent/vsc400/vsc40000". Here you can create your own +subdirectory structure, copy and prepare your applications, compile and +test them and submit your jobs on the HPC. +$ cd /apps/gent/tutorials +$ ls +Intro-HPC/ +This directory currently contains all training material for the Introduction to the HPC. More +relevant training material to work with the HPC can always be added later in +this directory. +You can now explore the content of this directory with the "ls --l" (lists long) and the "cd" (change directory) commands: +As we are interested in the use of the HPC, move further to Intro-HPC and explore the +contents up to 2 levels deep: +$ cd Intro-HPC +$ tree -L 2 +. 
+'-- examples + |-- Compiling-and-testing-your-software-on-the-HPC + |-- Fine-tuning-Job-Specifications + |-- Multi-core-jobs-Parallel-Computing + |-- Multi-job-submission + |-- Program-examples + |-- Running-batch-jobs + |-- Running-jobs-with-input + |-- Running-jobs-with-input-output-data + |-- example.pbs + '-- example.sh +9 directories, 5 files \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_3.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_3.1_metadata.json new file mode 100644 index 000000000000..4d2bbae35656 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_3.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" + }, + "previous_title": "connecting_paragraph_2", + "next_title": "connecting_paragraph_4", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt new file mode 100644 index 000000000000..8c2bd12d3428 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1.txt @@ -0,0 +1,61 @@ +First Time connection to the HPC infrastructure +You can exit the connection at anytime by entering: +$ exit +logout +Connection to login.hpc.ugent.be closed. 
+ tip "tip: Setting your Language right" + You may encounter a warning message similar to the following one during connecting: + perl: warning: Setting locale failed. + perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_CTYPE = "UTF-8", + LANG = (unset) + are supported and installed on your system. + perl: warning: Falling back to the standard locale ("C"). + or any other error message complaining about the locale. + This means that the correct "locale" has not yet been properly specified on your local machine. Try: + LANG= + LC_COLLATE="C" + LC_CTYPE="UTF-8" + LC_MESSAGES="C" + LC_MONETARY="C" + LC_NUMERIC="C" + LC_TIME="C" + LC_ALL= + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Note + If you try to set a non-supported locale, then it will be automatically + set to the default. Currently the default is en_US.UFT-8 or en_US, + depending on whether your originally (non-supported) locale was UTF-8 or not. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" 
+ + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. + \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..8f814f7b8380 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_5.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "parent_title": "connecting", + "previous_title": "connecting_paragraph_4", + "next_title": "connecting_paragraph_6", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_6.1.txt new file mode 100644 index 000000000000..d872c89a0f83 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_6.1.txt @@ -0,0 +1,6 @@ +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your 
desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. Linux ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_6.1_metadata.json new file mode 100644 index 000000000000..43070f5ad834 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_6.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Transfer-Files-tofrom-the-HPC", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "parent_title": "connecting", + "previous_title": "connecting_paragraph_5", + "next_title": "connecting_paragraph_7", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#transfer-files-tofrom-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.1.txt new file mode 100644 index 000000000000..319433fd88a1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.1.txt @@ -0,0 +1,42 @@ +Transfer Files to/from the HPC +Using scp +Secure copy or SCP is a tool (command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol.
The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. +$ hostname + +If you're still using the terminal that is connected to the HPC, close the +connection by typing "exit" in the terminal window. +For example, we will copy the (local) file "localfile.txt" to your +home directory on the HPC cluster. We first generate a small dummy +"localfile.txt", which contains the word "Hello". Use your own VSC +account, which is something like "vsc40000". Don't forget the colon (:) at the +end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your +local filesystem. You can even specify where to save the file on the +remote filesystem by putting a path after the colon. +$ echo "Hello" > localfile.txt +$ ls -l +... 
+-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +$ scp localfile.txt vsc40000@login.hpc.ugent.be: +localfile.txt 100% 6 0.0KB/s 00:00 +Connect to the HPC via another terminal, print the working directory (to +make sure you're in the home directory) and check whether the file has +arrived: +$ pwd +/user/home/gent/vsc400/vsc40000 +$ ls -l +total 1536 +drwxrwxr-x 2 +drwxrwxr-x 2 +drwxrwxr-x 10 +-rw-r--r-- 1 +$ cat localfile.txt +Hello diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.1_metadata.json new file mode 100644 index 000000000000..5903f61e437b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, + "previous_title": "connecting_paragraph_6", + "next_title": "connecting_linux_paragraph_7.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.2.txt new file mode 100644 index 000000000000..16016dc219c6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.2.txt @@ -0,0 +1,37 @@ +The scp command can also be used to copy files from the cluster to your +local machine. 
Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" +subdirectory on the cluster to your local computer. +First, we will confirm that the file is indeed in the "docs" +subdirectory. In the terminal on the login node, enter: +$ cd ~/docs +$ ls -l +total 1536 +-rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf +Now we will copy the file to the local machine. On the terminal on your +own local computer, enter: +$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . +intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 +$ ls -l +total 899 +-rw-r--r-- 1 user staff 741995 Sep 18 09:53 +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +The file has been copied from the HPC to your local computer. +It's also possible to copy entire directories (and their contents) with +the -r flag. For example, if we want to copy the local directory +dataset to $VSC_SCRATCH, we can use the following command (assuming +you've created the scratch symlink): +scp -r dataset vsc40000@login.hpc.ugent.be:scratch +If you don't use the -r option to copy a directory, you will run into +the following error: +$ scp dataset vsc40000@login.hpc.ugent.be:scratch +dataset: not a regular file +Using sftp +The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file +transfer and file management functionalities over any reliable data +stream. It was designed as an extension of the Secure Shell protocol +(SSH) version 2.0. This protocol assumes that it is run over a secure +channel, such as SSH, that the server has already authenticated the +client, and that the identity of the client user is available to the +protocol. +The sftp is an equivalent of the ftp command, with the difference that +it uses the secure ssh protocol to connect to the clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.2_metadata.json new file mode 100644 index 000000000000..a4cc992a338a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_linux_paragraph_7.1", + "next_title": "connecting_linux_paragraph_7.3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.3.txt new file mode 100644 index 000000000000..1ef13b80c6f0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.3.txt @@ -0,0 +1,18 @@ +One easy way of starting an sftp session is +sftp vsc40000@login.hpc.ugent.be +Typical and popular commands inside an sftp session are: +| | | +|:--------------------------|:-------------------------------------------------------------------------------------| +| cd ~/examples/fibo | Move to the examples/fibo subdirectory on the HPC (i.e., the remote machine) | +| ls | Get a list of the files in the current directory on the HPC. | +| get fibo.py | Copy the file "fibo.py" from the HPC | +| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory.
| +| lcd test | Move to the "test" subdirectory on your local machine. | +| lcd .. | Move up one level in the local directory. | +| lls | Get local directory listing. | +| put test.py | Copy the local file test.py to the HPC. | +| put test1.py test2.py | Copy the local file test1.py to the HPC and rename it to test2.py. | +| bye | Quit the sftp session | +| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | +| **mput *.h** | Copy all the local files with extension ".h" to the HPC. | +| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.3_metadata.json new file mode 100644 index 000000000000..eb6c58af4de1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_7.3_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/", + "1": "https://docs.hpc.ugent.be/" + }, + "previous_title": "connecting_linux_paragraph_7.2", + "next_title": "connecting_paragraph_8", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_8.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_8.1.txt new file mode 100644 index 000000000000..4ad49c5ff63b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_8.1.txt @@ -0,0 +1,17 @@ +Changing login nodes +Using a GUI +If you prefer a
GUI to transfer files back and forth to the HPC, you can +use your file browser. Open your file browser and press +++"Ctrl"+"l"++ +This should open up a address bar where you can enter a URL. +Alternatively, look for the "connect to server" option in your file +browsers menu. +Enter: sftp://vsc40000@login.hpc.ugent.be/ and press enter. +You should now be able to browse files on the HPC in your file browser. +Fast file transfer for large datasets +See the section on rsync in chapter 5 of the Linux intro manual. +Changing login nodes +It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. +For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: +ssh gligar07.gastly.os +This is also possible the other way around. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_8.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_8.1_metadata.json new file mode 100644 index 000000000000..3bd725281915 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/connecting/connecting_linux_paragraph_8.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/linux-tutorial/uploading_files/#copying-faster-with-rsync" + }, + "previous_title": "connecting_paragraph_7", + "next_title": "connecting_paragraph_9", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/connecting/#using-a-gui" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.1.txt new file mode 100644 index 000000000000..a4ec1c8370f8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.1.txt @@ -0,0 +1,30 @@ +Getting Connected +Getting Started +Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the HPC-UGent infrastructure and submitting your very first job. We'll also walk you through the process step by step using a practical example. +In addition to this chapter, you might find the recording of the Introduction to HPC-UGent training session to be a useful resource. 
+Before proceeding, read the introduction to HPC to gain an understanding of the HPC-UGent infrastructure and related terminology. +Getting Access +To get access to the HPC-UGent infrastructure, visit Getting an HPC Account. +If you have not used Linux before, +now would be a good time to follow our Linux Tutorial. +A typical workflow looks like this: +1. Connect to the login nodes +2. Transfer your files to the HPC-UGent infrastructure +3. Optional: compile your code and test it +4. Create a job script and submit your job +5. Wait for job to be executed +6. Study the results generated by your jobs, either on the cluster or + after downloading them locally. +We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using TensorFlow; +see the example scripts. +Getting Connected +There are two options to connect +- Using a terminal to connect via SSH (for power users) (see First Time connection to the HPC-UGent infrastructure) +- Using the web portal +Considering your operating system is *, +it is recommended to make use of the ssh command in a terminal to get the most flexibility. 
+Assuming you have already generated SSH keys in the previous step (Getting Access), and that they are in a default location, you should now be able to login by running the following command: +ssh vsc40000@login.hpc.ugent.be + Warning "User your own VSC account id" + + Replace vsc40000 with your VSC account id (see https://account.vscentrum.be) diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.1_metadata.json new file mode 100644 index 000000000000..02eec654266a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.1_metadata.json @@ -0,0 +1,23 @@ +{ + "main_title": "getting_started", + "subtitle": "Getting-Connected", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "parent_title": "getting_started", + "links": { + "0": "https://www.ugent.be/hpc/en/training/introhpcugent-recording", + "1": "https://docs.hpc.ugent.be/introduction", + "2": "https://docs.hpc.ugent.be/account", + "3": "https://docs.hpc.ugent.be/linux-tutorial", + "4": "https://www.tensorflow.org/", + "5": "https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist", + "6": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure", + "7": "https://docs.hpc.ugent.be/web_portal", + "8": "https://docs.hpc.ugent.be/getting_started/#getting-access" + }, + "previous_title": null, + "next_title": "getting_started_linux_paragraph_1.2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/getting_started/#getting-connected" +} \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.2.txt new file mode 100644 index 000000000000..d068845643c4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.2.txt @@ -0,0 +1,2 @@ + Tip + You can also still use the web portal (see shell access on web portal) \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.2_metadata.json new file mode 100644 index 000000000000..d6309137812a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_1.2_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "getting_started", + "subtitle": "Getting-Connected", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "parent_title": "getting_started", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal/#shell-access" + }, + "previous_title": "getting_started_linux_paragraph_1.1", + "next_title": "getting_started_paragraph_2", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/getting_started/#getting-connected" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_2.1.txt new file mode 100644 index 000000000000..438321b40a1b --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_2.1.txt @@ -0,0 +1,27 @@ +Submitting a job + Info + When having problems see the connection issues section on the troubleshooting page. +Transfer your files +Now that you can login, it is time to transfer files from your local computer to your home directory on the HPC-UGent infrastructure. +Download tensorflow_mnist.py +and run.sh example scripts to your computer (from here). +On your local machine you can run: +curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py +curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh +Using the scp command, the files can be copied from your local host to your home directory (~) on the remote host (HPC). +scp tensorflow_mnist.py run.sh vsc40000@login.hpc.ugent.be:~ +ssh vsc40000@login.hpc.ugent.be + Warning "Use your own VSC account id" + + Replace vsc40000 with your VSC account id (see https://account.vscentrum.be) + Info + For more information about transferring files or scp, see transfer files from/to hpc. +When running ls in your session on the HPC-UGent infrastructure, you should see the two files listed in your home directory (~): +$ ls ~ +run.sh tensorflow_mnist.py +When you do not see these files, make sure you uploaded the files to your home directory. +Submitting a job +Jobs are submitted and executed using job scripts. In our case run.sh can be used as a (very minimal) job script. +A job script is a shell script, a text file that specifies the resources, +the software that is used (via module load statements), +and the steps that should be executed to run the calculation.
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..e9d9e24d26a0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/linux/getting_started/getting_started_linux_paragraph_2.1_metadata.json @@ -0,0 +1,19 @@ +{ + "main_title": "getting_started", + "subtitle": "Submitting-a-job", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "parent_title": "getting_started", + "links": { + "0": "https://docs.hpc.ugent.be/troubleshooting/#sec:connecting-issues", + "1": "https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py", + "2": "https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh", + "3": "https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist", + "4": "https://docs.hpc.ugent.be/connecting/#transfer-files-tofrom-the-hpc" + }, + "previous_title": "getting_started_paragraph_1", + "next_title": "getting_started_paragraph_3", + "OS": "linux", + "reference_link": "https://docs.hpc.ugent.be/Linux/getting_started/#submitting-a-job" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_2.1.txt new file mode 100644 index 000000000000..3a282a73a15d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_2.1.txt @@ -0,0 +1,4 @@ +How do SSH keys work +Since all 
VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..d20916a48c89 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_2.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "How-do-SSH-keys-work", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "links": { + "0": "https://docs.hpc.ugent.be/linux-tutorial" + }, + "previous_title": "account_paragraph_1", + "next_title": "account_paragraph_3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#how-do-ssh-keys-work" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.1.txt new file mode 100644 index 000000000000..27bb95318223 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.1.txt @@ -0,0 +1,45 @@ +How do SSH keys work +To open a Terminal window in macOS, open the Finder and choose +*\>\> Applications \> Utilities \> Terminal* +Before requesting an account, you need to generate a pair of ssh keys. +One popular way to do this on macOS is using the OpenSSH client included with macOS, which you can then also use to log on to the clusters. 
+Test OpenSSH +Secure Shell (ssh) is a cryptographic network protocol for secure data +communication, remote command-line login, remote command execution, and +other secure network services between two networked computers. In short, +ssh provides a secure connection between 2 computers via insecure +channels (Network, Internet, telephone lines, ...). +"Secure" means that: +1. the User is authenticated to the System; and +2. the System is authenticated to the User; and +3. all data is encrypted during transfer. +OpenSSH is a FREE implementation of the SSH connectivity protocol. macOS comes +with its own implementation of OpenSSH, so you don't need to install any +third-party software to use it. Just open a terminal window and jump in! +On all popular Linux distributions, the OpenSSH software is readily +available, and most often installed by default. You can check whether +the OpenSSH software is installed by opening a terminal and typing: +$ ssh -V +OpenSSH_7.4p1, OpenSSL 1.0.2k-fips 26 Jan 2017 +To access the clusters and transfer your files, you will use the +following commands: +1. ssh-keygen: to generate the SSH key pair (public + private key); +2. ssh: to open a shell on a remote machine; +3. sftp: a secure equivalent of ftp; +4. scp: a secure equivalent of the remote copy command rcp. +Generate a public/private key pair with OpenSSH +A key pair might already be present in the default location inside your +home directory. Therefore, we first check if a key is available with the +"list short" ("ls") command: +ls ~/.ssh +If a key-pair is already available, you would normally get: +authorized_keys id_rsa id_rsa.pub known_hosts +Otherwise, the command will show: +ls: .ssh: No such file or directory +You can recognise a public/private key pair when a pair of files has the +same name except for the extension ".pub" added to one of them. In this +particular case, the private key is "id_rsa" and public key is +"id_rsa.pub". 
You may have multiple keys (not necessarily in the +directory "~/.ssh") if you or your operating system requires this. Be +aware that your existing key pair might be too short, or not the right +type. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.1_metadata.json new file mode 100644 index 000000000000..7ecf0f36e1ec --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_2", + "next_title": "account_macos_paragraph_3.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.2.txt new file mode 100644 index 000000000000..ae6c282a95d5 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.2.txt @@ -0,0 +1,19 @@ +You will need to generate a new key pair, when: +1. you don't have a key pair yet +2. you forgot the passphrase protecting your private key +3. your private key was compromised +4. your key pair is too short or not the right type +For extra security, the private key itself can be encrypted using a +"passphrase", to prevent anyone from using your private key even when +they manage to copy it. You have to "unlock" the private key by typing +the passphrase. 
Be sure to never give away your private key, it is +private and should stay private. You should not even copy it to one of +your other machines, instead, you should create a new public/private key +pair for each machine. +ssh-keygen -t rsa -b 4096 +This will ask you for a file name to store the private and public key, +and a passphrase to protect your private key. It needs to be emphasised +that you really should choose the passphrase wisely! The system will ask +you for it every time you want to use the private key that is every time +you want to access the cluster or transfer your files. +Without your key pair, you won't be able to apply for a personal VSC account. diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.2_metadata.json new file mode 100644 index 000000000000..8509ef75bf3e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_3.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "account", + "subtitle": "Generate-a-publicprivate-key-pair-with-OpenSSH", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "previous_title": "account_macos_paragraph_3.1", + "next_title": "account_paragraph_4", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#generate-a-publicprivate-key-pair-with-openssh" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt new file mode 100644 index 000000000000..19d475c56ca7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1.txt @@ -0,0 +1,21 @@ 
+Using an SSH agent (optional) +Most recent Unix derivatives include by default an SSH agent +to keep and manage the user SSH keys. If you use one of these derivatives you must include the new keys into +the SSH manager keyring to be able to connect to the HPC cluster. If +not, SSH client will display an error message (see Connecting) similar to this: +Agent admitted failure to sign using the key. +Permission denied (publickey,gssapi-keyex,gssapi-with-mic). +This could be fixed using the ssh-add command. You can include the new +private keys' identities in your keyring with: +ssh-add + tip + Without extra options ssh-add adds any key located at $HOME/.ssh + directory, but you can specify the private key location path as + argument, as example: ssh-add /path/to/my/id_rsa. +Check that your key is available from the keyring with: +ssh-add -l +After these changes the key agent will keep your SSH key to connect to +the clusters as usual. + tip + You should execute ssh-add command again if you generate a new SSH + key. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json new file mode 100644 index 000000000000..8a289927685c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_4.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "links": { + "0": "https://docs.hpc.ugent.be/connecting" + }, + "previous_title": "account_paragraph_3", + "next_title": "account_paragraph_5", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt new file mode 100644 index 000000000000..2dd8fceab424 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1.txt @@ -0,0 +1,42 @@ +Welcome e mail +Applying for the account +Visit https://account.vscentrum.be/ +You will be redirected to our WAYF (Where Are You From) service where +you have to select your "Home Organisation". +Select "UGent" in the dropdown box and optionally select "Save my preference" +and "permanently". +Click "Confirm" +You will now be taken to the authentication page of your institute. +You will now have to log in with CAS using your UGent account. +You either have a login name of maximum 8 characters, or a (non-UGent) +email address if you are an external user. In case of problems with your +UGent password, please visit: https://password.ugent.be/. 
After +logging in, you may be requested to share your information. Click "Yes, +continue". +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. +This file has been stored in the directory "~/.ssh/". + tip + As ".ssh" is an invisible directory, the Finder will not show it by + default. The easiest way to access the folder, is by pressing ++cmd+shift+g++ (or ++cmd+shift+"."++), + which will allow you to enter the name of a directory, which you would + like to open in Finder. Here, type "~/.ssh" and press enter. +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. +Welcome e-mail +Within one day, you should receive a Welcome e-mail with your VSC +account details. +Dear (Username), +Your VSC-account has been approved by an administrator. +Your vsc-username is vsc40000 +Your account should be fully active within one hour. 
+To check or update your account information please visit +https://account.vscentrum.be/ +For further info please visit https://www.vscentrum.be/user-portal +Kind regards, +-- The VSC administrators \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..7882c7ff73f7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_5.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "account", + "subtitle": "Welcome-e-mail", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_4", + "next_title": "account_paragraph_6", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#welcome-e-mail" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt new file mode 100644 index 000000000000..204ca799a10f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1.txt @@ -0,0 +1,34 @@ +Computation Workflow on the HPC +Now, you can start using the HPC. You can always look up your VSC id later +by visiting https://account.vscentrum.be. +Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. +1. Create a new public/private SSH key pair from the new computer. + Repeat the process described in + section Generate a public/private key pair with OpenSSH. 
+2. Go to https://account.vscentrum.be/django/account/edit +3. Upload the new SSH public key using the Add public key section. Make sure that your + public key is actually saved, because a public key will be refused + if it is too short, wrong type, or in a wrong format. +4. (optional) If you lost your key, you can delete the old key on the + same page. You should keep at least one valid public SSH key in your + account. +5. Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. +Computation Workflow on the HPC +A typical Computation workflow will be: +1. Connect to the HPC +2. Transfer your files to the HPC +3. Compile your code and test it +4. Create a job script +5. Submit your job +6. Wait while + 1. your job gets into the queue + 2. your job gets executed + 3. your job finishes +7. Move your results +We'll take you through the different tasks one by one in the following +chapters. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json new file mode 100644 index 000000000000..96a0afcdcf6b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/account/account_macos_paragraph_6.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "Computation-Workflow-on-the-HPC", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 2, + "directory": "account", + "parent_title": "account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair-with-openssh" + }, + "previous_title": "account_paragraph_5", + "next_title": null, + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/account/#computation-workflow-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_2.1.txt new file mode 100644 index 000000000000..773d03f06893 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_2.1.txt @@ -0,0 +1,7 @@ +First Time connection to the HPC infrastructure +ssh_exchange_identification: read: Connection reset by peer +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. +If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..d1e758b8938b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_2.1_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "parent_title": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/troubleshooting/#issues-connecting-to-login-node" + }, + "previous_title": "connecting_paragraph_1", + "next_title": "connecting_paragraph_3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_3.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_3.1.txt new file mode 100644 index 000000000000..3afe2472575b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_3.1.txt @@ -0,0 +1,48 @@ +First Time connection to the HPC infrastructure +Connect +Open up a terminal and enter the following command to connect to the HPC. +You can open a terminal by navigation to Applications and then Utilities in the finder and open Terminal.app, or enter Terminal in Spotlight Search. 
+ssh vsc40000@login.hpc.ugent.be +Here, user vsc40000 wants to make a connection to the "hpcugent" cluster at UGent via the login +node "login.hpc.ugent.be", so replace vsc40000 with your own VSC id in the above command. +The first time you make a connection to the login node, you will be +asked to verify the authenticity of the login node. Please check +Warning message when first connecting to new host on how to do this. +A possible error message you can get if you previously saved your +private key somewhere else than the default location +($HOME/.ssh/id_rsa): +Permission denied (publickey,gssapi-keyex,gssapi-with-mic). +In this case, use the -i option for the ssh command to specify the +location of your private key. For example: +ssh -i /home/example/my_keys +Congratulations, you're on the HPC infrastructure now! +To find out where you have landed you can print the current working directory: +$ pwd +/user/home/gent/vsc400/vsc40000 +Your new private home directory is "/user/home/gent/vsc400/vsc40000". Here you can create your own +subdirectory structure, copy and prepare your applications, compile and +test them and submit your jobs on the HPC. +$ cd /apps/gent/tutorials +$ ls +Intro-HPC/ +This directory currently contains all training material for the Introduction to the HPC. More +relevant training material to work with the HPC can always be added later in +this directory. +You can now explore the content of this directory with the "ls --l" (lists long) and the "cd" (change directory) commands: +As we are interested in the use of the HPC, move further to Intro-HPC and explore the +contents up to 2 levels deep: +$ cd Intro-HPC +$ tree -L 2 +. 
+'-- examples + |-- Compiling-and-testing-your-software-on-the-HPC + |-- Fine-tuning-Job-Specifications + |-- Multi-core-jobs-Parallel-Computing + |-- Multi-job-submission + |-- Program-examples + |-- Running-batch-jobs + |-- Running-jobs-with-input + |-- Running-jobs-with-input-output-data + |-- example.pbs + '-- example.sh +9 directories, 5 files \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_3.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_3.1_metadata.json new file mode 100644 index 000000000000..814980720978 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_3.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Connect", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "links": { + "0": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" + }, + "previous_title": "connecting_paragraph_2", + "next_title": "connecting_paragraph_4", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#connect" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt new file mode 100644 index 000000000000..8c2bd12d3428 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1.txt @@ -0,0 +1,61 @@ +First Time connection to the HPC infrastructure +You can exit the connection at anytime by entering: +$ exit +logout +Connection to login.hpc.ugent.be closed. 
+ tip "tip: Setting your Language right" + You may encounter a warning message similar to the following one during connecting: + perl: warning: Setting locale failed. + perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_CTYPE = "UTF-8", + LANG = (unset) + are supported and installed on your system. + perl: warning: Falling back to the standard locale ("C"). + or any other error message complaining about the locale. + This means that the correct "locale" has not yet been properly specified on your local machine. Try: + LANG= + LC_COLLATE="C" + LC_CTYPE="UTF-8" + LC_MESSAGES="C" + LC_MONETARY="C" + LC_NUMERIC="C" + LC_TIME="C" + LC_ALL= + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Note + If you try to set a non-supported locale, then it will be automatically + set to the default. Currently the default is en_US.UTF-8 or en_US, + depending on whether your original (non-supported) locale was UTF-8 or not. + Open the .bashrc on your local machine with your favourite editor and + add the following lines: + + $ nano ~/.bashrc + ... + export LANGUAGE="en_US.UTF-8" + export LC_ALL="en_US.UTF-8" + export LC_CTYPE="en_US.UTF-8" + export LANG="en_US.UTF-8" + ... + + tip "tip: vi" + To start entering text in vi: move to the place you want to start + entering text with the arrow keys and type "i" to switch to insert mode. You can easily exit vi by entering: ""ESC" :wq" + To exit vi without saving your changes, enter ""ESC":q!" 
+ + + or alternatively (if you are not comfortable with the Linux editors), + again on your local machine: + + echo "export LANGUAGE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_ALL=\"en_US.UTF-8\"" >> ~/.profile + echo "export LC_CTYPE=\"en_US.UTF-8\"" >> ~/.profile + echo "export LANG=\"en_US.UTF-8\"" >> ~/.profile + + You can now log out, open a new terminal/shell on your local machine and + reconnect to the login node, and you should not get these warnings anymore. + \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..34db04e7ecf8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_5.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "parent_title": "connecting", + "previous_title": "connecting_paragraph_4", + "next_title": "connecting_paragraph_6", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_6.1.txt new file mode 100644 index 000000000000..d872c89a0f83 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_6.1.txt @@ -0,0 +1,6 @@ +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your 
desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +The preferred way to transfer files is by using an scp or sftp via the +secure OpenSSH protocol. macOS ships with an implementation of OpenSSH, so you +don't need to install any third-party software to use it. Just open a +terminal window and jump in! diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_6.1_metadata.json new file mode 100644 index 000000000000..ea88107f71af --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_6.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Transfer-Files-tofrom-the-HPC", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "parent_title": "connecting", + "previous_title": "connecting_paragraph_5", + "next_title": "connecting_paragraph_7", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#transfer-files-tofrom-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.1.txt new file mode 100644 index 000000000000..319433fd88a1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.1.txt @@ -0,0 +1,42 @@ +Transfer Files tofrom the HPC +Using scp +Secure copy or SCP is a tool (command) for securely transferring files between a local +host (= your computer) and a remote host (the HPC). It is based on the +Secure Shell (SSH) protocol. 
The scp command is the equivalent of the cp (i.e., +copy) command, but can copy files to or from remote machines. +It's easier to copy files directly to $VSC_DATA and $VSC_SCRATCH if +you have symlinks to them in your home directory. See +the chapter titled "Uploading/downloading/editing files", section "Symlinks for data/scratch" in the intro to Linux + for how to do this. +Open an additional terminal window and check that you're working on your +local machine. +$ hostname + +If you're still using the terminal that is connected to the HPC, close the +connection by typing "exit" in the terminal window. +For example, we will copy the (local) file "localfile.txt" to your +home directory on the HPC cluster. We first generate a small dummy +"localfile.txt", which contains the word "Hello". Use your own VSC +account, which is something like "vsc40000". Don't forget the colon (:) at the +end: if you forget it, it will just create a file named vsc40000@login.hpc.ugent.be on your +local filesystem. You can even specify where to save the file on the +remote filesystem by putting a path after the colon. +$ echo "Hello" > localfile.txt +$ ls -l +... 
+-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +$ scp localfile.txt vsc40000@login.hpc.ugent.be: +localfile.txt 100% 6 0.0KB/s 00:00 +Connect to the HPC via another terminal, print the working directory (to +make sure you're in the home directory) and check whether the file has +arrived: +$ pwd +/user/home/gent/vsc400/vsc40000 +$ ls -l +total 1536 +drwxrwxr-x 2 +drwxrwxr-x 2 +drwxrwxr-x 10 +-rw-r--r-- 1 +$ cat localfile.txt +Hello diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.1_metadata.json new file mode 100644 index 000000000000..3df8e66ceb67 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Using-scp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/localhost:8000/Gent//intro-Linux/uploading_files/#symlinks-for-datascratch" + }, + "previous_title": "connecting_paragraph_6", + "next_title": "connecting_macos_paragraph_7.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-scp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.2.txt new file mode 100644 index 000000000000..16016dc219c6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.2.txt @@ -0,0 +1,37 @@ +The scp command can also be used to copy files from the cluster to your +local machine. 
Let us copy the remote file "intro-HPC--Gent.pdf" from your "docs" +subdirectory on the cluster to your local computer. +First, we will confirm that the file is indeed in the "docs" +subdirectory. In the terminal on the login node, enter: +$ cd ~/docs +$ ls -l +total 1536 +-rw-r--r-- 1 vsc40000 Sep 11 09:53 intro-HPC--Gent.pdf +Now we will copy the file to the local machine. On the terminal on your +own local computer, enter: +$ scp vsc40000@login.hpc.ugent.be:./docs/intro-HPC--Gent.pdf . +intro-HPC--Gent.pdf 100% 725KB 724.6KB/s 00:01 +$ ls -l +total 899 +-rw-r--r-- 1 user staff 741995 Sep 18 09:53 +-rw-r--r-- 1 user staff 6 Sep 18 09:37 localfile.txt +The file has been copied from the HPC to your local computer. +It's also possible to copy entire directories (and their contents) with +the -r flag. For example, if we want to copy the local directory +dataset to $VSC_SCRATCH, we can use the following command (assuming +you've created the scratch symlink): +scp -r dataset vsc40000@login.hpc.ugent.be:scratch +If you don't use the -r option to copy a directory, you will run into +the following error: +$ scp dataset vsc40000@login.hpc.ugent.be:scratch +dataset: not a regular file +Using sftp +The SSH File Transfer Protocol (also Secure File Transfer Protocol, or SFTP) is a network protocol that provides file access, file +transfer and file management functionalities over any reliable data +stream. It was designed as an extension of the Secure Shell protocol +(SSH) version 2.0. This protocol assumes that it is run over a secure +channel, such as SSH, that the server has already authenticated the +client, and that the identity of the client user is available to the +protocol. +The sftp is an equivalent of the ftp command, with the difference that +it uses the secure ssh protocol to connect to the clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.2_metadata.json new file mode 100644 index 000000000000..acf4056f823f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "previous_title": "connecting_macos_paragraph_7.1", + "next_title": "connecting_macos_paragraph_7.3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.3.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.3.txt new file mode 100644 index 000000000000..1ef13b80c6f0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.3.txt @@ -0,0 +1,18 @@ +One easy way of starting a sftp session is +sftp vsc40000@login.hpc.ugent.be +Typical and popular commands inside an sftp session are: +| | | +|:--------------------------|:-------------------------------------------------------------------------------------| +| cd ~/examples/fibo | Move to the examples/fibo subdirectory on the HPC (i.e., the remote machine) | +| ls | Get a list of the files in the current directory on the HPC. | +| get fibo.py | Copy the file "fibo.py" from the HPC | +| get tutorial/HPC.pdf | Copy the file "HPC.pdf" from the HPC, which is in the "tutorial" subdirectory.
| +| lcd test | Move to the "test" subdirectory on your local machine. | +| lcd .. | Move up one level in the local directory. | +| lls | Get local directory listing. | +| put test.py | Copy the local file test.py to the HPC. | +| put test1.py test2.py | Copy the local file test1.py to the HPC and rename it to test2.py. | +| bye | Quit the sftp session | +| **mget *.cc** | Copy all the remote files with extension ".cc" to the local directory. | +| **mput *.h** | Copy all the local files with extension ".h" to the HPC. | +| | | diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.3_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.3_metadata.json new file mode 100644 index 000000000000..70514bc20f5a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_7.3_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "connecting", + "subtitle": "Using-sftp", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/", + "1": "https://docs.hpc.ugent.be/" + }, + "previous_title": "connecting_macos_paragraph_7.2", + "next_title": "connecting_paragraph_8", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-sftp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_8.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_8.1.txt new file mode 100644 index 000000000000..7fa726ab70e1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_8.1.txt @@ -0,0 +1,25 @@ +Changing login nodes +Using a GUI (Cyberduck)
+Cyberduck is a graphical alternative to the scp command. It can be +installed from https://cyberduck.io. +This is the one-time setup you will need to do before connecting: +1. After starting Cyberduck, the Bookmark tab will show up. To add a + new bookmark, click on the "+" sign on the bottom left of the + window. A new window will open. +2. In the drop-down menu on top, select "SFTP (SSH File Transfer Protocol)". +3. In the "Server" field, type in login.hpc.ugent.be. In the "Username" field, type in + your VSC account id (this looks like vsc40000). +4. Select the location of your SSH private key in the "SSH Private Key" field. +5. Finally, type in a name for the bookmark in the "Nickname" field and + close the window by pressing on the red circle in the top left + corner of the window. +To open the connection, click on the "Bookmarks" icon (which +resembles an open book) and double-click on the bookmark you just +created. +Fast file transfer for large datasets +See the section on rsync in chapter 5 of the Linux intro manual. +Changing login nodes +It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. +For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: +ssh gligar07.gastly.os +This is also possible the other way around. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_8.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_8.1_metadata.json new file mode 100644 index 000000000000..d4745200cb0c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/connecting/connecting_macos_paragraph_8.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Using-a-GUI-(Cyberduck)", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Transfer-Files-tofrom-the-HPC", + "links": { + "0": "https://docs.hpc.ugent.be/linux-tutorial/uploading_files/#copying-faster-with-rsync" + }, + "previous_title": "connecting_paragraph_7", + "next_title": "connecting_paragraph_9", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/connecting/#using-a-gui-cyberduck" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.1.txt new file mode 100644 index 000000000000..dfec88ca4e77 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.1.txt @@ -0,0 +1,27 @@ +Getting Connected +Getting Started +Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the HPC-UGent infrastructure and submitting your very first job. We'll also walk you through the process step by step using a practical example. +In addition to this chapter, you might find the recording of the Introduction to HPC-UGent training session to be a useful resource. 
+Before proceeding, read the introduction to HPC to gain an understanding of the HPC-UGent infrastructure and related terminology. +Getting Access +To get access to the HPC-UGent infrastructure, visit Getting an HPC Account. +If you have not used Linux before, +now would be a good time to follow our Linux Tutorial. +A typical workflow looks like this: +1. Connect to the login nodes +2. Transfer your files to the HPC-UGent infrastructure +3. Optional: compile your code and test it +4. Create a job script and submit your job +5. Wait for the job to be executed +6. Study the results generated by your jobs, either on the cluster or + after downloading them locally. +We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using TensorFlow; +see the example scripts. +Getting Connected +There are two options to connect +- Using a terminal to connect via SSH (for power users) (see First Time connection to the HPC-UGent infrastructure) +- Using the web portal +Considering your operating system is macOS, it should be easy to make use of the ssh command in a terminal, but the web portal will work too. +The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). +See shell access when using the web portal, or +connection to the HPC-UGent infrastructure when using a terminal.
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.1_metadata.json new file mode 100644 index 000000000000..a9c3e0c7726a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.1_metadata.json @@ -0,0 +1,25 @@ +{ + "main_title": "getting_started", + "subtitle": "Getting-Connected", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "parent_title": "getting_started", + "links": { + "0": "https://www.ugent.be/hpc/en/training/introhpcugent-recording", + "1": "https://docs.hpc.ugent.be/introduction", + "2": "https://docs.hpc.ugent.be/account", + "3": "https://docs.hpc.ugent.be/linux-tutorial", + "4": "https://www.tensorflow.org/", + "5": "https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist", + "6": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure", + "7": "https://docs.hpc.ugent.be/web_portal", + "8": "https://docs.hpc.ugent.be/web_portal", + "9": "https://docs.hpc.ugent.be/web_portal/#shell-access", + "10": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" + }, + "previous_title": null, + "next_title": "getting_started_macos_paragraph_1.2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/getting_started/#getting-connected" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.2.txt new file mode 100644 index 000000000000..99501b99809d --- /dev/null +++ 
b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.2.txt @@ -0,0 +1 @@ +Make sure you can get shell access to the HPC-UGent infrastructure before proceeding with the next steps. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.2_metadata.json new file mode 100644 index 000000000000..3f34e4d8f834 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_1.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "getting_started", + "subtitle": "Getting-Connected", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "parent_title": "getting_started", + "previous_title": "getting_started_macos_paragraph_1.1", + "next_title": "getting_started_paragraph_2", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/getting_started/#getting-connected" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_2.1.txt new file mode 100644 index 000000000000..438321b40a1b --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_2.1.txt @@ -0,0 +1,27 @@ +Submitting a job + Info + When having problems, see the connection issues section on the troubleshooting page. +Transfer your files +Now that you can log in, it is time to transfer files from your local computer to your home directory on the HPC-UGent infrastructure.
+Download tensorflow_mnist.py +and run.sh example scripts to your computer (from here). +On your local machine you can run: +curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py +curl -OL https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh +Using the scp command, the files can be copied from your local host to your home directory (~) on the remote host (HPC). +scp tensorflow_mnist.py run.sh vsc40000@login.hpc.ugent.be:~ +ssh vsc40000@login.hpc.ugent.be + Warning "Use your own VSC account id" + + Replace vsc40000 with your VSC account id (see https://account.vscentrum.be) + Info + For more information about transferring files or scp, see transfer files from/to HPC. +When running ls in your session on the HPC-UGent infrastructure, you should see the two files listed in your home directory (~): +$ ls ~ +run.sh tensorflow_mnist.py +When you do not see these files, make sure you uploaded the files to your home directory. +Submitting a job +Jobs are submitted and executed using job scripts. In our case run.sh can be used as a (very minimal) job script. +A job script is a shell script, a text file that specifies the resources, +the software that is used (via module load statements), +and the steps that should be executed to run the calculation.
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..abfa4f07b2e7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/macos/getting_started/getting_started_macos_paragraph_2.1_metadata.json @@ -0,0 +1,19 @@ +{ + "main_title": "getting_started", + "subtitle": "Submitting-a-job", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "parent_title": "getting_started", + "links": { + "0": "https://docs.hpc.ugent.be/troubleshooting/#sec:connecting-issues", + "1": "https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py", + "2": "https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh", + "3": "https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist", + "4": "https://docs.hpc.ugent.be/connecting/#transfer-files-tofrom-the-hpc" + }, + "previous_title": "getting_started_paragraph_1", + "next_title": "getting_started_paragraph_3", + "OS": "macos", + "reference_link": "https://docs.hpc.ugent.be/macOS/getting_started/#submitting-a-job" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.1.txt new file mode 100644 index 000000000000..f47e95e91f03 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.1.txt @@ -0,0 +1,64 @@ +How do SSH keys work 
+Since all VSC clusters use Linux as their main operating system, you +will need to get acquainted with using the command-line interface and +using the terminal (see tutorial). +A typical Windows environment does not come with pre-installed software +to connect and run command-line executables on a HPC. Some tools need to be +installed on your Windows machine first, before we can start the actual +work. +Get PuTTY: A free telnet/SSH client +We recommend using the PuTTY tools package, which is freely available. +You do not need to install PuTTY, you can download the PuTTY and +PuTTYgen executable and run it. This can be useful in situations where +you do not have the required permissions to install software on the +computer you are using. Alternatively, an installation package is also +available. +You can download PuTTY from the official address: +https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html. You +probably want the 64-bit version. If you can install software on your +computer, you can use the "Package files", if not, you can download and +use putty.exe and puttygen.exe in the "Alternative binary files" +section. +The PuTTY package consists of several components, but we'll only use +two: +1. PuTTY: the Telnet and SSH client itself (to login, see Open a terminal) +2. PuTTYgen: an RSA and DSA key generation utility (to generate a key pair, + see Generate a public/private key pair) +Generating a public/private key pair +Before requesting a VSC account, you need to generate a pair of ssh +keys. You need 2 keys, a public and a private key. You can visualise the +public key as a lock to which only you have the key (your private key). +You can send a copy of your lock to anyone without any problems, because +only you can open it, as long as you keep your private key secure. To +generate a public/private key pair, you can use the PuTTYgen key +generator. +Start PuTTYgen.exe and follow these steps: +1.
In "Parameters" (at the bottom of the window), choose "RSA" and set the number of + bits in the key to 4096. +2. Click on "Generate". To generate the key, you must move the mouse cursor over + the PuTTYgen window (this generates some random data that PuTTYgen + uses to generate the key pair). Once the key pair is generated, your + public key is shown in the field "Public key for pasting into OpenSSH authorized_keys file". +3. Next, it is advised to fill in the "Key comment" field to make it easier + identifiable afterwards. +4. Next, you should specify a passphrase in the "Key passphrase" field and retype it in + the "Confirm passphrase" field. Remember, the passphrase protects the private key against + unauthorised use, so it is best to choose one that is not too easy + to guess but that you can still remember. Using a passphrase is not + required, but we recommend you to use a good passphrase unless you + are certain that your computer's hard disk is encrypted with a + decent password. (If you are not sure your disk is encrypted, it + probably isn't.) +5. Save both the public and private keys in a folder on your personal + computer (We recommend to create and put them in the folder + "C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh") with the + buttons "Save public key" and "Save private key". We recommend using the name "id_rsa.pub" for the public key, and + "id_rsa.ppk" for the private key. +6. Finally, save an "OpenSSH" version of your private key (in + particular for later "X2Go" usage, see x2go) by entering the + "Conversions" menu and selecting "Export OpenSSH key" (do not select the + "force new file format" variant). Save the file in the same location + as in the previous step with filename "id_rsa". (If there is no + "Conversions" menu, you must update your "puttygen" version. If you + want to do this conversion afterwards, you can start with loading an + existing "id_rsa.ppk" and only do this conversions export.) 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..b17309fed2af --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.1_metadata.json @@ -0,0 +1,18 @@ +{ + "main_title": "account", + "subtitle": "Generating-a-publicprivate-key-pair", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "links": { + "0": "https://docs.hpc.ugent.be/linux-tutorial", + "1": "https://docs.hpc.ugent.be/connecting/#open-a-terminal", + "2": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair", + "3": "https://docs.hpc.ugent.be/" + }, + "previous_title": "account_paragraph_1", + "next_title": "account_windows_paragraph_2.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.2.txt new file mode 100644 index 000000000000..d0425d6738f4 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.2.txt @@ -0,0 +1,2 @@ +If you use another program to generate a key pair, please remember that +they need to be in the OpenSSH format to access the HPC clusters. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.2_metadata.json new file mode 100644 index 000000000000..3df1f160c12d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_2.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "account", + "subtitle": "Generating-a-publicprivate-key-pair", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "previous_title": "account_windows_paragraph_2.1", + "next_title": "account_paragraph_3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#generating-a-publicprivate-key-pair" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt new file mode 100644 index 000000000000..a70356b91720 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1.txt @@ -0,0 +1,29 @@ +Using an SSH agent (optional) +It is possible to setup a SSH agent in Windows. This is an optional +configuration to help you to keep all your SSH keys (if you have +several) stored in the same key ring to avoid to type the SSH key +password each time. The SSH agent is also necessary to enable SSH hops +with key forwarding from Windows. +Pageant is the SSH authentication agent used in windows. This agent should be +available from the PuTTY installation package +https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html or as +stand alone binary package. 
+After the installation just start the Pageant application in Windows, +this will start the agent in background. The agent icon will be visible +from the Windows panel. +At this point the agent does not contain any private key. You should +include the private key(s) generated in the previous section Generating a public/private key pair. +1. Click on "Add key" +2. Select the private key file generated in Generating a public/private key pair ("id_rsa.ppk" by default). +3. Enter the same SSH key password used to generate the key. After this + step the new key will be included in Pageant to manage the SSH + connections. +4. You can see the SSH key(s) available in the key ring just clicking + on "View Keys". +5. You can change PuTTY setup to use the SSH agent. Open PuTTY and check + Connection > SSH > Auth > Allow agent forwarding. +Now you can connect to the login nodes as usual. The SSH agent will know +which SSH key should be used and you do not have to type the SSH +passwords each time, this task is done by Pageant agent automatically. +It is also possible to use WinSCP with Pageant, see +https://winscp.net/eng/docs/ui_pageant for more details. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json new file mode 100644 index 000000000000..a5d3804cbcc3 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_4.1_metadata.json @@ -0,0 +1,16 @@ +{ + "main_title": "account", + "subtitle": "Using-an-SSH-agent-(optional)", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair", + "1": "https://docs.hpc.ugent.be/account/#generating-a-publicprivate-key-pair" + }, + "previous_title": "account_paragraph_3", + "next_title": "account_paragraph_5", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#using-an-ssh-agent-optional" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_5.1.txt new file mode 100644 index 000000000000..fbf46db59dd6 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_5.1.txt @@ -0,0 +1,38 @@ +Welcome e mail +Applying for the account +Visit https://account.vscentrum.be/ +You will be redirected to our WAYF (Where Are You From) service where +you have to select your "Home Organisation". +Select "UGent" in the dropdown box and optionally select "Save my preference" +and "permanently". +Click "Confirm" +You will now be taken to the authentication page of your institute. +You will now have to log in with CAS using your UGent account. 
+You either have a login name of maximum 8 characters, or a (non-UGent) +email address if you are an external user. In case of problems with your +UGent password, please visit: https://password.ugent.be/. After +logging in, you may be requested to share your information. Click "Yes, +continue". +After you log in using your UGent login and password, you will be asked to +upload the file that contains your public key, i.e., the file +"id_rsa.pub" which you have generated earlier. Make sure that your +public key is actually accepted for upload, because if it is in a wrong +format, wrong type or too short, then it will be refused. +This file should have been stored in the directory +"C:\\Users\\%USERNAME%\\AppData\\Local\\PuTTY\\.ssh" +After you have uploaded your public key you will receive an e-mail with +a link to confirm your e-mail address. After confirming your e-mail +address the VSC staff will review and if applicable approve your +account. +Welcome e-mail +Within one day, you should receive a Welcome e-mail with your VSC +account details. +Dear (Username), +Your VSC-account has been approved by an administrator. +Your vsc-username is vsc40000 +Your account should be fully active within one hour. 
+To check or update your account information please visit +https://account.vscentrum.be/ +For further info please visit https://www.vscentrum.be/user-portal +Kind regards, +-- The VSC administrators \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..ce6bf0bda3d1 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_5.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "account", + "subtitle": "Welcome-e-mail", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 3, + "directory": "account", + "parent_title": "account", + "previous_title": "account_paragraph_4", + "next_title": "account_paragraph_6", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#welcome-e-mail" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt new file mode 100644 index 000000000000..dcb7ae5159da --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1.txt @@ -0,0 +1,34 @@ +Computation Workflow on the HPC +Now, you can start using the HPC. You can always look up your VSC id later +by visiting https://account.vscentrum.be. +Adding multiple SSH public keys (optional) +In case you are connecting from different computers to the login nodes, +it is advised to use separate SSH public keys to do so. You should +follow these steps. +1. Create a new public/private SSH key pair from Putty. 
Repeat the + process described in + section Generate a public/private key pair. +2. Go to https://account.vscentrum.be/django/account/edit +3. Upload the new SSH public key using the Add public key section. Make sure that your + public key is actually saved, because a public key will be refused + if it is too short, wrong type, or in a wrong format. +4. (optional) If you lost your key, you can delete the old key on the + same page. You should keep at least one valid public SSH key in your + account. +5. Take into account that it will take some time before the new SSH + public key is active in your account on the system; waiting for + 15-30 minutes should be sufficient. +Computation Workflow on the HPC +A typical Computation workflow will be: +1. Connect to the HPC +2. Transfer your files to the HPC +3. Compile your code and test it +4. Create a job script +5. Submit your job +6. Wait while + 1. your job gets into the queue + 2. your job gets executed + 3. your job finishes +7. Move your results +We'll take you through the different tasks one by one in the following +chapters. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json new file mode 100644 index 000000000000..a7930a87bce0 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/account/account_windows_paragraph_6.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "account", + "subtitle": "Computation-Workflow-on-the-HPC", + "source_file": "../../mkdocs/docs/HPC/account.md", + "title_depth": 2, + "directory": "account", + "parent_title": "account", + "links": { + "0": "https://docs.hpc.ugent.be/account/#generate-a-publicprivate-key-pair" + }, + "previous_title": "account_paragraph_5", + "next_title": null, + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/account/#computation-workflow-on-the-hpc" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_2.1.txt new file mode 100644 index 000000000000..7508c49411d9 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_2.1.txt @@ -0,0 +1,48 @@ +First Time connection to the HPC infrastructure +ssh_exchange_identification: read: Connection reset by peer +First Time connection to the HPC infrastructure +The remaining content in this chapter is primarily focused for people utilizing a terminal with SSH. +If you are instead using the web portal, the corresponding chapter might be more helpful: Using the HPC-UGent web portal. +If you have any issues connecting to the HPC after you've followed these +steps, see Issues connecting to login node to troubleshoot. 
+Open a Terminal +You've generated a public/private key pair with PuTTYgen and have an +approved account on the VSC clusters. The next step is to setup the +connection to (one of) the HPC. +In the screenshots, we show the setup for user +"vsc20167" +to the HPC cluster via the login node "login.hpc.ugent.be". +1. Start the PuTTY executable putty.exe in your directory + C:\Program Files (x86)\PuTTY and the configuration screen will pop + up. As you will often use the PuTTY tool, we recommend adding a + shortcut on your desktop. +2. Within the category "Session", in the field "Host Name", enter the name of the + login node of the cluster (i.e., "login.hpc.ugent.be") you want to connect to. + +3. In the category "Connection > Data", in the field "Auto-login username", put in "vsc40000", which is your VSC + username that you have received by e-mail after your request was + approved. + +4. In the category "Connection > SSH > Auth", in the field "Private key file for authentication" click on "Browse" and select the private key + (i.e., "id_rsa.ppk") that you generated and saved above. +5. In the category "Connection > SSH > X11", click the "Enable X11 Forwarding" checkbox. +6. Now go back to "Session", and fill in "hpcugent" in the "Saved Sessions" field and press "Save" to + store the session information. + +7. Now pressing "Open", will open a terminal window and asks for your + passphrase. + +8. If this is your first time connecting, you will be asked to verify + the authenticity of the login node. Please see + section Warning message when first connecting to new host + on how to do this. +9. After entering your correct passphrase, you will be connected to the + login-node of the HPC. +10. To check you can now "Print the Working Directory" (pwd) and check + the name of the computer, where you have logged in (hostname): + $ pwd + /user/home/gent/vsc400/vsc40000 + $ hostname -f + gligar07.gastly.os +11. For future PuTTY sessions, just select your saved session (i.e. 
"hpcugent") + from the list, "Load" it and press "Open". diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..dff7738ad08e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_2.1_metadata.json @@ -0,0 +1,17 @@ +{ + "main_title": "connecting", + "subtitle": "Open-a-Terminal", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "Connection-restrictions", + "links": { + "0": "https://docs.hpc.ugent.be/web_portal", + "1": "https://docs.hpc.ugent.be/troubleshooting/#issues-connecting-to-login-node", + "2": "https://docs.hpc.ugent.be/troubleshooting/#warning-message-when-first-connecting-to-new-host" + }, + "previous_title": "connecting_paragraph_1", + "next_title": "connecting_paragraph_3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#open-a-terminal" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_3.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_3.1.txt new file mode 100644 index 000000000000..aa43af81fbdd --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_3.1.txt @@ -0,0 +1,32 @@ +First Time connection to the HPC infrastructure +Congratulations, you're on the HPC infrastructure now! +To find out where you have landed you can print the current working directory: +$ pwd +/user/home/gent/vsc400/vsc40000 +Your new private home directory is "/user/home/gent/vsc400/vsc40000". 
Here you can create your own +subdirectory structure, copy and prepare your applications, compile and +test them and submit your jobs on the HPC. +$ cd /apps/gent/tutorials +$ ls +Intro-HPC/ +This directory currently contains all training material for the Introduction to the HPC. More +relevant training material to work with the HPC can always be added later in +this directory. +You can now explore the content of this directory with the "ls --l" (lists long) and the "cd" (change directory) commands: +As we are interested in the use of the HPC, move further to Intro-HPC and explore the +contents up to 2 levels deep: +$ cd Intro-HPC +$ tree -L 2 +. +'-- examples + |-- Compiling-and-testing-your-software-on-the-HPC + |-- Fine-tuning-Job-Specifications + |-- Multi-core-jobs-Parallel-Computing + |-- Multi-job-submission + |-- Program-examples + |-- Running-batch-jobs + |-- Running-jobs-with-input + |-- Running-jobs-with-input-output-data + |-- example.pbs + '-- example.sh +9 directories, 5 files \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_3.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_3.1_metadata.json new file mode 100644 index 000000000000..0f620b2f3260 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_3.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "parent_title": "connecting", + "previous_title": "connecting_paragraph_2", + "next_title": "connecting_paragraph_4", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff 
--git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_5.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_5.1.txt new file mode 100644 index 000000000000..7535a44a657f --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_5.1.txt @@ -0,0 +1,33 @@ +First Time connection to the HPC infrastructure +You can exit the connection at any time by entering: +$ exit +logout +Connection to login.hpc.ugent.be closed. + tip "tip: Setting your Language right" + You may encounter a warning message similar to the following one during connecting: + perl: warning: Setting locale failed. + perl: warning: Please check that your locale settings: + LANGUAGE = (unset), + LC_ALL = (unset), + LC_CTYPE = "UTF-8", + LANG = (unset) + are supported and installed on your system. + perl: warning: Falling back to the standard locale ("C"). + or any other error message complaining about the locale. + This means that the correct "locale" has not yet been properly specified on your local machine. Try: + LANG= + LC_COLLATE="C" + LC_CTYPE="UTF-8" + LC_MESSAGES="C" + LC_MONETARY="C" + LC_NUMERIC="C" + LC_TIME="C" + LC_ALL= + A locale is a set of parameters that defines the user's language, country and + any special variant preferences that the user wants to see in their user + interface. Usually a locale identifier consists of at least a language + identifier and a region identifier. + Note + If you try to set a non-supported locale, then it will be automatically + set to the default. Currently the default is en_US.UTF-8 or en_US, + depending on whether your original (non-supported) locale was UTF-8 or not. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_5.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_5.1_metadata.json new file mode 100644 index 000000000000..f66c3ce38b5c --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_5.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "First-Time-connection-to-the-HPC-infrastructure", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "parent_title": "connecting", + "previous_title": "connecting_paragraph_4", + "next_title": "connecting_paragraph_6", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#first-time-connection-to-the-hpc-infrastructure" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.1.txt new file mode 100644 index 000000000000..f1012361c2aa --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.1.txt @@ -0,0 +1,32 @@ +Transfer Files to/from the HPC +Before you can do some work, you'll have to transfer the files you need from your desktop or department to the cluster. At the end of a job, you might want to transfer some files back. +WinSCP +To transfer files to and from the cluster, we recommend the use of +WinSCP, a graphical file management tool which can transfer files using +secure protocols such as SFTP and SCP. WinSCP is freely available from +http://www.winscp.net. +To transfer your files using WinSCP, +1. Open the program +2. 
The "Login" menu is shown automatically (if it is closed, click "New Session" to open it again). Fill in the necessary fields under "Session" + 1. Click "New Site". + 2. Enter "login.hpc.ugent.be" in the "Host name" field. + 3. Enter your "vsc-account" in the "User name" field. + 4. Select "SCP" as the "file" protocol. + 5. Note that the password field remains empty. + + 6. Click "Advanced...". + 7. Click "SSH > Authentication". + 8. Select your private key in the field "Private key file". +3. Press the "Save" button, to save the session under "Session > Sites" for future access. +4. Finally, when clicking on "Login", you will be asked for your key passphrase. + +The first time you make a connection to the login node, a Security +Alert will appear and you will be asked to verify the authenticity of the +login node. +Make sure the fingerprint in the alert matches one of the following: +- ssh-rsa 2048 10:2f:31:21:04:75:cb:ed:67:e0:d5:0c:a1:5a:f4:78 +- ssh-rsa 2048 SHA256:W8Wz0/FkkCR2ulN7+w8tNI9M0viRgFr2YlHrhKD2Dd0 +- ssh-ed25519 255 19:28:76:94:52:9d:ff:7d:fb:8b:27:b6:d7:69:42:eb +- ssh-ed25519 256 SHA256:8AJg3lPN27y6i+um7rFx3xoy42U8ZgqNe4LsEycHILA +- ssh-ecdsa 256 e6:d2:9c:d8:e7:59:45:03:4a:1f:dc:96:62:29:9c:5f +- ssh-ecdsa 256 SHA256:C8TVx0w8UjGgCQfCmEUaOPxJGNMqv2PXLyBNODe5eOQ diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.1_metadata.json new file mode 100644 index 000000000000..8ea3fdc31070 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.1_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": 
"First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_paragraph_5", + "next_title": "connecting_windows_paragraph_6.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.2.txt new file mode 100644 index 000000000000..b52c614f263e --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.2.txt @@ -0,0 +1,7 @@ +If it does, press Yes, if it doesn't, please contact hpc@ugent.be. +Note: it is possible that the ssh-ed25519 fingerprint starts with ssh-ed25519 255 +rather than ssh-ed25519 256 (or vice versa), depending on the PuTTY version you are using. +It is safe to ignore this 255 versus 256 difference, but the part after should be +identical. +Now, try out whether you can transfer an arbitrary file from your local +machine to the HPC and back. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.2_metadata.json new file mode 100644 index 000000000000..3da894d85e78 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_6.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "connecting", + "subtitle": "WinSCP", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 3, + "directory": "connecting", + "parent_title": "First-Time-connection-to-the-HPC-infrastructure", + "previous_title": "connecting_windows_paragraph_6.1", + "next_title": "connecting_paragraph_7", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#winscp" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_8.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_8.1.txt new file mode 100644 index 000000000000..b16b119665b7 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_8.1.txt @@ -0,0 +1,8 @@ +Changing login nodes +Fast file transfer for large datasets +See the section on rsync in chapter 5 of the Linux intro manual. +Changing login nodes +It can be useful to have control over which login node you are on. However, when you connect to the HPC (High-Performance Computing) system, you are directed to a random login node, which might not be the one where you already have an active session. To address this, there is a way to manually switch your active login node. 
+For instance, if you want to switch to the login node named gligar07.gastly.os, you can use the following command while you are connected to the gligar08.gastly.os login node on the HPC: +ssh gligar07.gastly.os +This is also possible the other way around. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_8.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_8.1_metadata.json new file mode 100644 index 000000000000..0ee95dd3ff4a --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/connecting/connecting_windows_paragraph_8.1_metadata.json @@ -0,0 +1,15 @@ +{ + "main_title": "connecting", + "subtitle": "Changing-login-nodes", + "source_file": "../../mkdocs/docs/HPC/connecting.md", + "title_depth": 2, + "directory": "connecting", + "parent_title": "connecting", + "links": { + "0": "https://docs.hpc.ugent.be/linux-tutorial/uploading_files/#copying-faster-with-rsync" + }, + "previous_title": "connecting_paragraph_7", + "next_title": "connecting_paragraph_9", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/connecting/#changing-login-nodes" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.1.txt new file mode 100644 index 000000000000..eaf8a9391a11 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.1.txt @@ -0,0 +1,27 @@ +Getting Connected +Getting Started +Welcome to the "Getting Started" guide. This chapter will lead you through the initial steps of logging into the HPC-UGent infrastructure and submitting your very first job. 
We'll also walk you through the process step by step using a practical example. +In addition to this chapter, you might find the recording of the Introduction to HPC-UGent training session to be a useful resource. +Before proceeding, read the introduction to HPC to gain an understanding of the HPC-UGent infrastructure and related terminology. +Getting Access +To get access to the HPC-UGent infrastructure, visit Getting an HPC Account. +If you have not used Linux before, +now would be a good time to follow our Linux Tutorial. +A typical workflow looks like this: +1. Connect to the login nodes +2. Transfer your files to the HPC-UGent infrastructure +3. Optional: compile your code and test it +4. Create a job script and submit your job +5. Wait for job to be executed +6. Study the results generated by your jobs, either on the cluster or + after downloading them locally. +We will walk through an illustrative workload to get you started. In this example, our objective is to train a deep learning model for recognizing hand-written digits (MNIST dataset) using TensorFlow; +see the example scripts. +Getting Connected +There are two options to connect +- Using a terminal to connect via SSH (for power users) (see First Time connection to the HPC-UGent infrastructure) +- Using the web portal +Considering your operating system is Windows, it is recommended to use the web portal. +The web portal offers a convenient way to upload files and gain shell access to the HPC-UGent infrastructure from a standard web browser (no software installation or configuration required). +See shell access when using the web portal, or +connection to the HPC-UGent infrastructure when using a terminal. 
diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.1_metadata.json new file mode 100644 index 000000000000..d17b01decef8 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.1_metadata.json @@ -0,0 +1,25 @@ +{ + "main_title": "getting_started", + "subtitle": "Getting-Connected", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "parent_title": "getting_started", + "links": { + "0": "https://www.ugent.be/hpc/en/training/introhpcugent-recording", + "1": "https://docs.hpc.ugent.be/introduction", + "2": "https://docs.hpc.ugent.be/account", + "3": "https://docs.hpc.ugent.be/linux-tutorial", + "4": "https://www.tensorflow.org/", + "5": "https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist", + "6": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure", + "7": "https://docs.hpc.ugent.be/web_portal", + "8": "https://docs.hpc.ugent.be/web_portal", + "9": "https://docs.hpc.ugent.be/web_portal/#shell-access", + "10": "https://docs.hpc.ugent.be/connecting/#first-time-connection-to-the-hpc-infrastructure" + }, + "previous_title": null, + "next_title": "getting_started_windows_paragraph_1.2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/getting_started/#getting-connected" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.2.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.2.txt new file mode 100644 index 
000000000000..99501b99809d --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.2.txt @@ -0,0 +1 @@ +Make sure you can get to a shell access to the HPC-UGent infrastructure before proceeding with the next steps. \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.2_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.2_metadata.json new file mode 100644 index 000000000000..7c463f0f6106 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_1.2_metadata.json @@ -0,0 +1,12 @@ +{ + "main_title": "getting_started", + "subtitle": "Getting-Connected", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "parent_title": "getting_started", + "previous_title": "getting_started_windows_paragraph_1.1", + "next_title": "getting_started_paragraph_2", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/getting_started/#getting-connected" +} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_2.1.txt new file mode 100644 index 000000000000..5c2b11ef1114 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_2.1.txt @@ -0,0 +1,21 @@ +Submitting a job + Info + When having problems see the connection issues section on the troubleshooting page. 
+Transfer your files +Now that you can login, it is time to transfer files from your local computer to your home directory on the HPC-UGent infrastructure. +Download tensorflow_mnist.py +and run.sh example scripts to your computer (from here). +The HPC-UGent web portal provides a file browser that allows uploading files. +For more information see the file browser section. +Upload both files (run.sh and tensorflow-mnist.py) to your home directory and go back to your shell. + Info + As an alternative, you can use WinSCP (see our section) +When running ls in your session on the HPC-UGent infrastructure, you should see the two files listed in your home directory (~): +$ ls ~ +run.sh tensorflow_mnist.py +When you do not see these files, make sure you uploaded the files to your home directory. +Submitting a job +Jobs are submitted and executed using job scripts. In our case run.sh can be used as a (very minimal) job script. +A job script is a shell script, a text file that specifies the resources, +the software that is used (via module load statements), +and the steps that should be executed to run the calculation. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_2.1_metadata.json new file mode 100644 index 000000000000..e0616ff60e12 --- /dev/null +++ b/scripts/HPC_chatbot_preprocessor/parsed_mds/os_specific/windows/getting_started/getting_started_windows_paragraph_2.1_metadata.json @@ -0,0 +1,21 @@ +{ + "main_title": "getting_started", + "subtitle": "Submitting-a-job", + "source_file": "../../mkdocs/docs/HPC/getting_started.md", + "title_depth": 3, + "directory": "getting_started", + "parent_title": "getting_started", + "links": { + "0": "https://docs.hpc.ugent.be/troubleshooting/#sec:connecting-issues", + "1": "https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/tensorflow_mnist.py", + "2": "https://raw.githubusercontent.com/hpcugent/vsc_user_docs/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist/run.sh", + "3": "https://github.com/hpcugent/vsc_user_docs/tree/main/mkdocs/docs/HPC/examples/Getting_Started/tensorflow_mnist", + "4": "https://login.hpc.ugent.be", + "5": "https://docs.hpc.ugent.be/web_portal/#file-browser", + "6": "https://docs.hpc.ugent.be/connecting/#winscp" + }, + "previous_title": "getting_started_paragraph_1", + "next_title": "getting_started_paragraph_3", + "OS": "windows", + "reference_link": "https://docs.hpc.ugent.be/Windows/getting_started/#submitting-a-job" +} \ No newline at end of file From 22b62de1087154851d8d0a78dc08f50145a6a134 Mon Sep 17 00:00:00 2001 From: EwDa291 Date: Fri, 30 Aug 2024 14:46:33 +0200 Subject: [PATCH 145/145] removing unnecessary files --- scripts/HPC_chatbot_preprocessor/README.md | 196 --- .../chatbot_parser.py | 1236 ----------------- .../HPC_chatbot_preprocessor/requirements.txt | 4 - 
.../generic/tps1/tps1_paragraph_1.txt | 6 - .../tps1/tps1_paragraph_1_metadata.json | 15 - .../generic/tps1/tps1_paragraph_3.txt | 3 - .../tps1/tps1_paragraph_3_metadata.json | 12 - .../linux/tps1/tps1_linux_paragraph_2.1.txt | 4 - .../tps1_linux_paragraph_2.1_metadata.json | 15 - .../linux/tps1/tps1_linux_paragraph_2.2.txt | 3 - .../tps1_linux_paragraph_2.2_metadata.json | 12 - .../macos/tps1/tps1_macos_paragraph_2.1.txt | 4 - .../tps1_macos_paragraph_2.1_metadata.json | 15 - .../macos/tps1/tps1_macos_paragraph_2.2.txt | 3 - .../tps1_macos_paragraph_2.2_metadata.json | 12 - .../tps1/tps1_windows_paragraph_2.1.txt | 7 - .../tps1_windows_paragraph_2.1_metadata.json | 15 - .../tps1/tps1_windows_paragraph_2.2.txt | 6 - .../tps1_windows_paragraph_2.2_metadata.json | 12 - .../tests/test_files/ftps/tps1.md | 43 - .../tts1/Main-title/Subtitle-1/Subtitle-1.txt | 2 - .../Subtitle-1/Subtitle-1_metadata.json | 12 - .../Main-title/Subtitle-5-g/Subtitle-5-g.txt | 1 - .../Subtitle-5-g/Subtitle-5-g_metadata.json | 12 - .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 - .../Subtitle-2-g/Subtitle-2-g_metadata.json | 15 - .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 3 - .../Subtitle-4-l&m_metadata.json | 15 - .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 - .../Subtitle-2-g/Subtitle-2-g_metadata.json | 15 - .../Subtitle-4-l&m/Subtitle-4-l&m.txt | 3 - .../Subtitle-4-l&m_metadata.json | 15 - .../Main-title/Subtitle-2-g/Subtitle-2-g.txt | 4 - .../Subtitle-2-g/Subtitle-2-g_metadata.json | 15 - .../Subtitle-3-w/Subtitle-3-w.txt | 3 - .../Subtitle-3-w/Subtitle-3-w_metadata.json | 15 - .../tests/test_files/ftts/tts1.md | 31 - .../if_mangler_1_input.md | 4 - .../if_mangler_1_output.md | 4 - .../if_mangler_2_input.md | 7 - .../if_mangler_2_output.md | 7 - .../if_mangler_3_input.md | 6 - .../if_mangler_3_output.md | 6 - .../if_mangler_4_input.md | 4 - .../if_mangler_4_output.md | 4 - .../if_mangler_5_input.md | 11 - .../if_mangler_5_output.md | 11 - .../if_mangler_6_input.md | 8 - 
.../if_mangler_6_output.md | 8 - .../if_mangler_7_input.md | 9 - .../if_mangler_7_output.md | 9 - .../if_mangler_test_files/if_mangler_input.md | 55 - .../if_mangler_output.md | 55 - .../tests/test_files/list_file/list_test.md | 15 - .../tests/test_full_script.py | 68 - .../tests/test_if_mangler.py | 32 - .../tests/test_insert_links.py | 31 - .../tests/test_links.py | 69 - .../tests/test_lists.py | 27 - .../tests/test_make_valid_title.py | 14 - .../tests/test_replace_markdown_markers.py | 46 - .../tests/test_write_metadata.py | 15 - 62 files changed, 2317 deletions(-) delete mode 100644 scripts/HPC_chatbot_preprocessor/README.md delete mode 100644 scripts/HPC_chatbot_preprocessor/chatbot_parser.py delete mode 100644 scripts/HPC_chatbot_preprocessor/requirements.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json delete mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json delete mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt delete mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md delete mode 100644 
scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_full_script.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_links.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_lists.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py delete mode 100644 scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py diff --git a/scripts/HPC_chatbot_preprocessor/README.md b/scripts/HPC_chatbot_preprocessor/README.md deleted file mode 100644 index 6cfd9be82315..000000000000 --- a/scripts/HPC_chatbot_preprocessor/README.md +++ /dev/null @@ -1,196 +0,0 @@ -# Chatbot parser - -`chatbot_parser.py` is a script that transforms the markdown source files into a structured directory as input for a chatbot. - -## Usage - -The script can be run in a shell environment with the following command: - -```shell -python chatbot_parser.py -``` - -This command has the following possible options: - -```shell -chatbot_parser.py [-h] -src SOURCE -dst DESTINATION [-st] [-pl MIN_PARAGRAPH_LENGTH] [-td MAX_TITLE_DEPTH] [-l] [-dd] -``` - -### Options - -#### `h`/`help` - -Display the help message - -#### `src`/`source` - -This is a required option that specifies the source directory of the input files for the script. This location is also used to look for jinja templates when using jinja to parse the source files (such as the `macros` directory within `vsc_user_docs/mkdocs/docs/HPC`).
- -#### `dst`/`destination` - -This is a required option that specifies where the output of the script should be written. The script also generates extra intermediate subdirectories, so subdirectories with the following names shouldn't be present in the destination directory: `parsed_mds`, `copies` and `if_mangled_files`. If any of these pose a problem, the name of the intermediate subdirectory used for the script can be changed in the macros at the top of the script. - -#### `st`/`split_on_titles` - -Including this option will split the source files based on the titles and subtitles in the markdown text. Not including this option will split the text on paragraphs with a certain minimum length. - -#### `pl`/`min_paragraph_length` - -This option allows the user to configure the minimum length a paragraph must be. Some deviations from this minimum length are possible (for example at the end of a file). The default value for this minimum paragraph length is 512 tokens. This option only works if `split_on_titles` is not enabled. - -#### `td`/`max_title_depth` - -This option allows the user to configure the maximum "title depth" (the number of `#` in front) to be used as borders between sections if `split_on_titles` is enabled. The default value is 4. - -#### `l`/`links` - -Some of the source files might contain links. Including this option will retain the links in the plaintext. If this option is not included, the links will be dropped from the plaintext. - -#### `dd`/`deep_directories` - -Including this option will make the script generate a "deep directory" where every title encountered will be made into a subdirectory of its parent title (so, for example, a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). This option only works if `split_on_titles` is enabled. - -## Generated file structure - -The generated directory structure is written as a subdirectory of `parsed_mds`.
In `parsed_mds`, two subdirectories can be found: - -- `generic` contains the parts of the markdown sources that were non-OS-specific -- `os_specific` contains the parts of the markdown sources that were OS-specific - -Within `os_specific` a further distinction is made for each of the three possible operating systems included in the documentation. - -Both the generic and each of the three os-specific directories then contain a directory for each source file. - -If the option `deep_directories` is not enabled, all paragraphs of the source file and their corresponding metadata will be saved in this directory. The (processed) plaintext of the paragraph is written to a `.txt` file and the metadata is written to a `.json` file. - -If the option `deep_directories` is enabled, the directory of each source file will contain a subdirectory structure corresponding to the structure of the subtitles at different levels in the source file. Each subtitle in the source file corresponds to a directory nested in the directory of its parent title (So for example a title with three `#`s will be made a subdirectory of the most recent title with two `#`s). - -Finally, each of these subtitle-specific subdirectories contains a `.txt` file with the (processed) plaintext of that section and a `.json` file with the metadata of that section. - -## Requirements - -- The required Python packages are listed in `requirements.txt` - -## Restrictions on source-files - -Due to the nature of the script, some restrictions should be taken into account about the markdown files it can use as input. - -### Nested if structures - -The script uses the if-structures in the source-files to split the documentation into general documentation and os-specific documentation. As such it needs to keep track of which types of if-structures (os-related/non-os-related) it is reading from. When using certain nested if-structures, this will cause problems. 
The supported nested if-structures are determined by the macros `NON_OS_IF`, `NON_OS_IF_IN_OS_IF`, `OS_IF` and `OS_IF_IN_OS_IF`. These are, respectively, a non-os-related if-structure, a non-os-related if nested in an os-related one, an os-related if-structure and an os-related if-structure nested in another os-related if-structure. All of these are allowed to be nested in an arbitrary number of non-os-related if-structures, but no non-os-related if-structures should be nested in them. It is also not allowed to nest any of the allowed structures in more os-related if-structures. - -#### Examples of valid and invalid if-structures - -##### Allowed - -###### non-os-related in os-related - -This is an example of one of the basic allowed if-structures (`NON_OS_IF_IN_OS_IF`) - -``` -if OS == windows: - if site == Gent: - ... - endif -endif -``` - -###### os-related in os-related in non-os-related - -This is an example of the basic allowed if-structure `OS_IF_IN_OS_IF` nested in a non-os-specific if. - -``` -if site == Gent: - if OS == windows: - ... - else: - if OS == Linux: - ... - endif - endif -endif -``` - -##### Not allowed - -###### non-os-related in os-related in os-related - -This is an example of a non-os-related if-structure nested in one of the basic allowed if-structures (`OS_IF_IN_OS_IF`). - -``` -if OS != windows: - if OS == Linux: - if site == Gent: - ... - endif - endif -endif -``` - -This will result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it. - -###### os-related in non-os-related in os-related - -This is an example of the basic allowed if-structure `OS_IF` (indirectly) nested in an os-specific if-structure. - -``` -if OS != windows: - if site == Gent: - if OS == Linux: - ... - endif - endif -endif -``` - -This will also result in the parser "forgetting" it opened an os-specific if-statement with OS != windows and not properly closing it.
- -### Non OS-related if-statements - -Due to the way jinja parses the source files, the script slightly alters non os-specific if-statements as well. It expects if-statements of the following form: - -``` -{%- if site == gent %} -{% if site != (gent or brussel) %} -``` - -All spaces and the dash are optional. City names don't need to be fully lowercase since the parser will capitalize them properly anyway. - -### html syntax - -The input shouldn't contain any html syntax. While some failsafes are in place, the script isn't made with the use case of handling html syntax in mind. - -### Comments - -Any comments within the markdown files (for example TODOs) should use the following syntax: - -``` -<!-- your comment --> -``` - and should be limited to one line. - -Comments can be written in such a way that the script will keep them as input for the bot. To do that, the marker `INPUT_FOR_BOT` should be put in front of the content of the comment, like so: - -``` -<!--INPUT_FOR_BOT: your comment for the bot--> -``` - -This will be reworked to - -``` -your comment for the bot -``` - -in the final output. - -### Long filepaths - -Due to the nature of this script, it can generate large directories with very long names if `deep_directories` is enabled. Depending on the operating system, this can cause problems with filepaths being too long, resulting in files that cannot be opened. A possible fix for this is to make sure the filepath to where the script is located is not too long. Another solution is lowering the `max_title_depth` or disabling `deep_directories`. - -### Markdown lists - -The parser is made in a way to detect lists and not split them into multiple paragraphs. The kinds of lists it can detect are all lists with the markers `-`, `+`, `*` and lists indexed with numbers or letters (one letter per list entry). It can handle list entries being spread out over multiple lines if there is an indentation of at least two spaces. It can also handle multi-paragraph list entries in this way, as long as the indentation stays.
- -### Links - -Part of the metadata of the parser are links. In order for the links to be built up in the right way, links to external sites should always start with either `https://` or `http://`. diff --git a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py b/scripts/HPC_chatbot_preprocessor/chatbot_parser.py deleted file mode 100644 index 24e0b287a0a4..000000000000 --- a/scripts/HPC_chatbot_preprocessor/chatbot_parser.py +++ /dev/null @@ -1,1236 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import copy -import json -import os -import re -import shutil -import tiktoken -import yaml -from itertools import chain, tee, zip_longest -from pathlib import Path -from jinja2 import FileSystemLoader, Environment, ChoiceLoader, FunctionLoader, Template - -#################### define macro's #################### -# options -SOURCE_DIRECTORY = "SOURCE_DIRECTORY" -DESTINATION_DIRECTORY = "DESTINATION_DIRECTORY" -SPLIT_ON_TITLES = "SPLIT_ON_TITLES" -MIN_PARAGRAPH_LENGTH = "MIN_PARAGRAPH_LENGTH" -MAX_TITLE_DEPTH = "MAX_TITLE_DEPTH" -INCLUDE_LINKS_IN_PLAINTEXT = "INCLUDE_LINKS_IN_PLAINTEXT" -SPLIT_ON_PARAGRAPHS = "SPLIT_ON_PARAGRAPHS" -DEEP_DIRECTORIES = "DEEP_DIRECTORIES" -VERBOSE = "VERBOSE" - -# directories -PARSED_MDS = "parsed_mds" -COPIES = "copies" -IF_MANGLED_FILES = "if_mangled_files" -LINUX_TUTORIAL = "linux-tutorial" -RETURN_DIR = ".." 
-MKDOCS_DIR = "mkdocs" -DOCS_DIR = "docs" -HPC_DIR = "HPC" -EXTRA_DIR = "extra" -GENERIC_DIR = "generic" -OS_SPECIFIC_DIR = "os_specific" -MACROS = "macros" - -# OSes -LINUX = "linux" -WINDOWS = "windows" -MACOS = "macos" -GENERIC = "generic" -LINK_OS = {LINUX: "Linux", WINDOWS: "Windows", MACOS: "macOS"} # OS needs different capitalisation for use in links - -# urls -REPO_URL = 'https://github.com/hpcugent/vsc_user_docs' -DOCS_URL = "https://docs.hpc.ugent.be" - -# OS-related if-states -ACTIVE = "active" -INACTIVE = "inactive" - -# if mangler states -NON_OS_IF = 0 -NON_OS_IF_IN_OS_IF = 1 -OS_IF = 2 -OS_IF_IN_OS_IF = 3 - -# if mangler macros -IF_MANGLED_PART = "-if-" - -# actions -DONE = "done" -WRITE_TEXT = "write_text" -CHECK_EXTRA_MESSAGE = "check_extra_message" -WRITE_TEXT_AND_CHECK_EXTRA_MESSAGE = "write_text_and_check_extra_message" - -# Metadata attributes -SOURCE_FILE = "source_file" -MAIN_TITLE = "main_title" -SUBTITLE = "subtitle" -TITLE_DEPTH = "title_depth" -DIRECTORY = "directory" -LINKS = "links" -PARENT_TITLE = "parent_title" -PREVIOUS_SUBTITLE = "previous_title" -NEXT_SUBTITLE = "next_title" -METADATA_OS = "OS" -REFERENCE_LINK = "reference_link" - -# if-structure components -IF = "if" -ELSE = "else" -ENDIF = "endif" - -# link indicator -LINK_MARKER = r'§link§link§' - -# HTML tags -HTML_TAGS = ["pre", "b", "code", "sub", "br", "center", "p", "div", "u", "p", "i", "tt", "a", "t", "span"] # make sure these are always lowercase - -# regex patterns -IF_MANGLED_PATTERNS = { - IF: r'({' + IF_MANGLED_PART + r'%[-\s]*if\s+OS\s*[!=]=\s*.+?[-\s]*%' + IF_MANGLED_PART + '})', - ELSE: r'({' + IF_MANGLED_PART + r'%\s*-?\s*else\s*-?\s*%' + IF_MANGLED_PART + '})', - ENDIF: r'({' + IF_MANGLED_PART + r'%\s*-?\s*endif\s*-?\s*%' + IF_MANGLED_PART + '})' - } - -# filenames (and parts of filenames) -TEMP_JINJA_FILE = "jinja_file.txt" -_PARAGRAPH_ = "_paragraph_" -METADATA_EXTENSION = "_metadata" - -# Marker for comments for the bot -INPUT_FOR_BOT = "INPUT_FOR_BOT: " - -# 
Standard strings for verbose output -LINE = "------------------------------------------------------------------------------------------------------\n" - - -################### define functions ################### - -def check_for_title(line, in_code_block, curr_dirs, options): - """ - function that checks for titles in the current line. Used by split_text to split the text among the subtitles - - :param line: the current line to be checked for a title - :param in_code_block: boolean indicating whether the current line is part of a codeblock to be sure comments aren't counted as titles - :param curr_dirs: the current working directories for each level of subtitle, to be updated when a new title is found - :param options: dictionary containing the options given by the user - :return title_length: The amount of hashtags in front of the title on the current line - """ - # detect titles - match = re.match(r'^#+ ', line) - if match and len(match.group(0)) <= options[MAX_TITLE_DEPTH] + 1 and not in_code_block: - title_length = len(match.group(0)) - 1 - if options[DEEP_DIRECTORIES]: - curr_dirs[title_length] = os.path.join(curr_dirs[title_length - 1], make_valid_title(line[title_length + 1:-1].replace(' ', '-'))) - - # update the higher order current directories - for i in range(title_length + 1, options[MAX_TITLE_DEPTH] + 1): - curr_dirs[i] = curr_dirs[title_length] - - return title_length - else: - return 0 - - -def make_valid_link(link, main_title, is_linux_tutorial): - """ - Function that converts a string to a valid link to be used in the metadata - - :param link: the input string to be turned into a valid link - :param main_title: the main title of the file that contains the link - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return link: the valid link - """ - - # ugly fix for problem with links - linux_tutorial_files = ["beyond_the_basics", "common_pitfalls", "getting_started", "hpc_infrastructure", "index", 
"manipulating_files_and_directories", "navigating", "uploading_files"] - if is_linux_tutorial and any([linux_tutorial_files[i] in link for i in range(len(linux_tutorial_files))]): - linux_part = LINUX_TUTORIAL + '/' - else: - linux_part = "" - - if link.startswith('http://') or link.startswith('https://') or link.startswith('mailto:'): - pass - else: - if link.startswith("./"): - link = link.replace('./', '') - elif link.startswith("../"): - link = link.replace('../', '') - - if link.startswith("#"): - link = DOCS_URL + '/' + linux_part + main_title + "/" + link - elif link.endswith(".md") and ("/" not in link or "." not in link.split("/")[0]): - link = DOCS_URL + '/' + linux_part + link.replace(".md", "") - elif '.md#' in link: - link = DOCS_URL + '/' + linux_part + link.replace(".md", "/") - else: - link = DOCS_URL + '/' + linux_part + link - - link = link.replace('index/', '').replace('/index', '') - - return link - - -def replace_markdown_markers(curr_line, linklist, in_code_block, main_title, is_linux_tutorial): - """ - function that replaces certain markdown structures with the equivalent used on the website - - :param curr_line: the current line on which markdown structures need to be replaced - :param linklist: the list used to store links that need to be printed at the end of the file - :param in_code_block: boolean indicating whether the current line is part of a code block - :param main_title: the main title of the file that is being processed - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return curr_line: the adapted current line - :return linklist: the updated linklist - """ - - # replace images with an empty line - if re.search(r'(?i)!\[image]\(.*?\)', curr_line) or re.search(r'!\[.*?]\(img/.*?\.png\)', curr_line): - curr_line = "" - - # replace links with a reference - matches = re.findall(r'\[(.*?)]\((.*?)\)', curr_line) - if matches: - for match in matches: - curr_line = 
curr_line.replace(f"[{match[0]}]({match[1]})", match[0] + LINK_MARKER + str(len(linklist)) + LINK_MARKER) - - linklist.append(make_valid_link(match[1], main_title, is_linux_tutorial)) - - # codeblock (with ``` -> always stands on a separate line, so line can be dropped) - if '```' in curr_line: - curr_line = "" - - # structures within <> - match = re.findall(r'<(.*?)>', curr_line) - if match: - for i, content in enumerate(match): - html_tags_variations = list(chain.from_iterable([[element, element + "/", "/" + element] for element in HTML_TAGS])) - html_tags_style = [element + " style=.*" for element in HTML_TAGS] - - # add references for every link of format
- if re.search(r'a href=.*', content): - link = content[7:] - curr_line = re.sub(f'<{content}>', LINK_MARKER + str(len(linklist)) + LINK_MARKER, curr_line) - linklist.append(link) - - # drop the syntax words - elif content.lower() in html_tags_variations: - curr_line = re.sub(f'<{content}>', "", curr_line) - - # drop the version of the HTML_TAGS followed by " style=" - elif any(re.match(pattern, content) for pattern in html_tags_style): - curr_line = re.sub(r'<.*?>', "", curr_line) - - # keep comments for bot - elif re.fullmatch(r'!--' + INPUT_FOR_BOT + r'.*?--', content): - curr_line = re.sub(r'', lambda m: m.group(1), curr_line) - - # drop comments - elif re.fullmatch(r'!--.*?--', content): - curr_line = re.sub(r'<.*?>', "", curr_line) - - # drop the <> around links - elif re.match(r'http://', content) or re.match(r'https://', content): - curr_line = re.sub(r'<' + content + '>', content, curr_line ) - - # keep the rest - else: - pass - - # structures with !!! (info, tips, warnings) - if '!!!' in curr_line: - curr_line = re.sub(r'!!!', "", curr_line) - - # structures with ??? (collapsable admonitions) - if '???' in curr_line: - curr_line = re.sub(r'\?\?\?', "", curr_line) - - # get rid of other indicators (`, *, +, _) - if not in_code_block: - - backquotes = re.findall(r'`(.*?)`', curr_line) - if backquotes: - for i, content in enumerate(backquotes): - curr_line = curr_line.replace(f"`{content}`", content) - - asterisks = re.findall(r'(?' 
in line) ^ ('' in line)): - in_code_block = not in_code_block - if options[VERBOSE]: - if in_code_block: - print("Detected start of a codeblock, not registering titles") - else: - print("Detected end of codeblock, registering titles again") - - # only split up if current line is in a fully non-os-specific section - if in_if_statement == 0: - - title_level = check_for_title(line, in_code_block, curr_dirs, options) - - # line is a title with a maximum depth of 4 - if title_level > 0: - if after_first_title: - - # write text of previous file - if previous_contained_if: - paragraphs_os_text[title] = current_paragraph - if options[VERBOSE]: - print("Saved os-specific chunk with temporary title: " + title + "\n") - else: - paragraphs_os_free_text[title] = current_paragraph - if options[VERBOSE]: - print("Saved generic chunk with title: " + title + "\n") - - # write metadata of previous file - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, last_dir, options[SOURCE_DIRECTORY] + '/' + main_title + '.md') - - # make a new title - title = make_valid_title(line[title_level + 1:-1]) - - # create an entry for the file in the paragraphs text dictionary - current_paragraph = "" - - after_first_title = True - subtitle_order.append(title) - - # reset link_list - link_list = [] - - previous_contained_if = False - - # line is not a title - elif after_first_title: - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) - if line != "\n": - current_paragraph += line - - # keep track of title level and directory to write to metadata upon discovering a new subtitle - if title_level > 0: - last_title_level = title_level - last_dir = curr_dirs[last_title_level] - else: - previous_contained_if = True - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) - if line != "\n": - current_paragraph += line - - # write dictionaries for the last file - 
if previous_contained_if: - paragraphs_os_text[title] = current_paragraph - if options[VERBOSE]: - print("Saved os-specific chunk with temporary title: " + title + "\n") - else: - paragraphs_os_free_text[title] = current_paragraph - if options[VERBOSE]: - print("Saved generic chunk with title: " + title + "\n") - paragraphs_metadata[title] = write_metadata(main_title, title, link_list, last_title_level, curr_dirs[last_title_level], options[SOURCE_DIRECTORY] + '/' + main_title + '.md') - - return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order - - -def split_on_paragraphs(file, main_title, options, is_linux_tutorial, current_paragraph_number=-1, OS=GENERIC): - """ - Function that splits the text into smaller sections based on the paragraph structure and makes them into two dictionaries containing text and metadata - - :param file: the filepath of the file to be split - :param main_title: the main title of the file - :param options: dictionary containing the options given by the user - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :param current_paragraph_number: number of the paragraph that is being split, only applicable when splitting an os-specific paragraph - :param OS: the OS of the file to be split, only applicable when splitting an os-specific paragraph - :return paragraphs_text: dictionary containing the split sections of text - :return paragraphs_metadata: dictionary containing the metadata of each split section of text - :return subtitle_order: list containing all encountered subtitles in order of appearance - """ - - if options[VERBOSE]: - print("Splitting on paragraphs\n") - - # start of assuming we are not in a code_block - in_code_block = False - - # define initial dictionaries - paragraphs_os_free_text = {} - paragraphs_os_text = {} - paragraphs_metadata = {} - - # variable to keep track of the current paragraph - current_paragraph = "" - - # list to keep track of 
links in the text - link_list = [] - - # list to keep track of the order of the subtitles - subtitle_order = [] - - # variable to keep track of how many if-statements deep the current line is - in_if_statement = 0 - - # variable to indicate that previous section was one with if-statements - previous_contained_if = False - - # variable to indicate that the previous line was part of a list - in_list = False - - # paragraph number to add to title - paragraph_number = 1 - - # metadata title - metadata_title = main_title - - # define metadata data if split occurs on paragraphs and last_title and title_level are known (will be replaced later on in the process) - if current_paragraph_number != -1: - last_title_level = 4 - last_dir = "PLACEHOLDER" - - # list to keep track of most recent directories on each title level - curr_dirs = [main_title for _ in range(options[MAX_TITLE_DEPTH] + 1)] - - with open(file, 'r') as readfile: - - # Create two independent iterators from the original file iterator (needed to check for lists) - current_line, next_line = tee(readfile) - - # Advance the next_line iterator by one step, so it is always one step ahead - next(next_line, None) - - # Process the lines - for line, nxt in zip_longest(current_line, next_line, fillvalue=""): - - # detect if-statements starting or ending on the current line - in_if_statement += len(re.findall(IF_MANGLED_PATTERNS[IF], line)) - len( - re.findall(IF_MANGLED_PATTERNS[ENDIF], line)) - - # detect whether the current line is in a list - if re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', line): # beginning of a list entry - in_list = True - if options[VERBOSE]: - print("First line of new list entry found, not starting new paragraphs: " + line[:-1]) - elif re.search(r'^\s{2,}.+$', line) and in_list: # middle of a list entry - pass - elif re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$|^\s{2,}.+$|^\n', nxt) and in_list: # line(s) between list entries - pass - elif 
re.search(r'^(\s*)([*+-]|\d+\.|[a-zA-Z]\.)\s+.*$', nxt): - in_list = True - elif in_list: - if options[VERBOSE]: - print("List ended, starting new paragraphs again") - in_list = False - else: - in_list = False - - # detect codeblocks to make sure titles aren't detected in them - if '```' in line or (('
' in line) ^ ('
' in line)): - in_code_block = not in_code_block - if options[VERBOSE]: - if in_code_block: - print("Detected start of a codeblock, not starting new paragraphs") - else: - print("Detected end of codeblock, starting new paragraphs again") - - # only split up if current line is in a fully non-os-specific section - if in_if_statement == 0: - - title_level = check_for_title(line, in_code_block, curr_dirs, options) - - # check whether a new paragraph should be started - if line == "\n" and paragraph_long_enough(re.sub(r'\{' + IF_MANGLED_PART + '%.*?%' + IF_MANGLED_PART + '}', "", current_paragraph), options) and not in_code_block and not in_list: - - # create a title for the previous paragraph - if current_paragraph_number == -1: - paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) - else: - paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) - paragraph_number += 1 - - # write text of previous file - if previous_contained_if: - paragraphs_os_text[paragraph_title] = current_paragraph - if options[VERBOSE]: - print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") - else: - paragraphs_os_free_text[paragraph_title] = current_paragraph - if options[VERBOSE]: - print("Saved generic chunk with title: " + paragraph_title + "\n") - - # write metadata of previous file - paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, last_dir, source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') - subtitle_order.append(paragraph_title) - - # reset the current paragraph - current_paragraph = "" - - # reset link_list - link_list = [] - - previous_contained_if = False - - # line is a title with a maximum depth of 4 - elif title_level > 0: - - # make a new title - metadata_title = make_valid_title(line[title_level + 1:-1]) - - line, link_list = replace_markdown_markers(line[title_level + 1:], link_list, in_code_block, 
main_title, is_linux_tutorial) - current_paragraph += line - - # line is not a title or the beginning of a new paragraph - elif line != "\n" or previous_contained_if: - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) - current_paragraph += line - - # keep track of title level and directory to write to metadata upon discovering a new subtitle - if title_level > 0: - last_title_level = title_level - last_dir = curr_dirs[last_title_level] - else: - previous_contained_if = True - line, link_list = replace_markdown_markers(line, link_list, in_code_block, main_title, is_linux_tutorial) - current_paragraph += line - - # create a title for the last paragraph - if current_paragraph_number == -1: - paragraph_title = main_title + _PARAGRAPH_ + str(paragraph_number) - else: - paragraph_title = main_title + "_" + OS + _PARAGRAPH_ + str(current_paragraph_number) + '.' + str(paragraph_number) - - # write dictionaries for the last file - if previous_contained_if: - paragraphs_os_text[paragraph_title] = current_paragraph - if options[VERBOSE]: - print("Saved os-specific chunk with temporary title: " + paragraph_title + "\n") - else: - paragraphs_os_free_text[paragraph_title] = current_paragraph - if options[VERBOSE]: - print("Saved generic chunk with title: " + paragraph_title + "\n") - paragraphs_metadata[paragraph_title] = write_metadata(main_title, metadata_title, link_list, last_title_level, curr_dirs[last_title_level], source_file=options[SOURCE_DIRECTORY] + '/' + main_title + '.md') - subtitle_order.append(paragraph_title) - - return paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order - - -def paragraph_long_enough(paragraph, options): - """ - Function that checks if the paragraph is long enough to be split of - - :param paragraph: current paragraph - :param options: dictionary containing the options given by the user - :return: - """ - encoding = tiktoken.get_encoding("cl100k_base") - 
token_amount = len(encoding.encode(paragraph)) - - return token_amount >= options[MIN_PARAGRAPH_LENGTH] - - -def write_metadata(main_title, subtitle, links, title_level, directory, source_file): - """ - Function that writes metadata about a text section to a dictionary - - :param main_title: The main title of the file containing the section - :param subtitle: the title of the section - :param links: a list of links contained within the section - :param title_level: the depth of the title of the section - :param directory: the directory where the section will eventually be written (can either be generic or os-specific) - :param source_file: the source file that the section originates from - :return paragraph_metadata: dictionary containing the metadata about the section - """ - - paragraph_metadata = {MAIN_TITLE: main_title, SUBTITLE: subtitle, SOURCE_FILE: source_file, TITLE_DEPTH: title_level, DIRECTORY: directory} - - if len(links) > 0: - paragraph_metadata[LINKS] = {} - for i, link in enumerate(links): - paragraph_metadata[LINKS][str(i)] = link - - paragraph_metadata[PARENT_TITLE] = Path(directory).parent.name - - return paragraph_metadata - - -def jinja_parser(filename, copy_location, options): - """ - function that let's jinja do its thing to format the files except for the os-related if-statements - - :param filename: the name of the file that needs to be formatted using jinja - :param copy_location: the location of the file that needs to be formatted using jinja - :param options: dictionary containing the options given by the user - :return: - """ - # YAML file location - yml_file_path = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, EXTRA_DIR, 'gent.yml') - - if options[VERBOSE]: - print("Reading YAML file from location: " + yml_file_path) - - # Read the YAML file - with open(yml_file_path, 'r') as yml_file: - words_dict = yaml.safe_load(yml_file) - - # ugly fix for index.md error that occurs because of the macro "config.repo_url" in 
mkdocs/docs/HPC/index.md - additional_context = { - 'config': { - 'repo_url': REPO_URL - } - } - combined_context = {**words_dict, **additional_context} - - if options[VERBOSE]: - print("Mangling OS-specific if-statements") - - # Mangle the OS-related if-statements - mangle_ifs(copy_location, filename, options) - - if options[VERBOSE]: - print("Altering other if-statements to parse properly") - - # Alter the other if-statements - alter_ifs(filename, options) - - # Use Jinja2 to replace the macros - template_loader = ChoiceLoader([FileSystemLoader(searchpath=[os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), options[SOURCE_DIRECTORY], os.path.join(options[SOURCE_DIRECTORY], RETURN_DIR)]), FunctionLoader(load_macros)]) - templateEnv = Environment(loader=template_loader) - template = templateEnv.get_template(filename) - rendered_content = template.render(combined_context) - - if options[VERBOSE]: - print("jinja parsing finished\nWriting jinja-parsed file to location: " + copy_location) - - # Save the rendered content to a new file - with open(copy_location, 'w', encoding='utf-8', errors='ignore') as output_file: - output_file.write(rendered_content) - - -def load_macros(name): - """ - function used by the jinja FunctionLoader to retrieve templates from the macros folder since the normal FileSystemLoader can't locate them properly - - :param name: name of the package - :return: - """ - - macros_location = os.path.join(RETURN_DIR, RETURN_DIR, MKDOCS_DIR, DOCS_DIR, MACROS) - - if "../" + MACROS + "/" in name: - package_name = name.split("../" + MACROS + "/")[1] - file_location = os.path.join(macros_location, package_name) - - with open(file_location, 'r') as readfile: - return readfile.read() - - -def mangle_os_ifs(line, is_os, options): - """ - function that mangles the os-related if-statements. This is needed because we want to keep these if-statements intact after jinja-parsing to build the directory structure. 
- We don't want to mangle all if-related statements (such as else and endif) so we need to keep track of the context of the last few if-statements. - - :param line: the current line to check for os-related if-statements - :param is_os: variable keep track of the current os-state of the if-statements. Can be NON_OS_IF, NON_OS_IF_IN_OS_IF, OS_IF or OS_IF_IN_OS_IF - NON_OS_IF: not in an os-if - NON_OS_IF_IN_OS_IF: in a non-os-if nested in an os-if - OS_IF: in an os-if - OS_IF_IN_OS_IF: in an os-if nested in an os-if - :param options: dictionary containing the options given by the user - :return line: the modified line with mangled os-related if-statements - """ - - match = re.search(r'\{%(.*?)%}(.*)', line) - - start_index = 0 - added_length = 0 - - while match: - - constr_match = re.search(r'\{%.*?%}', match.string) - if_match = re.search(r'if ', match.group(1)) - if_os_match = re.search(r'if OS', match.group(1)) - endif_match = re.search(r'endif', match.group(1)) - else_match = re.search(r'else', match.group(1)) - - # mangle positions - pos_first_mangle = constr_match.start() + start_index + added_length + 1 - pos_second_mangle = constr_match.end() + start_index + added_length - 1 - - # different parts of the original string - part_before_mangling = line[:pos_first_mangle] - part_between_mangling = line[pos_first_mangle:pos_second_mangle] - part_after_mangling = line[pos_second_mangle:] - - # this logic isn't flawless, there are number of nested if-constructions that are technically possible that would break this logic, but these don't appear in the documentation as it doesn't make sense to have these - if endif_match: - if is_os in (OS_IF, OS_IF_IN_OS_IF): - if options[VERBOSE]: - print("OS-specific endif statement found in line: " + line[:-1]) - line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling - added_length += 2 * len(IF_MANGLED_PART) - if is_os == OS_IF: - is_os = NON_OS_IF - elif is_os == 
OS_IF_IN_OS_IF: - is_os = OS_IF - elif is_os == NON_OS_IF_IN_OS_IF: - is_os = OS_IF - - elif if_match: - if if_os_match: - if options[VERBOSE]: - print("OS-specific if statement found in line: " + line[:-1]) - line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling - added_length += 2 * len(IF_MANGLED_PART) - if is_os == OS_IF: - is_os = OS_IF_IN_OS_IF - else: - is_os = OS_IF - else: - if is_os == OS_IF: - is_os = NON_OS_IF_IN_OS_IF - else: - is_os = NON_OS_IF - - elif else_match: - if is_os in (OS_IF, OS_IF_IN_OS_IF): - if options[VERBOSE]: - print("OS-specific else statement found in line: " + line[:-1]) - line = part_before_mangling + IF_MANGLED_PART + part_between_mangling + IF_MANGLED_PART + part_after_mangling - added_length += 2 * len(IF_MANGLED_PART) - - start_index += constr_match.end() - match = re.search(r'\{%(.*?)%}(.*)', match.group(2)) - return line, is_os - - -def mangle_ifs(directory, filename, options): - """ - function that writes the if-mangled version of a file to a location where the jinja parser will use it - - :param directory: the directory of the file to be if mangled - :param filename: the filename of the file to be mangled - :param options: dictionary containing the options given by the user - :return: - """ - # variable to keep track of latest if-statement scope - is_os = NON_OS_IF - - with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'w') as write_file: - with open(directory, 'r') as read_file: - for line in read_file: - new_line, is_os = mangle_os_ifs(line, is_os, options) - write_file.write(new_line) - - -def alter_ifs(filename, options): - """ - Function that further adapts the if-statements in a file and writes it to a location where the jinja parser will use it. - This is because the jinja parser doesn't seem to be able to handle statements like {% site == gent %} with context {'site': 'Gent'} in this case. 
- These statements get changed to {% site == 'Gent' %} in this function. - - :param filename: the filename of the file to be transformed - :param options: dictionary containing the options given by the user - :return: - """ - - with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'r') as read_file: - content = read_file.read() - - pattern = r'(\{%-?\s?[a-zA-Z\s]*?[!=]=\s?\(?)([a-zA-Z\s]+(?:\sor\s[a-zA-Z\s]+)*)(\)?\s?%})' - content = re.sub(pattern, - lambda match: (f"{match.group(1)}" + - " or ".join([f"'{city.strip().capitalize()}'" for city in match.group(2).split(" or ")]) + - f"{match.group(3)}" - ), - content) - - with open(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES, filename), 'w') as write_file: - write_file.write(content) - - -def make_valid_title(title): - """ - function that makes sure all titles can be used as valid filenames - - :param title: the string that will be used as title and filename - :return valid_filename: the adapted title that can be used as filename - """ - # Define a regex pattern for invalid characters on both Windows and Linux - invalid_chars = r'[<>:"/\\|?*\0]' - - # get rid of extra information between {} brackets - title = re.sub(r'\{.*?}', '', title) - - # Remove invalid characters - valid_filename = re.sub(invalid_chars, '', title) - - # Strip leading/trailing whitespace - valid_filename = valid_filename.strip().strip('-').replace(' ', '-').replace("--", "-") - - return valid_filename - - -def write_generic_file(title, paragraphs_text, paragraphs_metadata, title_order, title_order_number, options, is_linux_tutorial): - """ - Function that writes text and metadata of a generic (non-os-specific) file - - :param title: title of section - :param paragraphs_text: dictionary containing all paragraphs of text - :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text - :param title_order: list containing all subtitles in order - :param title_order_number: 
order number of the title of the section that is being written - :param options: dictionary containing the options given by the user - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return: - """ - - if len(paragraphs_text[title]) > 0: - # make the directory needed for the files that will be written - filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR, paragraphs_metadata[title][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) - - if options[VERBOSE]: - print("Writing generic section " + title + " to filepath: " + str(filepath)) - - write_files(title, paragraphs_text[title], paragraphs_metadata, title_order, title_order_number, filepath, GENERIC, options, is_linux_tutorial) - else: - # don't write empty files - pass - - -def write_files(title, text, paragraphs_metadata, title_order, title_order_number, filepath, OS, options, is_linux_tutorial): - """ - Function to write files to a certain filepath - - :param title: title of the section to be written - :param text: section of text to be written - :param paragraphs_metadata: dictionary containing the metadata for all paragraphs of text - :param title_order: list containing all subtitles in order - :param title_order_number: order number of the title of the section that is being written - :param filepath: filepath to write files to - :param OS: OS to be included in the metadata - :param options: dictionary containing the options given by the user - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return: - """ - - metadata = copy.deepcopy(paragraphs_metadata[title]) - - file_title = title - - # write text file - with open(os.path.join(filepath, file_title + ".txt"), 'w') as writefile: - if LINKS in paragraphs_metadata[title].keys(): - adapted_text, metadata[LINKS] = insert_links(text, metadata[LINKS], options) - writefile.write(adapted_text) - else: - writefile.write(text) - - # 
write metadata - # check if links in metadata is not empty - if LINKS in metadata.keys() and len(metadata[LINKS].keys()) == 0: - del metadata[LINKS] - - # add previous subtitle - if title_order_number != 0: - metadata[PREVIOUS_SUBTITLE] = title_order[title_order_number - 1] - else: - metadata[PREVIOUS_SUBTITLE] = None - - # add next subtitle - if title_order_number != len(title_order) - 1: - metadata[NEXT_SUBTITLE] = title_order[title_order_number + 1] - else: - metadata[NEXT_SUBTITLE] = None - - # add OS - metadata[METADATA_OS] = OS - - # add reference link - if is_linux_tutorial: - linux_part = LINUX_TUTORIAL + "/" - else: - linux_part = "" - if OS == GENERIC: - os_part = "" - else: - os_part = LINK_OS[OS] + "/" - if "index" not in paragraphs_metadata[title][MAIN_TITLE]: - metadata[REFERENCE_LINK] = DOCS_URL + "/" + os_part + linux_part + paragraphs_metadata[title][MAIN_TITLE] + "/#" + ''.join(char.lower() for char in paragraphs_metadata[title][SUBTITLE] if char.isalnum() or char == '-').strip('-') - else: - metadata[REFERENCE_LINK] = DOCS_URL - - # write metadata to file - with open(os.path.join(filepath, file_title + METADATA_EXTENSION + ".json"), 'w') as writefile: - json.dump(metadata, writefile, indent=4) - - -def insert_links(text, links, options): - """ - Function that inserts links in the plaintext or takes out the references to the links depending on the value of INCLUDE_LINKS_IN_PLAINTEXT - - :param text: The plaintext that needs to be adapted - :param links: The links that might need to be inserted - :param options: dictionary containing the options given by the user - :return text: The adapted plaintext - :return links: The links that were actually present in the text - """ - - present_links = [] - new_links = {} - for link_number in re.finditer(LINK_MARKER + r'([0-9]*?)' + LINK_MARKER, text): - present_links.append(link_number.group(1)) - if options[INCLUDE_LINKS_IN_PLAINTEXT]: - text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, " " + 
links[link_number.group(1)] + " ", text) - else: - text = re.sub(LINK_MARKER + link_number.group(1) + LINK_MARKER, "", text) - - for link_number in links.keys(): - if link_number in present_links: - new_links[str(len(new_links.keys()))] = links[link_number] - - return text, new_links - - -def split_and_write_os_specific_section(text, metadata, subtitle_order, title_order_number, all_metadata, options, is_linux_tutorial): - """ - Function that splits os-specific sections into subtitles, parses them using jinja and writes them away - - :param text: full os specific section - :param metadata: metadata generated for the full os specific section - :param subtitle_order: order of the subtitles generated by the splitter - :param title_order_number: order number of the section - :param all_metadata: all metadata generated by the splitter - :param options: dictionary containing the options given by the user - :param is_linux_tutorial: boolean indicating whether the current file is part of the linux tutorial - :return: - """ - - # Unmangle if's to use jinja parser - text = re.sub(IF_MANGLED_PART, "", text) - - for OS in [LINUX, WINDOWS, MACOS]: - - # slightly alter if-statements to be able to use predefined macros - text = re.sub(OS, '"' + OS + '"', text) - - # Use jinja to render a different version of the text for each OS - template = Template(text) - jinja_text = template.render(OS=OS) - - if len(jinja_text) != 0: - - # add first subtitle in front of section again - if options[SPLIT_ON_TITLES] or metadata[SUBTITLE] not in make_valid_title(jinja_text[:len(metadata[SUBTITLE]) + 1]): - jinja_text = "#" * metadata[TITLE_DEPTH] + " " + metadata[SUBTITLE].replace("-", " ") + "\n" + jinja_text - else: - jinja_text = "#" * metadata[TITLE_DEPTH] + " " + jinja_text - - # re-adjust text to correct overcorrections - jinja_text = re.sub('"' + OS + '"', OS, jinja_text) - - with open(TEMP_JINJA_FILE, 'w') as writefile: - writefile.write(jinja_text) - - # split in right way - _, 
os_specific_text, os_specific_metadata, os_subtitle_order = split_text(TEMP_JINJA_FILE, metadata[MAIN_TITLE], options, is_linux_tutorial, current_paragraph_number=subtitle_order[title_order_number].split('_')[-1], OS=OS) - - # prepare variables to fix metadata - total_subtitle_order = subtitle_order[:title_order_number] + os_subtitle_order + subtitle_order[title_order_number+1:] - all_metadata.update(os_specific_metadata) - - # write to files - for os_i, os_subtitle in enumerate(os_subtitle_order): - # check that file actually has some content - if len(os_specific_text[os_subtitle]) > 0: - # add the links to the metadata - if LINKS in metadata.keys(): - os_specific_metadata[os_subtitle][LINKS] = metadata[LINKS] - - # fix parent in the metadata - parent_i = 0 - parent_depth = os_specific_metadata[os_subtitle][TITLE_DEPTH] - 1 - parent = os_specific_metadata[os_subtitle][MAIN_TITLE] - - while total_subtitle_order[parent_i] != os_subtitle and parent_i != len(total_subtitle_order): - if all_metadata[total_subtitle_order[parent_i]][TITLE_DEPTH] == parent_depth: - parent = total_subtitle_order[parent_i] - parent_i += 1 - - if options[SPLIT_ON_PARAGRAPHS] and parent != os_specific_metadata[os_subtitle][MAIN_TITLE]: - os_specific_metadata[os_subtitle][PARENT_TITLE] = all_metadata[parent][SUBTITLE] - else: - os_specific_metadata[os_subtitle][PARENT_TITLE] = parent - - # fix directory in the metadata if needed - if options[DEEP_DIRECTORIES]: - if parent == os_specific_metadata[os_subtitle][MAIN_TITLE]: - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(parent, os_specific_metadata[os_subtitle][SUBTITLE]) - else: - os_specific_metadata[os_subtitle][DIRECTORY] = os.path.join(all_metadata[parent][DIRECTORY], os_specific_metadata[os_subtitle][SUBTITLE]) - - # make a directory to save the files - filepath = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR, OS, os_specific_metadata[os_subtitle][DIRECTORY]) - os.makedirs(filepath, exist_ok=True) - 
- if options[VERBOSE]: - print("Writing os-specific section " + os_subtitle + " to filepath: " + str(filepath)) - - # write to files - write_files(os_subtitle, os_specific_text[os_subtitle], os_specific_metadata, total_subtitle_order, os_i + title_order_number, filepath, OS, options, is_linux_tutorial) - else: - # don't write empty files - pass - else: - # don't split empty texts - pass - - -def main(options): - """ - main function - - :param options: dictionary containing the options specified by the user to run the script: - {SOURCE_DIRECTORY: The source directory where the original files are located, - DESTINATION_DIRECTORY: The destination directory where the processed files should be written to, - SPLIT_ON_TITLES: boolean indicating whether to split on titles, - SPLIT_ON_PARAGRAPHS: boolean indicating whether to split on paragraphs (should always be the opposite of SPLIT_ON_TITLES), - MIN_PARAGRAPH_LENGTH: integer representing the minimum length of a paragraph, - MAX_TITLE_DEPTH: integer representing the maximum depth of a title for it to be used when splitting the text, - INCLUDE_LINKS_IN_PLAINTEXT: boolean indicating whether links should be included in the plaintext, - DEEP_DIRECTORIES: boolean indicating whether the generated directories should be nested by title-structure or not, - VERBOSE: enable or disable verbose mode} - :return: - """ - - if options[VERBOSE]: - print("Running chatbot parser with options: " + str(options)) - - if options[DEEP_DIRECTORIES] and options[VERBOSE]: - print("WARNING: This script generates a file structure that contains rather long filepaths. 
Depending on where the script is ran, some of these paths might exceed the maximum length allowed by the system resulting in problems opening the files.") - - # remove the directories from a previous run of the parser if they weren't cleaned up properly for some reason - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS), ignore_errors=True) - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) - - # make the necessary directories - for directory in [COPIES, PARSED_MDS, IF_MANGLED_FILES]: - directory = os.path.join(options[DESTINATION_DIRECTORY], directory) - if not os.path.exists(directory): - os.makedirs(directory) - - ################### define loop-invariant variables ################### - - # constant that keeps track of the source directory - source_directory = options[SOURCE_DIRECTORY] - - # list of all the filenames - filenames = {} - all_items = os.listdir(source_directory) - files = [f for f in all_items if os.path.isfile(os.path.join(source_directory, f)) and ".md" in f[-3:]] - for file in files: - filenames[file] = os.path.join(source_directory, file) - - # for loops over all files - for filename in filenames.keys(): - ################### define/reset loop specific variables ################### - - # boolean indicating whether the current file is part of the linux tutorial - is_linux_tutorial = bool(LINUX_TUTORIAL in filenames[filename]) - - # make a copy of the original file in order to make sure the original does not get altered - copy_file = os.path.join(options[DESTINATION_DIRECTORY], COPIES, filename) - shutil.copyfile(filenames[filename], copy_file) - - # variable that keeps track of the directories that are used to write in at different levels - root_dir_generic = os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, GENERIC_DIR) - root_dir_os_specific = 
os.path.join(options[DESTINATION_DIRECTORY], PARSED_MDS, OS_SPECIFIC_DIR) - root_dir_os_specific_linux = os.path.join(root_dir_os_specific, LINUX) - root_dir_os_specific_windows = os.path.join(root_dir_os_specific, WINDOWS) - root_dir_os_specific_macos = os.path.join(root_dir_os_specific, MACOS) - - # variable for the main title (needed for reference links) - main_title = filename[:-3] - - # variable that keeps track of the directories that are used to write in at different levels - curr_dirs = [filename[:-3] for _ in range(options[MAX_TITLE_DEPTH] + 1)] - - ################### actually parse the md file ################### - - if options[VERBOSE]: - print(LINE + "Processing " + filename) - print("Location: " + filenames[filename]) - print("\nMaking directories:") - - # create directories for the source markdown file - for directory in [root_dir_generic, root_dir_os_specific, root_dir_os_specific_linux, root_dir_os_specific_windows, root_dir_os_specific_macos, os.path.join(root_dir_generic, curr_dirs[0]), os.path.join(root_dir_os_specific_linux, curr_dirs[0]), os.path.join(root_dir_os_specific_windows, curr_dirs[0]), os.path.join(root_dir_os_specific_macos, curr_dirs[0])]: - if options[VERBOSE]: - print(directory) - os.makedirs(directory, exist_ok=True) - - if options[VERBOSE]: - print("\nParsing the sourcefile with jinja") - - # process the jinja macros - jinja_parser(filename, copy_file, options) - - if options[VERBOSE]: - print("\nSplitting the file for the first time (split in sufficiently small generic sections and large os-specific chunks)") - - # split the text in paragraphs - paragraphs_os_text, paragraphs_os_free_text, paragraphs_metadata, subtitle_order = split_text(copy_file, main_title, options, is_linux_tutorial) - - if options[VERBOSE]: - print("\nFurther splitting os-specific chunks and writing generic and os-specific sections to files with metadata") - - # for every section, either make the whole section generic, or create an os-specific file for 
each OS - for i, subtitle in enumerate(subtitle_order): - - # generic - if subtitle in paragraphs_os_free_text.keys(): - write_generic_file(subtitle, paragraphs_os_free_text, paragraphs_metadata, subtitle_order, i, options, is_linux_tutorial) - - # os-specific - else: - split_and_write_os_specific_section(paragraphs_os_text[subtitle], paragraphs_metadata[subtitle], subtitle_order, i, paragraphs_metadata, options, is_linux_tutorial) - - if options[VERBOSE]: - print("\nFinished processing " + filename) - - if options[VERBOSE]: - print(LINE + "Cleaning up directories:") - print(os.path.join(options[DESTINATION_DIRECTORY], COPIES)) - print(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES)) - print(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL)) - # clean up temporary directories and files - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], COPIES), ignore_errors=True) - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], IF_MANGLED_FILES), ignore_errors=True) - shutil.rmtree(os.path.join(options[DESTINATION_DIRECTORY], LINUX_TUTORIAL), ignore_errors=True) - if os.path.exists(TEMP_JINJA_FILE): - os.remove(TEMP_JINJA_FILE) - - if options[VERBOSE]: - print("Parsing finished successfully") - - -################### run the script ################### -if __name__ == '__main__': - parser = argparse.ArgumentParser(description="Preprocessing script for the chatbot\n") - - # adding command-line options - parser.add_argument("-src", "--source", required=True, type=str, help="The source directory where the original files are located") - parser.add_argument("-dst", "--destination", required=True, type=str, help="The destination directory where the processed files should be written to") - parser.add_argument("-st", "--split_on_titles", action="store_true", help="Splits the text based on titles and subtitles instead of paragraphs with a minimum length.") - parser.add_argument("-pl", "--min_paragraph_length", type=int, default=512, 
help="Minimum length in characters of a paragraph, only works if split on titles is disabled (default: 683)") - parser.add_argument("-td", "--max_title_depth", type=int, default=4, help="Maximum depth of titles that divide the source text into sections, only works if split on titles is enabled (default: 4)") - parser.add_argument("-l", "--links", action="store_true", help="Add links to the output texts") - parser.add_argument("-dd", "--deep_directories", action="store_true", help="Generate a nested directory structure following the structure of the subtitles. Only works if split on titles is enabled") - parser.add_argument("-v", "--verbose", action="store_true", help="Run the script with verbose output") - - args = parser.parse_args() - - options_dict = {SOURCE_DIRECTORY: args.source, - DESTINATION_DIRECTORY: args.destination, - SPLIT_ON_TITLES: args.split_on_titles, - SPLIT_ON_PARAGRAPHS: not args.split_on_titles, - MIN_PARAGRAPH_LENGTH: args.min_paragraph_length, - MAX_TITLE_DEPTH: args.max_title_depth, - INCLUDE_LINKS_IN_PLAINTEXT: args.links, - DEEP_DIRECTORIES: args.deep_directories and args.split_on_titles, - VERBOSE: args.verbose} - - main(options_dict) diff --git a/scripts/HPC_chatbot_preprocessor/requirements.txt b/scripts/HPC_chatbot_preprocessor/requirements.txt deleted file mode 100644 index 37137582aad6..000000000000 --- a/scripts/HPC_chatbot_preprocessor/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -PyYAML==6.0.2 -Jinja2==3.1.4 -tiktoken~=0.7.0 -pathlib~=1.0.1 \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt deleted file mode 100644 index 94270ff37e3d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1.txt +++ /dev/null @@ -1,6 +0,0 @@ -Main title -This is the first paragraph 
of text. It is non-os-specific, however it does contain a link. -It also contains some other Markdown syntax and an -example code block. -This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum -character limit for a section). diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json deleted file mode 100644 index 08c0b4e49731..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Main-title", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 1, - "directory": "tps1", - "links": { - "0": "https://docs.hpc.ugent.be/generic" - }, - "parent_title": "", - "previous_title": null, - "next_title": "tps1_paragraph_2", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/tps1/#main-title" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt deleted file mode 100644 index 58eedc06aa02..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3.txt +++ /dev/null @@ -1,3 +0,0 @@ -Conclusion -Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I -might add to this if needed. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json deleted file mode 100644 index 2f1ea4dcd1fe..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/generic/tps1/tps1_paragraph_3_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Conclusion", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 2, - "directory": "tps1", - "parent_title": "", - "previous_title": "tps1_paragraph_2", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/tps1/#conclusion" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt deleted file mode 100644 index d0ee9ce82564..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1.txt +++ /dev/null @@ -1,4 +0,0 @@ -OS specific sections -This is the second section, it is the start of some -text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will -still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json deleted file mode 100644 index 208cb3472f40..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "OS-specific-sections", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 2, - "directory": "tps1", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/linuxmacos" - }, - "previous_title": "tps1_paragraph_1", - "next_title": "tps1_linux_paragraph_2.2", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#os-specific-sections" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt deleted file mode 100644 index 1a3867e69fa9..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2.txt +++ /dev/null @@ -1,3 +0,0 @@ -Non Windows section -Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise -section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json deleted file mode 100644 index b975dfe4e039..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/linux/tps1/tps1_linux_paragraph_2.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Non-Windows-section", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 3, - "directory": "tps1", - "parent_title": "OS-specific-sections", - "previous_title": "tps1_linux_paragraph_2.1", - "next_title": "tps1_paragraph_3", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/tps1/#non-windows-section" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt deleted file mode 100644 index e0642d6ac96b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1.txt +++ /dev/null @@ -1,4 +0,0 @@ -OS specific sections -This is the second section, it is the start of some -text specific to OSes that aren't "windows". I feel like there is no need to make this section very long, however I will -still add a link. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json deleted file mode 100644 index 9c605eb9004e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "OS-specific-sections", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 2, - "directory": "tps1", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/linuxmacos" - }, - "previous_title": "tps1_paragraph_1", - "next_title": "tps1_macos_paragraph_2.2", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#os-specific-sections" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt deleted file mode 100644 index 1a3867e69fa9..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2.txt +++ /dev/null @@ -1,3 +0,0 @@ -Non Windows section -Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise -section that ends right here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json deleted file mode 100644 index e3ca81d7cc5e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/macos/tps1/tps1_macos_paragraph_2.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Non-Windows-section", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 3, - "directory": "tps1", - "parent_title": "OS-specific-sections", - "previous_title": "tps1_macos_paragraph_2.1", - "next_title": "tps1_paragraph_3", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/tps1/#non-windows-section" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt deleted file mode 100644 index 9a9cbe1f3d27..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1.txt +++ /dev/null @@ -1,7 +0,0 @@ -OS specific sections -This is the second section, it is the start of some text specific to windows. -In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer -to make sure we get a long section that is over the minimum required length for the next newline character to be -classified as the end of this section. I am doing this because for the next sections I want to test whether they will be -grouped together if they are not long enough to reach the minimum paragraph length on their own. 
Also, before I forget, -let's add a link in this section as well. diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json deleted file mode 100644 index ab58c622b8c5..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.1_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "OS-specific-sections", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 2, - "directory": "tps1", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/windows" - }, - "previous_title": "tps1_paragraph_1", - "next_title": "tps1_windows_paragraph_2.2", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#os-specific-sections" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt deleted file mode 100644 index 6b57235f68fd..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2.txt +++ /dev/null @@ -1,6 +0,0 @@ -Windows specific section -Like this. -And this. -And also this. -These section should all be grouped together under the windows specific section of the output. The addition of this long -section at the end should make sure the combination of sections comes to an end here. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json deleted file mode 100644 index 435c9e9c484e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/output/parsed_mds/os_specific/windows/tps1/tps1_windows_paragraph_2.2_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tps1", - "subtitle": "Windows-specific-section", - "source_file": "tests/test_files/ftps/tps1.md", - "title_depth": 3, - "directory": "tps1", - "parent_title": "OS-specific-sections", - "previous_title": "tps1_windows_paragraph_2.1", - "next_title": "tps1_paragraph_3", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/tps1/#windows-specific-section" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md deleted file mode 100644 index d9b10d0c5241..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftps/tps1.md +++ /dev/null @@ -1,43 +0,0 @@ -# Main title - -This is the first paragraph of text. It is non-os-specific, however it does contain [a link](generic.md). -It also contains some `other` *Markdown* _syntax_ and an -```shell -example code block. -``` -This intro needs to be sufficiently long as will be explained in the following section (we want to hit the minimum -character limit for a section). - -## OS specific sections - -This is the second section, it is the start of some {% if OS == windows %} text specific to windows. 
-In this section it is probably no longer needed to test the Markdown syntax again, however I will make it somewhat longer -to make sure we get a long section that is over the minimum required length for the next newline character to be -classified as the end of this section. I am doing this because for the next sections I want to test whether they will be -grouped together if they are not long enough to reach the minimum paragraph length on their own. Also, before I forget, -let's add [a link](windows.md) in this section as well. - -### Windows specific section - -Like this. - -And this. - -And also this. - -These section should all be grouped together under the windows specific section of the output. The addition of this long -section at the end should make sure the combination of sections comes to an end here. -{% else %} -text specific to OSes that aren't windows. I feel like there is no need to make this section very long, however I will -still add [a link](linuxmacos.md). - -### Non Windows section - -Whereas the Windows version of this section had a lot of unnecessary newlines, this one will just be a short and concise -section that ends right here. -{% endif %} - -## Conclusion - -Coming up with what to write in test texts is very hard. I think I got the most important test cases in there, but I -might add to this if needed. 
diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt deleted file mode 100644 index f62a4f31feec..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1.txt +++ /dev/null @@ -1,2 +0,0 @@ -blablabla -blablablabla diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json deleted file mode 100644 index b7786c066a7f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-1/Subtitle-1_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-1", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-1", - "parent_title": "Main-title", - "previous_title": "Main-title", - "next_title": "Subtitle-2-g", - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-1" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt deleted file mode 100644 index bdf68551202d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g.txt +++ /dev/null @@ -1 +0,0 @@ -blablabla \ No newline at end of file diff --git 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json deleted file mode 100644 index eb5403804e24..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/generic/tts1/Main-title/Subtitle-5-g/Subtitle-5-g_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-5-g", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-5-g", - "parent_title": "Main-title", - "previous_title": "Subtitle-2-g", - "next_title": null, - "OS": "generic", - "reference_link": "https://docs.hpc.ugent.be/tts1/#subtitle-5-g" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt deleted file mode 100644 index 48125d91679e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt +++ /dev/null @@ -1,4 +0,0 @@ -blablabla generic -blablabla generic -blablabla Linux macOS -blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json deleted file mode 100644 index f7330bec86d8..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-2-g", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-2-g", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/linuxmacos" - }, - "previous_title": "Subtitle-1", - "next_title": "Subtitle-4-l&m", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-2-g" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt deleted file mode 100644 index b221f26074b2..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt +++ /dev/null @@ -1,3 +0,0 @@ -blablabla Linux macOS -blablablabla Linux macOS -blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json deleted file mode 100644 index a76f852c8749..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/linux/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-4-l&m", - "source_file": 
"tests/test_files/ftts/tts1.md", - "title_depth": 3, - "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", - "parent_title": "Subtitle-2-g", - "links": { - "0": "https://docs.hpc.ugent.be/generic" - }, - "previous_title": "Subtitle-2-g", - "next_title": "Subtitle-5-g", - "OS": "linux", - "reference_link": "https://docs.hpc.ugent.be/Linux/tts1/#subtitle-4-lm" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt deleted file mode 100644 index 48125d91679e..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt +++ /dev/null @@ -1,4 +0,0 @@ -blablabla generic -blablabla generic -blablabla Linux macOS -blablablabla Linux macOS with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json deleted file mode 100644 index 8b234c92fa6d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-2-g", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-2-g", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/linuxmacos" - }, - "previous_title": "Subtitle-1", - "next_title": "Subtitle-4-l&m", - "OS": "macos", - "reference_link": 
"https://docs.hpc.ugent.be/macOS/tts1/#subtitle-2-g" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt deleted file mode 100644 index b221f26074b2..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m.txt +++ /dev/null @@ -1,3 +0,0 @@ -blablabla Linux macOS -blablablabla Linux macOS -blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json deleted file mode 100644 index 732d309da81a..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/macos/tts1/Main-title/Subtitle-2-g/Subtitle-4-l&m/Subtitle-4-l&m_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-4-l&m", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 3, - "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-4-l&m", - "parent_title": "Subtitle-2-g", - "links": { - "0": "https://docs.hpc.ugent.be/generic" - }, - "previous_title": "Subtitle-2-g", - "next_title": "Subtitle-5-g", - "OS": "macos", - "reference_link": "https://docs.hpc.ugent.be/macOS/tts1/#subtitle-4-lm" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt deleted file mode 100644 index f9f205928327..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g.txt +++ /dev/null @@ -1,4 +0,0 @@ -blablabla generic -blablabla generic -blablabla windows -blablabla windows with a link diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json deleted file mode 100644 index 7a43426a85f3..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-2-g_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-2-g", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 2, - "directory": "tts1\\Main-title\\Subtitle-2-g", - "parent_title": "Main-title", - "links": { - "0": "https://docs.hpc.ugent.be/windows" - }, - "previous_title": "Subtitle-1", - "next_title": "Subtitle-3-w", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-2-g" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt deleted file mode 100644 index 0b587cef85ab..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w.txt +++ /dev/null @@ -1,3 +0,0 @@ -blablabla windows -blablablabla windows -blablabla generic with a link \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json deleted file mode 100644 index 4d7f494320d7..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/output/parsed_mds/os_specific/windows/tts1/Main-title/Subtitle-2-g/Subtitle-3-w/Subtitle-3-w_metadata.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "main_title": "tts1", - "subtitle": "Subtitle-3-w", - "source_file": "tests/test_files/ftts/tts1.md", - "title_depth": 3, - "directory": "tts1\\Main-title\\Subtitle-2-g\\Subtitle-3-w", - "parent_title": "Subtitle-2-g", - "links": { - "0": "https://docs.hpc.ugent.be/generic" - }, - "previous_title": "Subtitle-2-g", - "next_title": "Subtitle-5-g", - "OS": "windows", - "reference_link": "https://docs.hpc.ugent.be/Windows/tts1/#subtitle-3-w" -} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md deleted file mode 100644 index 2f3ad7f9c088..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/ftts/tts1.md +++ /dev/null @@ -1,31 +0,0 @@ -# Main title - -## Subtitle 1 - -blablabla -blablablabla - -## Subtitle 2 g - -blablabla generic -blablabla generic -{% if OS == windows %}blablabla windows -blablabla windows with a [link](windows.md) - -### Subtitle 3 w - -blablabla windows -blablablabla windows -{% else %}blablabla Linux macOS -blablablabla Linux macOS with a 
[link](linuxmacos.md) - -### Subtitle 4 l&m - -blablabla Linux macOS -blablablabla Linux macOS -{% endif %} -blablabla generic with a [link](generic.md) - -## Subtitle 5 g - -blablabla diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md deleted file mode 100644 index 6a74b3c0181b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_input.md +++ /dev/null @@ -1,4 +0,0 @@ -test1: OS_IF -{% if OS == windows %} -test1 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md deleted file mode 100644 index 2f9cdc38294b..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_1_output.md +++ /dev/null @@ -1,4 +0,0 @@ -test1: OS_IF -{-if-% if OS == windows %-if-} -test1 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md deleted file mode 100644 index 360a4a59ba38..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_input.md +++ /dev/null @@ -1,7 +0,0 @@ -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{% if OS == windows %} -test2 -{% endif %} -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md deleted file mode 100644 index 798dcf6db24a..000000000000 --- 
a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_2_output.md +++ /dev/null @@ -1,7 +0,0 @@ -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{-if-% if OS == windows %-if-} -test2 -{-if-% endif %-if-} -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md deleted file mode 100644 index d93125a59716..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_input.md +++ /dev/null @@ -1,6 +0,0 @@ -test3: OS_IF with else -{% if OS == linux %} -test3 -{% else %} -test3 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md deleted file mode 100644 index 02141961338d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_3_output.md +++ /dev/null @@ -1,6 +0,0 @@ -test3: OS_IF with else -{-if-% if OS == linux %-if-} -test3 -{-if-% else %-if-} -test3 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md deleted file mode 100644 index cc15fae1df11..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_input.md +++ /dev/null @@ -1,4 +0,0 @@ -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md deleted file mode 100644 index cc15fae1df11..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_4_output.md +++ /dev/null @@ -1,4 +0,0 @@ -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md deleted file mode 100644 index bdb288474e24..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_input.md +++ /dev/null @@ -1,11 +0,0 @@ -test5: OS_IF in OS_IF -{% if OS == windows %} -test5 -{% else %} -{% if OS == linux %} -test5 -{% else %} -test5 -{% endif %} -test5 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md deleted file mode 100644 index 10443eb67a4f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_5_output.md +++ /dev/null @@ -1,11 +0,0 @@ -test5: OS_IF in OS_IF -{-if-% if OS == windows %-if-} -test5 -{-if-% else %-if-} -{-if-% if OS == linux %-if-} -test5 -{-if-% else %-if-} -test5 -{-if-% endif %-if-} -test5 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md deleted file mode 100644 index 0731ee3588ce..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_input.md +++ /dev/null @@ -1,8 +0,0 @@ -test6: NON_OS_IF in OS_IF -{% if OS == 
macos %} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{% endif %} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md deleted file mode 100644 index cd37117cb004..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_6_output.md +++ /dev/null @@ -1,8 +0,0 @@ -test6: NON_OS_IF in OS_IF -{-if-% if OS == macos %-if-} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{-if-% endif %-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md deleted file mode 100644 index 6a72a338527a..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_input.md +++ /dev/null @@ -1,9 +0,0 @@ -test7: weird spacing and dashes - {%if OS == windows %} - test7 -{%- else%} - test7 - {% if OS == linux%} -test7 - {%-endif %} -{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md deleted file mode 100644 index dfe342ebfb14..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_7_output.md +++ /dev/null @@ -1,9 +0,0 @@ -test7: weird spacing and dashes - {-if-%if OS == windows %-if-} - test7 -{-if-%- else%-if-} - test7 - {-if-% if OS == linux%-if-} -test7 - {-if-%-endif %-if-} -{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md 
b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md deleted file mode 100644 index fb8c1f8b5396..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_input.md +++ /dev/null @@ -1,55 +0,0 @@ -test1: OS_IF -{% if OS == windows %} -test1 -{% endif %} - -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{% if OS == windows %} -test2 -{% endif %} -{% endif %} - -test3: OS_IF with else -{% if OS == linux %} -test3 -{% else %} -test3 -{% endif %} - -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } - -test5: OS_IF in OS_IF -{% if OS == windows %} -test5 -{% else %} -{% if OS == linux %} -test5 -{% else %} -test5 -{% endif %} -test5 -{% endif %} - -test6: NON_OS_IF in OS_IF -{% if OS == macos %} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{% endif %} - -test7: weird spacing and dashes - {%if OS == windows %} - test7 -{%- else%} - test7 - {% if OS == linux%} -test7 - {%-endif %} -{%endif%} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md deleted file mode 100644 index 796e94348fa2..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/if_mangler_test_files/if_mangler_output.md +++ /dev/null @@ -1,55 +0,0 @@ -test1: OS_IF -{-if-% if OS == windows %-if-} -test1 -{-if-% endif %-if-} - -test2: OS_IF in NON_OS_IF -{% if site == Gent %} -test2 -{-if-% if OS == windows %-if-} -test2 -{-if-% endif %-if-} -{% endif %} - -test3: OS_IF with else -{-if-% if OS == linux %-if-} -test3 -{-if-% else %-if-} -test3 -{-if-% endif %-if-} - -test4: OS_IF with wrong syntax -{ if OS == macos } -test4 -{ endif } - -test5: OS_IF in OS_IF -{-if-% if OS == windows %-if-} -test5 -{-if-% else %-if-} -{-if-% if OS == linux %-if-} -test5 -{-if-% else %-if-} -test5 -{-if-% endif %-if-} 
-test5 -{-if-% endif %-if-} - -test6: NON_OS_IF in OS_IF -{-if-% if OS == macos %-if-} -test6 -{% if site == Gent %} -test6 -{% endif %} -test6 -{-if-% endif %-if-} - -test7: weird spacing and dashes - {-if-%if OS == windows %-if-} - test7 -{-if-%- else%-if-} - test7 - {-if-% if OS == linux%-if-} -test7 - {-if-%-endif %-if-} -{-if-%endif%-if-} \ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md b/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md deleted file mode 100644 index 1e18a1495d51..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_files/list_file/list_test.md +++ /dev/null @@ -1,15 +0,0 @@ -# Title - -Some explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list. - -1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list. - -2. Second entry - -3. Third entry - - ![image](img/an_image_for_the_third_entry.png) - -4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit. - -And now the text continues like normal in a new section. 
\ No newline at end of file diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py b/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py deleted file mode 100644 index 91605dec651f..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_full_script.py +++ /dev/null @@ -1,68 +0,0 @@ -import pytest -import os -import shutil -from chatbot_parser import main - - -@pytest.mark.parametrize("input_directory,actual_output_directory,expected_output_directory, options", [ - ("tests/test_files/ftps", "tests/test_files/ftps/actual", - "tests/test_files/ftps/output", - {"SOURCE_DIRECTORY": "tests/test_files/ftps", - "DESTINATION_DIRECTORY": "tests/test_files/ftps/actual", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 160, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - "VERBOSE": False} - ), - ("tests/test_files/ftts", "tests/test_files/ftts/actual", - "tests/test_files/ftts/output", - {"SOURCE_DIRECTORY": "tests/test_files/ftts", - "DESTINATION_DIRECTORY": "tests/test_files/ftts/actual", - "SPLIT_ON_TITLES": True, - "SPLIT_ON_PARAGRAPHS": False, - "MIN_PARAGRAPH_LENGTH": 160, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": True, - "VERBOSE": False} - ) -]) -def test_full_script_generated_directories(input_directory, actual_output_directory, expected_output_directory, options): - # run the script - main(options) - - # Compare directories and files - for dirpath, dirnames, filenames in os.walk(expected_output_directory): - relative_path = os.path.relpath(dirpath, expected_output_directory) - actual_dir = os.path.join(actual_output_directory, relative_path) - - # Check if the directory exists - assert os.path.isdir(actual_dir), f"Directory '{actual_dir}' is missing." 
- - # Check for files - for filename in filenames: - ref_file = os.path.join(dirpath, filename) - gen_file = os.path.join(actual_dir, filename) - - # Check if the file exists - assert os.path.isfile(gen_file), f"File '{gen_file}' is missing." - - # Check file content - with open(ref_file, 'r') as ref_f, open(gen_file, 'r') as gen_f: - ref_content = ref_f.read().strip() - gen_content = gen_f.read().strip() - assert ref_content == gen_content, f"Content of file '{gen_file}' does not match." - - # check that not too many directories have been generated - for dirpath, dirnames, filenames in os.walk(actual_output_directory): - relative_path = os.path.relpath(dirpath, actual_output_directory) - expected_dir = os.path.join(expected_output_directory, relative_path) - - # Check if the directory exists - assert os.path.isdir(expected_dir), f"Directory '{relative_path}' was made, but shouldn't have been." - - # remove directory - shutil.rmtree(actual_output_directory, ignore_errors=True) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py b/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py deleted file mode 100644 index 4d0dd8761034..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_if_mangler.py +++ /dev/null @@ -1,32 +0,0 @@ -import pytest -import os -import shutil -from chatbot_parser import mangle_ifs - - -@pytest.mark.parametrize("input_file,output_file", [ - ("if_mangler_1_input.md", "if_mangler_1_output.md"), - ("if_mangler_2_input.md", "if_mangler_2_output.md"), - ("if_mangler_3_input.md", "if_mangler_3_output.md"), - ("if_mangler_4_input.md", "if_mangler_4_output.md"), - ("if_mangler_5_input.md", "if_mangler_5_output.md"), - ("if_mangler_6_input.md", "if_mangler_6_output.md"), - ("if_mangler_7_input.md", "if_mangler_7_output.md") -]) -def test_if_mangler(input_file, output_file): - # make directory - os.makedirs(os.path.join("if_mangled_files"), exist_ok=True) - - # make filepaths - input_file_path = os.path.join("tests", 
"test_files", "if_mangler_test_files", input_file) - expected_output_file_path = os.path.join("tests", "test_files", "if_mangler_test_files", output_file) - actual_output_file_path = os.path.join("if_mangled_files", input_file) - mangle_ifs(input_file_path, input_file, {"DESTINATION_DIRECTORY": '.'}) - - # check every line - with open(expected_output_file_path, "r") as expected_read_file: - with open(actual_output_file_path, "r") as actual_read_file: - assert all([expected_line == actual_line for expected_line, actual_line in zip(expected_read_file, actual_read_file)]) - - # remove directory - shutil.rmtree("if_mangled_files", ignore_errors=True) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py deleted file mode 100644 index 9109f2518ad3..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_insert_links.py +++ /dev/null @@ -1,31 +0,0 @@ -import pytest -from chatbot_parser import insert_links - -options_include = {"INCLUDE_LINKS_IN_PLAINTEXT": True} -options_leave_out = {"INCLUDE_LINKS_IN_PLAINTEXT": False} -links_input = {"0": "https://first_link.com", "1": "https://second_link.be", "2": "https://docs.hpc.ugent.be/account#welcome-e-mail", "3": "https://final-link.org"} - - -@pytest.mark.parametrize("text_input, options_input, text_output, new_links", [ - # Text without links - # don't include links - ("Text without links\nand with two lines.", options_leave_out, "Text without links\nand with two lines.", {}), - # include links - ("Text without links\nand with two lines.", options_include, "Text without links\nand with two lines.", {}), - # Text with all links - # don't include links - ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_leave_out, - "Text with all the links\nand with multiple lines.\n\n\n\n", links_input), - # include links - ("Text with all the 
links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§1§link§link§\n§link§link§2§link§link§\n§link§link§3§link§link§", options_include, - "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://second_link.be \n https://docs.hpc.ugent.be/account#welcome-e-mail \n https://final-link.org ", links_input), - # Text with some links - # don't include links - ("Text with all the links\nand with multiple lines.\n§link§link§1§link§link§\n§link§link§3§link§link§", options_leave_out, - "Text with all the links\nand with multiple lines.\n\n", {"0": "https://second_link.be", "1": "https://final-link.org"}), - # include links - ("Text with all the links\nand with multiple lines.\n§link§link§0§link§link§\n§link§link§2§link§link§", options_include, - "Text with all the links\nand with multiple lines.\n https://first_link.com \n https://docs.hpc.ugent.be/account#welcome-e-mail ", {"0": "https://first_link.com", "1": "https://docs.hpc.ugent.be/account#welcome-e-mail"}) -]) -def test_insert_links(text_input, options_input, text_output, new_links): - assert insert_links(text_input, links_input, options_input) == (text_output, new_links) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_links.py b/scripts/HPC_chatbot_preprocessor/tests/test_links.py deleted file mode 100644 index d1acca1d7409..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_links.py +++ /dev/null @@ -1,69 +0,0 @@ -import os -import pytest -from urllib import request -from chatbot_parser import main -import json - -whitelist = ["mailto:hpc@ugent.be"] -slow_list = ["https://login.hpc.ugent.be", "https://www.edx.org/course/introduction-linux-linuxfoundationx-lfs101x-0"] - -options_general = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC", - "DESTINATION_DIRECTORY": ".", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 683, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - 
"VERBOSE": False} -options_os_specific = {"SOURCE_DIRECTORY": "../../mkdocs/docs/HPC/linux-tutorial", - "DESTINATION_DIRECTORY": "./linux-tutorial", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 683, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - "VERBOSE": False} - - -@pytest.mark.parametrize("options", [options_general, options_os_specific]) -def test_all_links(options): - all_links = {} - main(options) - broken_links = {} - empty_links = {} - - for (dirpath, dirnames, filenames) in os.walk(os.path.join(options['DESTINATION_DIRECTORY'], 'parsed_mds')): - for filename in filenames: - all_links[filename] = [] - if filename.endswith('metadata.json'): - data = json.load(open(os.path.join(dirpath, filename))) - if 'links' in data.keys(): - for key in data['links'].keys(): - all_links[filename].append(data['links'][key]) - all_links[filename].append(data['reference_link'].split("#")[0]) - - for filename in all_links.keys(): - all_links[filename] = list(set(all_links[filename])) - for link in all_links[filename]: - if len(link) != 0: - try: - if link not in whitelist and link not in slow_list: - with request.urlopen(link) as res: - if res.status == 200: - pass - except: - print("Broken link in " + filename + ": " + link) - if filename in broken_links.keys(): - broken_links[filename].append(link) - else: - broken_links[filename] = [link] - else: - print("Empty link in " + filename) - if filename in empty_links.keys(): - empty_links[filename].append(link) - else: - empty_links[filename] = [link] - assert len(empty_links.keys()) == 0 - assert len(broken_links.keys()) == 0 diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py b/scripts/HPC_chatbot_preprocessor/tests/test_lists.py deleted file mode 100644 index 06e56a5cb2c3..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_lists.py +++ /dev/null @@ -1,27 +0,0 @@ -import pytest -from chatbot_parser import 
split_on_paragraphs - - -@pytest.mark.parametrize("file, main_title, options, is_linux_tutorial, expected_text", [ - ("./test_files/list_file/list_test.md", - "list_test.md", - { - "SOURCE_DIRECTORY": "./test_files/list_file", - "DESTINATION_DIRECTORY": "./test_files/list_file", - "SPLIT_ON_TITLES": False, - "SPLIT_ON_PARAGRAPHS": True, - "MIN_PARAGRAPH_LENGTH": 100, - "MAX_TITLE_DEPTH": 4, - "INCLUDE_LINKS_IN_PLAINTEXT": False, - "DEEP_DIRECTORIES": False, - "VERBOSE": False - }, - False, - { - 'list_test.md_paragraph_1': "Title\nSome explanation about the following list that is quite long. This could be problematic since this could mean that the explanation of the content of the list would be part of a different paragraph than the list.\n1. First entry that is very verbose since we want to hit the character limit for a paragraph to make sure a list can't be split in the middle. If this entry is long enough, the character limit should make it so that any of the following newlines can be the start of a new section if the splitter doesn't know it is in a list.\n2. Second entry\n3. Third entry\n4. Fourth entry that is very verbose, so we hit the character limit for a section split, even though it shouldn't be necessary since the explanation of the list is already well above the character limit.\n", - 'list_test.md_paragraph_2': 'And now the text continues like normal in a new section.' 
- } - ) -]) -def test_links(file, main_title, options, is_linux_tutorial, expected_text): - assert split_on_paragraphs(file, main_title, options, is_linux_tutorial)[1] == expected_text diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py b/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py deleted file mode 100644 index 225c368477d9..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_make_valid_title.py +++ /dev/null @@ -1,14 +0,0 @@ -import pytest -from chatbot_parser import make_valid_title - - -@pytest.mark.parametrize("input_string,expected", [ - ("", ""), - ("A-good-filename-with-dashes", "A-good-filename-with-dashes"), - (" A very good filename beginning and ending in a space ", "A-very-good-filename-beginning-and-ending-in-a-space"), - ("-A-very-good-filename-beginning-and-ending-in-a-dash-", "A-very-good-filename-beginning-and-ending-in-a-dash"), - ("A filename containing bad characters <>:\"/\\|?*\0", "A-filename-containing-bad-characters"), - ("A filename ending with {some jinja garbage}", "A-filename-ending-with") -]) -def test_make_valid_title(input_string, expected): - assert make_valid_title(input_string) == expected diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py b/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py deleted file mode 100644 index f4cee6dd75cf..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_replace_markdown_markers.py +++ /dev/null @@ -1,46 +0,0 @@ -import pytest -from chatbot_parser import replace_markdown_markers - - -@pytest.mark.parametrize("input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist", [ - # baseline test - ("A normal line with nothing special", [], False, "", "A normal line with nothing special", []), - # image 1 - ("![image](a-nice-image.png)", [], False, "", "", []), - # image 2 - ("![](img/Look-at-this-photograph.png)", [], False, "", "", []), - # link 1 (outside 
docs) - ("A line with a [link](a-nice-link.com)", ["another-link.be"], False, "", - "A line with a link§link§link§1§link§link§", ["another-link.be", "a-nice-link.com"]), - # link 2 (another document within the docs) - ("A line with a [link to the docs](account.md#welcome-e-mail)", ["another-link.be"], False, "", - "A line with a link to the docs§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), - # link 3 (the same document) - ("A line with a [link to the same doc](#welcome-e-mail)", ["another-link.be"], False, "account.md", - "A line with a link to the same doc§link§link§1§link§link§", ["another-link.be", "https://docs.hpc.ugent.be/account/#welcome-e-mail"]), - # codeblock - ("```shell", [], True, "", "", []), - # html syntax 1 (normal syntax) - ("A line with something in Bold", [], False, "", "A line with something in Bold", []), - # html syntax 2 (link) - ("A line with another link
", ["other-website.com"], False, "", - "A line with another link§link§link§1§link§link§", ["other-website.com", "website.com"]), - # html syntax 3 (style) - ("

A line with style

", [], False, "", "A line with style", []), - # Bot comment - ("", [], False, "", "Something about the following table", []), - # non-Bot comment - ("", [], False, "", "", []), - # something else with <> - ("A line with an example where you should put ", [], False, "", "A line with an example where you should put ", []), - # info/tips/warnings - ("!!! warning", [], False, "", " warning", []), - # collapsable admonitions - ("??? note", [], False, "", " note", []), - # Markdown syntax 1 (not in code block) - ("`Line` **with** ++a++ _lot_ *of* _++markdown++_ `syntax`", [], False, "", "Line with a lot of markdown syntax", []), - # Markdown syntax 2 (in code block) - ("`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", [], True, "", "`Line` **with** ++slightly++ _less_ *markdown* _++syntax++_", []) -]) -def test_replace_markdown_markers(input_line, input_linklist, in_code_block, main_title, expected_line, expected_linklist): - assert replace_markdown_markers(input_line, input_linklist, in_code_block, main_title) == (expected_line, expected_linklist) diff --git a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py b/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py deleted file mode 100644 index 6c30fef7985d..000000000000 --- a/scripts/HPC_chatbot_preprocessor/tests/test_write_metadata.py +++ /dev/null @@ -1,15 +0,0 @@ -import pytest -import os -from chatbot_parser import write_metadata - - -@pytest.mark.parametrize("main_title,subtitle,links,title_level,directory,source_file,output", [ - ("", "", [], 1, "", "", {"source_file": "", "main_title": "", "subtitle": "", "title_depth": 1, "directory": "", "parent_title": ""}), - ("A_very_good_main_title", "An_extremely_good_subtitle", ["the_first.link", "the_second.link"], 2, - os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), "source", - {"source_file": "source", "main_title": "A_very_good_main_title", "subtitle": "An_extremely_good_subtitle", 
"title_depth": 2, - "directory": os.path.join("A_very_good_main_title", "An_awesome_parent_file", "An_extremely_good_subtitle"), - "parent_title": "An_awesome_parent_file", "links": {"0": "the_first.link", "1": "the_second.link"}}) -]) -def test_write_metadata(main_title, subtitle, links, title_level, directory, source_file, output): - assert write_metadata(main_title, subtitle, links, title_level, directory, source_file) == output