feat: added more metrics to the ingestion benchmark

JoaoBraveCoding · JoaoBraveCoding · commit 78381e4d2e91 · 2024-10-15T10:49:52.000+01:00
feat: added script to transform the report/README output into a docx so
that it can be uploaded to gdoc
diff --git a/benchmarks/ingestion_path_test.go b/benchmarks/ingestion_path_test.go
@@ -66,6 +66,8 @@ var _ = Describe("Ingestion Path", func() {
 				job := benchCfg.Metrics.Jobs.Distributor
 				annotation := metrics.DistributorAnnotation
 
+				err = metricsClient.MeasureResourceUsageMetrics(e, job, samplingRange, annotation)
+				Expect(err).Should(Succeed(), fmt.Sprintf("Failed - %v", err))
 				err = metricsClient.MeasureHTTPRequestMetrics(e, metrics.WriteRequestPath, job, samplingRange, annotation)
 				Expect(err).Should(Succeed(), fmt.Sprintf("Failed - %v", err))
 
@@ -79,7 +81,7 @@ var _ = Describe("Ingestion Path", func() {
 				Expect(err).Should(Succeed(), fmt.Sprintf("Failed - %v", err))
 				err = metricsClient.MeasureGRPCRequestMetrics(e, metrics.WriteRequestPath, job, samplingRange, annotation)
 				Expect(err).Should(Succeed(), fmt.Sprintf("Failed - %v", err))
-				err = metricsClient.MeasureBoltDBShipperRequestMetrics(e, metrics.WriteRequestPath, job, samplingRange)
+				err = metricsClient.MeasureIndexRequestMetrics(e, metrics.WriteRequestPath, job, samplingRange)
 				Expect(err).Should(Succeed(), fmt.Sprintf("Failed - %v", err))
 			}, samplingCfg)
 		})
diff --git a/benchmarks/query_path_test.go b/benchmarks/query_path_test.go
@@ -122,7 +122,7 @@ var _ = Describe("Query Path", func() {
 				Expect(err).Should(Succeed(), fmt.Sprintf("Failed - %v", err))
 				err = metricsClient.MeasureGRPCRequestMetrics(e, metrics.ReadRequestPath, job, samplingRange, annotation)
 				Expect(err).Should(Succeed(), fmt.Sprintf("Failed - %v", err))
-				err = metricsClient.MeasureBoltDBShipperRequestMetrics(e, metrics.ReadRequestPath, job, samplingRange)
+				err = metricsClient.MeasureIndexRequestMetrics(e, metrics.ReadRequestPath, job, samplingRange)
 				Expect(err).Should(Succeed(), fmt.Sprintf("Failed - %v", err))
 			}, samplingCfg)
 		})
diff --git a/hack/scripts/create-gdoc.py b/hack/scripts/create-gdoc.py
@@ -0,0 +1,117 @@
+import os
+import sys
+import argparse
+from docx import Document
+from docx.shared import Inches
+from docx.oxml.ns import qn
+from docx.oxml import OxmlElement
+import markdown
+from bs4 import BeautifulSoup
+
+def add_hyperlink(paragraph, url, text, color="0000FF", underline=True):
+    part = paragraph.part
+    r_id = part.relate_to(url, 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink', is_external=True)
+
+    hyperlink = OxmlElement('w:hyperlink')
+    hyperlink.set(qn('r:id'), r_id)
+
+    new_run = OxmlElement('w:r')
+    rPr = OxmlElement('w:rPr')
+
+    if color:
+        c = OxmlElement('w:color')
+        c.set(qn('w:val'), color)
+        rPr.append(c)
+
+    if underline:
+        u = OxmlElement('w:u')
+        u.set(qn('w:val'), 'single')
+        rPr.append(u)
+
+    new_run.append(rPr)
+    new_run.text = text
+    hyperlink.append(new_run)
+
+    paragraph._p.append(hyperlink)
+    return hyperlink
+
+def add_table_of_contents(soup, doc):
+    toc = soup.find('ul')
+    if toc:
+        for li in toc.find_all('li'):
+            link = li.find('a')
+            if link and link['href'].startswith('#'):
+                heading_text = link.text
+                toc_paragraph = doc.add_paragraph()
+                add_hyperlink(toc_paragraph, f'#{heading_text}', heading_text)
+
+def add_markdown_to_docx(md_content, doc, base_path):
+    html = markdown.markdown(md_content)
+    soup = BeautifulSoup(html, 'html.parser')
+
+    heading_map = {}
+    toc_inserted = False
+
+    for element in soup:
+        if element.name == 'h1':
+            paragraph = doc.add_heading(element.text, level=1)
+            heading_map[element.text] = paragraph
+        elif element.name == 'h2':
+            paragraph = doc.add_heading(element.text, level=2)
+            heading_map[element.text] = paragraph
+            if element.text.lower() == 'table of contents' and not toc_inserted:
+                add_table_of_contents(soup, doc)
+                toc_inserted = True
+        elif element.name == 'h3':
+            paragraph = doc.add_heading(element.text, level=3)
+            heading_map[element.text] = paragraph
+        elif element.name == 'p':
+            paragraph = doc.add_paragraph(element.text)
+            for img in element.find_all('img'):
+                img_src = img['src'].lstrip('./')
+                img_path = os.path.join(base_path, img_src)
+                if os.path.exists(img_path):
+                    doc.add_picture(img_path, width=Inches(5.0))
+                else:
+                    paragraph.add_run(f"[Image not found: {img_path}]")
+        elif element.name == 'ul' and not toc_inserted:
+            for li in element.find_all('li'):
+                doc.add_paragraph(li.text, style='ListBullet')
+        elif element.name == 'ol':
+            for li in element.find_all('li'):
+                doc.add_paragraph(li.text, style='ListNumber')
+        elif element.name == 'a':
+            paragraph = doc.add_paragraph()
+            add_hyperlink(paragraph, element['href'], element.text)
+
+    for heading_text, paragraph in heading_map.items():
+        bookmark = OxmlElement('w:bookmarkStart')
+        bookmark.set(qn('w:id'), str(hash(heading_text)))
+        bookmark.set(qn('w:name'), heading_text)
+        paragraph._p.insert(0, bookmark)
+        bookmark_end = OxmlElement('w:bookmarkEnd')
+        bookmark_end.set(qn('w:id'), str(hash(heading_text)))
+        paragraph._p.append(bookmark_end)
+
+def convert_readme_to_docx(readme_dir, output_path):
+    readme_path = os.path.join(readme_dir, 'README.md')
+    if not os.path.exists(readme_path):
+        print(f"README.md not found in {readme_dir}")
+        return
+
+    with open(readme_path, 'r') as file:
+        md_content = file.read()
+
+    doc = Document()
+    add_markdown_to_docx(md_content, doc, readme_dir)
+    doc.save(output_path)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Convert a README.md file to a DOCX file.')
+    parser.add_argument('readme_dir', type=str, help='Directory containing the README.md file')
+    args = parser.parse_args()
+
+    readme_dir = args.readme_dir
+    output_path = os.path.join(readme_dir, 'README.docx')
+    convert_readme_to_docx(readme_dir, output_path)
+    print(f"Converted README.md in {readme_dir} to {output_path}")
diff --git a/hack/scripts/generate_report.py b/hack/scripts/generate_report.py
@@ -25,14 +25,13 @@ def plot_measurement(measurements, output_dir, plot_index):
         values = measurement['Values']
         units = measurement['Units']
         annotations = measurement.get('Annotations', [])
-        component = annotations[0].capitalize() if annotations else 'Unknown Component'
         
         # Generate time values for x-axis starting from 3 minutes
         time_values = [(i + 1) * 3 for i in range(len(values))]
         
         plt.plot(time_values, values, marker='o', label=description)
     
-    plt.title(f'{component} {name}')
+    plt.title(f'{name}')
     plt.xlabel('Time (minutes)')
     plt.ylabel(f'{units}')
     plt.legend()
@@ -42,7 +41,7 @@ def plot_measurement(measurements, output_dir, plot_index):
     plt.savefig(plot_filename)
     plt.close()
     
-    return f'./plots/plot_{plot_index}.png', f'{component} {name}'
+    return f'./plots/plot_{plot_index}.png', f'{name}'
 
 # Collect all measurements from the provided directories
 all_measurements = {}
diff --git a/internal/metrics/client.go b/internal/metrics/client.go
@@ -110,7 +110,7 @@ func (c *Client) MeasureGRPCRequestMetrics(
 	}
 }
 
-func (c *Client) MeasureBoltDBShipperRequestMetrics(
+func (c *Client) MeasureIndexRequestMetrics(
 	e *gmeasure.Experiment,
 	path RequestPath,
 	job string,
@@ -249,29 +249,43 @@ func (c *Client) measureCommonRequestMetrics(
 	sampleRange model.Duration,
 	annotation gmeasure.Annotation,
 ) error {
-	var name, code, requestRateName string
+	var name, code, badCode, requestRateName, badRequestRateName string
 
 	if method == GRPCMethod {
-		name = fmt.Sprintf("successful GRPC %s", route)
+		name = fmt.Sprintf("%s successful GRPC %s", job, route)
 		code = "success"
-
 		requestRateName = name
 		if pathRoutes == GRPCReadPathRoutes {
 			requestRateName = "successful GRPC reads"
 		}
+
+		badCode = "error|cancel"
+		badRequestRateName = fmt.Sprintf("%s unsuccessful GRPC %s", job, route)
+		if pathRoutes == GRPCReadPathRoutes {
+			badRequestRateName = "unsuccessful GRPC reads" // mirrors the HTTP branch; must not overwrite requestRateName
+		}
 	} else {
-		name = fmt.Sprintf("2xx %s", route)
+		name = fmt.Sprintf("%s 2xx %s", job, route)
 		code = "2.*"
-
 		requestRateName = name
 		if pathRoutes == HTTPReadPathRoutes {
 			requestRateName = "2xx reads"
 		}
+
+		badCode = "5.*"
+		badRequestRateName = fmt.Sprintf("%s 5xx %s", job, route)
+		if pathRoutes == HTTPReadPathRoutes {
+			badRequestRateName = "5xx reads"
+		}
 	}
 
+	// Rate request of 200 or success
 	if err := c.Measure(e, RequestRate(requestRateName, job, pathRoutes, code, sampleRange, annotation)); err != nil {
 		return err
 	}
+	if err := c.Measure(e, RequestRate(badRequestRateName, job, pathRoutes, badCode, sampleRange, annotation)); err != nil {
+		return err
+	}
 	if err := c.Measure(e, RequestDurationAverage(name, job, method, route, code, sampleRange, annotation)); err != nil {
 		return err
 	}
diff --git a/internal/metrics/requests.go b/internal/metrics/requests.go
@@ -79,8 +79,9 @@ func RequestDurationQuantile(
 ) Measurement {
 	return Measurement{
 		Name: fmt.Sprintf("%s request duration P%d", name, percentile),
+		// clamp_min is used to avoid NaN which breaks the reporting
 		Query: fmt.Sprintf(
-			`histogram_quantile(0.%d, sum by (job, le) (rate(loki_request_duration_seconds_bucket{job=~".*%s.*", method="%s", route=~"%s", status_code=~"%s"}[%s]))) * %d`,
+			`histogram_quantile(0.%d, sum by (job, le) (clamp_min(rate(loki_request_duration_seconds_bucket{job=~".*%s.*", method="%s", route=~"%s", status_code=~"%s"}[%s]), 0.01))) * %d`,
 			percentile, job, method, route, code, duration, SecondsToMillisecondsMultiplier,
 		),
 		Unit:       MillisecondsUnit,
diff --git a/internal/metrics/resources.go b/internal/metrics/resources.go
@@ -9,7 +9,7 @@ import (
 
 func ContainerCPU(job string, duration model.Duration, annotation gmeasure.Annotation) Measurement {
 	return Measurement{
-		Name: "Sum of Container CPU Usage",
+		Name: fmt.Sprintf("%s Sum of Container CPU Usage", job),
 		Query: fmt.Sprintf(
 			`sum(avg_over_time(pod:container_cpu_usage:sum{pod=~".*%s.*"}[%s])) * %d`,
 			job, duration, CoresToMillicores,
@@ -21,7 +21,7 @@ func ContainerCPU(job string, duration model.Duration, annotation gmeasure.Annot
 
 func ContainerMemoryWorkingSetBytes(job string, duration model.Duration, annotation gmeasure.Annotation) Measurement {
 	return Measurement{
-		Name: "Sum of Container WorkingSet Memory",
+		Name: fmt.Sprintf("%s Sum of Container WorkingSet Memory", job),
 		Query: fmt.Sprintf(
 			`sum(avg_over_time(container_memory_working_set_bytes{pod=~".*%s.*", container=""}[%s]) / %d)`,
 			job, duration, BytesToGigabytesMultiplier,
@@ -33,7 +33,7 @@ func ContainerMemoryWorkingSetBytes(job string, duration model.Duration, annotat
 
 func ContainerGoMemstatsHeapInuse(job string, _ model.Duration, annotation gmeasure.Annotation) Measurement {
 	return Measurement{
-		Name: "Sum of Container Go Memstats Heap Inuse",
+		Name: fmt.Sprintf("%s Sum of Container Go Memstats Heap Inuse", job),
 		Query: fmt.Sprintf(
 			`sum(go_memstats_heap_inuse_bytes{pod=~".*%s.*"}) / %d`,
 			job, BytesToGigabytesMultiplier,
@@ -45,7 +45,7 @@ func ContainerGoMemstatsHeapInuse(job string, _ model.Duration, annotation gmeas
 
 func PersistentVolumeUsedBytes(job string, duration model.Duration, annotation gmeasure.Annotation) Measurement {
 	return Measurement{
-		Name: "Sum of Persistent Volume Used Bytes",
+		Name: fmt.Sprintf("%s Sum of Persistent Volume Used Bytes", job),
 		Query: fmt.Sprintf(
 			`sum(avg_over_time(kubelet_volume_stats_used_bytes{persistentvolumeclaim=~".*%s.*"}[%s]) / %d)`,
 			job, duration, BytesToGigabytesMultiplier,