Skip to content

Commit

Permalink
emit minion task generation time and error metrics (#10026)
Browse files Browse the repository at this point in the history
* emit minion task generation time and error metrics

* address comments
  • Loading branch information
zhtaoxiang authored Dec 23, 2022
1 parent 6303fc9 commit 880a5c7
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,20 @@ rules:
table: "$1"
tableType: "$2"
taskType: "$3"
- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.timeMsSinceLastSuccessfulMinionTaskGeneration.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
name: "pinot_controller_timeMsSinceLastSuccessfulMinionTaskGeneration_$4"
cache: true
labels:
table: "$1"
tableType: "$2"
taskType: "$3"
- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.lastMinionTaskGenerationEncountersError.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
name: "pinot_controller_lastMinionTaskGenerationEncountersError_$4"
cache: true
labels:
table: "$1"
tableType: "$2"
taskType: "$3"
- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)"
name: "pinot_controller_pinotLeadControllerResourceEnabled_$1"
cache: true
Expand Down
14 changes: 14 additions & 0 deletions docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,20 @@ rules:
table: "$1"
tableType: "$2"
taskType: "$3"
- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.timeMsSinceLastSuccessfulMinionTaskGeneration.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
name: "pinot_controller_timeMsSinceLastSuccessfulMinionTaskGeneration_$4"
cache: true
labels:
table: "$1"
tableType: "$2"
taskType: "$3"
- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.lastMinionTaskGenerationEncountersError.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)"
name: "pinot_controller_lastMinionTaskGenerationEncountersError_$4"
cache: true
labels:
table: "$1"
tableType: "$2"
taskType: "$3"
- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)"
name: "pinot_controller_pinotLeadControllerResourceEnabled_$1"
cache: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
DISABLED_TABLE_COUNT("TableCount", true),
PERIODIC_TASK_NUM_TABLES_PROCESSED("PeriodicTaskNumTablesProcessed", true),
TIME_MS_SINCE_LAST_MINION_TASK_METADATA_UPDATE("TimeMsSinceLastMinionTaskMetadataUpdate", false),
TIME_MS_SINCE_LAST_SUCCESSFUL_MINION_TASK_GENERATION("TimeMsSinceLastSuccessfulMinionTaskGeneration", false),
LAST_MINION_TASK_GENERATION_ENCOUNTERS_ERROR("LastMinionTaskGenerationEncountersError", false),
NUM_MINION_TASKS_IN_PROGRESS("NumMinionTasksInProgress", true),
NUM_MINION_SUBTASKS_WAITING("NumMinionSubtasksWaiting", true),
NUM_MINION_SUBTASKS_RUNNING("NumMinionSubtasksRunning", true),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -541,20 +541,35 @@ private String scheduleTask(PinotTaskGenerator taskGenerator, List<TableConfig>
generateTasks() return a list of TaskGeneratorMostRecentRunInfo for each table
*/
pinotTaskConfigs = taskGenerator.generateTasks(enabledTableConfigs);
long successRunTimestamp = System.currentTimeMillis();
for (TableConfig tableConfig : enabledTableConfigs) {
_taskManagerStatusCache.saveTaskGeneratorInfo(tableConfig.getTableName(), taskGenerator.getTaskType(),
taskGeneratorMostRecentRunInfo -> taskGeneratorMostRecentRunInfo.addSuccessRunTs(
System.currentTimeMillis()));
taskGeneratorMostRecentRunInfo -> taskGeneratorMostRecentRunInfo.addSuccessRunTs(successRunTimestamp));
// before the first task is scheduled, the following two gauge metrics will be empty
// TODO: find a better way to report task generation information
_controllerMetrics.addOrUpdateGauge(
ControllerGauge.TIME_MS_SINCE_LAST_SUCCESSFUL_MINION_TASK_GENERATION.getGaugeName() + "."
+ tableConfig.getTableName() + "." + taskGenerator.getTaskType(),
() -> System.currentTimeMillis() - successRunTimestamp);
_controllerMetrics.addOrUpdateGauge(
ControllerGauge.LAST_MINION_TASK_GENERATION_ENCOUNTERS_ERROR.getGaugeName() + "."
+ tableConfig.getTableName() + "." + taskGenerator.getTaskType(), () -> 0L);
}
} catch (Exception e) {
StringWriter errors = new StringWriter();
try (PrintWriter pw = new PrintWriter(errors)) {
e.printStackTrace(pw);
}
long successRunTimestamp = System.currentTimeMillis();
for (TableConfig tableConfig : enabledTableConfigs) {
_taskManagerStatusCache.saveTaskGeneratorInfo(tableConfig.getTableName(), taskGenerator.getTaskType(),
taskGeneratorMostRecentRunInfo -> taskGeneratorMostRecentRunInfo.addErrorRunMessage(
System.currentTimeMillis(), errors.toString()));
successRunTimestamp, errors.toString()));
// before the first task is scheduled, the following gauge metric will be empty
// TODO: find a better way to report task generation information
_controllerMetrics.addOrUpdateGauge(
ControllerGauge.LAST_MINION_TASK_GENERATION_ENCOUNTERS_ERROR.getGaugeName() + "."
+ tableConfig.getTableName() + "." + taskGenerator.getTaskType(), () -> 1L);
}
throw e;
}
Expand Down

0 comments on commit 880a5c7

Please sign in to comment.