Skip to content

Commit 77d1596

Browse files
committed
Time Unit Unification for bin/stats
Signed-off-by: Kai Huang <[email protected]> # Conflicts: # integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java
1 parent 3388dc7 commit 77d1596

File tree

6 files changed

+156
-57
lines changed

6 files changed

+156
-57
lines changed

core/src/main/java/org/opensearch/sql/calcite/utils/binning/SpanParser.java

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ public class SpanParser {
5252
Map.entry("months", "months"),
5353
Map.entry("month", "months"),
5454
Map.entry("mon", "months"),
55+
Map.entry("M", "months"), // Uppercase M for months (case-sensitive)
5556
// Milliseconds
5657
Map.entry("ms", "ms"),
5758
// Microseconds
@@ -63,7 +64,16 @@ public class SpanParser {
6364

6465
// Build direct lookup map for efficient unit detection
6566
for (String unit : NORMALIZED_UNITS.keySet()) {
66-
UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit);
67+
// Preserve case for case-sensitive units: M (month), m (minute), us, cs, ds
68+
if (unit.equals("M")
69+
|| unit.equals("m")
70+
|| unit.equals("us")
71+
|| unit.equals("cs")
72+
|| unit.equals("ds")) {
73+
UNIT_LOOKUP.put(unit, unit);
74+
} else {
75+
UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit);
76+
}
6777
}
6878
}
6979

@@ -135,15 +145,27 @@ private static SpanInfo parseNumericSpan(String spanStr) {
135145

136146
/** Extracts time unit from span string (returns original matched unit, not normalized). */
137147
public static String extractTimeUnit(String spanStr) {
138-
String lowerSpanStr = spanStr.toLowerCase(Locale.ROOT);
139148
String longestMatch = null;
140149

141150
// Find the longest unit that matches as a suffix
142151
for (String unit : UNIT_LOOKUP.keySet()) {
143-
if (lowerSpanStr.endsWith(unit)) {
152+
// For case-sensitive units (M, m, us, cs, ds), match case-sensitively
153+
boolean matches;
154+
if (unit.equals("M")
155+
|| unit.equals("m")
156+
|| unit.equals("us")
157+
|| unit.equals("cs")
158+
|| unit.equals("ds")) {
159+
matches = spanStr.endsWith(unit);
160+
} else {
161+
// For other units, match case-insensitively
162+
matches = spanStr.toLowerCase(Locale.ROOT).endsWith(unit.toLowerCase(Locale.ROOT));
163+
}
164+
165+
if (matches) {
144166
// Ensure this is a word boundary (not part of a larger word)
145-
int unitStartPos = lowerSpanStr.length() - unit.length();
146-
if (unitStartPos == 0 || !Character.isLetter(lowerSpanStr.charAt(unitStartPos - 1))) {
167+
int unitStartPos = spanStr.length() - unit.length();
168+
if (unitStartPos == 0 || !Character.isLetter(spanStr.charAt(unitStartPos - 1))) {
147169
// Keep the longest match
148170
if (longestMatch == null || unit.length() > longestMatch.length()) {
149171
longestMatch = unit;

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/TimeSpanHelper.java

Lines changed: 5 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,14 @@ public RexNode createTimeSpanExpression(
3131
private boolean shouldApplyAligntime(String spanStr) {
3232
if (spanStr == null) return false;
3333

34-
spanStr = spanStr.replace("'", "").replace("\"", "").trim().toLowerCase();
34+
spanStr = spanStr.replace("'", "").replace("\"", "").trim();
3535
String timeUnit = SpanParser.extractTimeUnit(spanStr);
3636

3737
if (timeUnit == null) return true; // Pure number, assume hours
3838

3939
// Aligntime ignored for days, months, years
40-
String normalizedUnit = normalizeTimeUnit(timeUnit);
41-
return !normalizedUnit.equals("d") && !normalizedUnit.equals("M");
40+
String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
41+
return !normalizedUnit.equals("d") && !normalizedUnit.equals("months");
4242
}
4343

4444
private RexNode createAlignedTimeSpan(
@@ -64,7 +64,7 @@ private RexNode createAlignedTimeSpan(
6464
if (timeUnit != null) {
6565
String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length());
6666
intervalValue = Integer.parseInt(valueStr);
67-
normalizedUnit = normalizeTimeUnit(timeUnit);
67+
normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
6868
} else {
6969
intervalValue = Integer.parseInt(spanStr);
7070
normalizedUnit = "h";
@@ -86,7 +86,7 @@ private RexNode createStandardTimeSpan(
8686
if (timeUnit != null) {
8787
String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length());
8888
int value = Integer.parseInt(valueStr);
89-
String normalizedUnit = normalizeTimeUnit(timeUnit);
89+
String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
9090
return BinTimeSpanUtils.createBinTimeSpanExpression(
9191
fieldExpr, value, normalizedUnit, 0, context);
9292
} else {
@@ -120,39 +120,4 @@ private String extractModifier(String aligntimeStr) {
120120

121121
return null;
122122
}
123-
124-
private String normalizeTimeUnit(String unit) {
125-
switch (unit.toLowerCase()) {
126-
case "s", "sec", "secs", "second", "seconds" -> {
127-
return "s";
128-
}
129-
case "m", "min", "mins", "minute", "minutes" -> {
130-
return "m";
131-
}
132-
case "h", "hr", "hrs", "hour", "hours" -> {
133-
return "h";
134-
}
135-
case "d", "day", "days" -> {
136-
return "d";
137-
}
138-
case "mon", "month", "months" -> {
139-
return "months";
140-
}
141-
case "us" -> {
142-
return "us";
143-
}
144-
case "ms" -> {
145-
return "ms";
146-
}
147-
case "cs" -> {
148-
return "cs";
149-
}
150-
case "ds" -> {
151-
return "ds";
152-
}
153-
default -> {
154-
return unit;
155-
}
156-
}
157-
}
158123
}

core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeUnitRegistry.java

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,33 +14,36 @@ public class TimeUnitRegistry {
1414
private static final Map<String, TimeUnitConfig> UNIT_MAPPING = new HashMap<>();
1515

1616
static {
17-
// Microseconds
17+
// Microseconds (case-sensitive, lowercase only)
1818
UNIT_MAPPING.put("us", TimeUnitConfig.MICROSECONDS);
1919

2020
// Milliseconds
2121
UNIT_MAPPING.put("ms", TimeUnitConfig.MILLISECONDS);
2222

23-
// Centiseconds
23+
// Centiseconds (case-sensitive, lowercase only)
2424
UNIT_MAPPING.put("cs", TimeUnitConfig.CENTISECONDS);
2525

26-
// Deciseconds
26+
// Deciseconds (case-sensitive, lowercase only)
2727
UNIT_MAPPING.put("ds", TimeUnitConfig.DECISECONDS);
2828

2929
// Seconds
3030
UNIT_MAPPING.put("s", TimeUnitConfig.SECONDS);
3131
UNIT_MAPPING.put("sec", TimeUnitConfig.SECONDS);
32+
UNIT_MAPPING.put("secs", TimeUnitConfig.SECONDS);
3233
UNIT_MAPPING.put("second", TimeUnitConfig.SECONDS);
3334
UNIT_MAPPING.put("seconds", TimeUnitConfig.SECONDS);
3435

35-
// Minutes
36+
// Minutes (case-sensitive lowercase 'm')
3637
UNIT_MAPPING.put("m", TimeUnitConfig.MINUTES);
3738
UNIT_MAPPING.put("min", TimeUnitConfig.MINUTES);
39+
UNIT_MAPPING.put("mins", TimeUnitConfig.MINUTES);
3840
UNIT_MAPPING.put("minute", TimeUnitConfig.MINUTES);
3941
UNIT_MAPPING.put("minutes", TimeUnitConfig.MINUTES);
4042

4143
// Hours
4244
UNIT_MAPPING.put("h", TimeUnitConfig.HOURS);
4345
UNIT_MAPPING.put("hr", TimeUnitConfig.HOURS);
46+
UNIT_MAPPING.put("hrs", TimeUnitConfig.HOURS);
4447
UNIT_MAPPING.put("hour", TimeUnitConfig.HOURS);
4548
UNIT_MAPPING.put("hours", TimeUnitConfig.HOURS);
4649

@@ -49,7 +52,7 @@ public class TimeUnitRegistry {
4952
UNIT_MAPPING.put("day", TimeUnitConfig.DAYS);
5053
UNIT_MAPPING.put("days", TimeUnitConfig.DAYS);
5154

52-
// Months (case-sensitive M)
55+
// Months (case-sensitive uppercase 'M')
5356
UNIT_MAPPING.put("M", TimeUnitConfig.MONTHS);
5457
UNIT_MAPPING.put("mon", TimeUnitConfig.MONTHS);
5558
UNIT_MAPPING.put("month", TimeUnitConfig.MONTHS);
@@ -59,15 +62,19 @@ public class TimeUnitRegistry {
5962
/**
6063
* Gets the time unit configuration for the given unit string.
6164
*
62-
* @param unit The unit string (e.g., "h", "hours", "M")
65+
* @param unit The unit string (e.g., "h", "hours", "M", "m")
6366
* @return The time unit configuration, or null if not found
6467
*/
6568
public static TimeUnitConfig getConfig(String unit) {
66-
if (unit.equals("M")) {
67-
// M is case-sensitive for months
69+
// Exact-match lookup for M (month), m (minute), us, cs, ds. NOTE(review): only M vs m is actually case-sensitive here; "US", "CS", "DS" still resolve through the lowercase lookup below
70+
if (unit.equals("M")
71+
|| unit.equals("m")
72+
|| unit.equals("us")
73+
|| unit.equals("cs")
74+
|| unit.equals("ds")) {
6875
return UNIT_MAPPING.get(unit);
6976
} else {
70-
// For all other units, use lowercase lookup
77+
// For all other units, use lowercase lookup for case-insensitive matching
7178
return UNIT_MAPPING.get(unit.toLowerCase());
7279
}
7380
}

docs/user/ppl/cmd/bin.rst

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,25 @@ Specifies the width of each bin interval with support for multiple span types:
7575
- Creates logarithmic bin boundaries instead of linear
7676

7777
**3. Time Scale Span (comprehensive time units)**
78-
- **Subseconds**: ``us`` (microseconds), ``ms`` (milliseconds), ``cs`` (centiseconds), ``ds`` (deciseconds)
78+
79+
The ``bin`` command supports a comprehensive set of time units, including subsecond precision:
80+
81+
- **Subseconds**: ``us`` (microseconds), ``ms`` (milliseconds), ``cs`` (centiseconds), ``ds`` (deciseconds); ``us``, ``cs``, and ``ds`` are case-sensitive and must be lowercase
7982
- **Seconds**: ``s``, ``sec``, ``secs``, ``second``, ``seconds``
80-
- **Minutes**: ``m``, ``min``, ``mins``, ``minute``, ``minutes``
83+
- **Minutes** (case-sensitive): ``m`` (lowercase), ``min``, ``mins``, ``minute``, ``minutes``
8184
- **Hours**: ``h``, ``hr``, ``hrs``, ``hour``, ``hours``
8285
- **Days**: ``d``, ``day``, ``days`` - **Uses precise daily binning algorithm**
83-
- **Months**: ``mon``, ``month``, ``months`` - **Uses precise monthly binning algorithm**
86+
- **Months** (case-sensitive): ``M`` (uppercase), ``mon``, ``month``, ``months`` - **Uses precise monthly binning algorithm**
87+
88+
**Case Sensitivity Note**:
89+
- ``m`` (lowercase) = minute
90+
- ``M`` (uppercase) = month
91+
- Subsecond units ``us``, ``cs``, and ``ds`` are case-sensitive and must be lowercase; ``ms`` is matched case-insensitively
92+
93+
.. note::
94+
95+
The ``bin`` command implements time binning using **Calcite**, which provides full control over the binning logic. This allows it to support **subsecond units** (us, cs, ds) that are not available in the ``stats`` command, which relies on OpenSearch's calendar intervals.
96+
8497
- **Examples**:
8598
- ``span=30seconds``
8699
- ``span=15minutes``

docs/user/ppl/cmd/stats.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ stats [bucket_nullable=bool] <aggregation>... [by-clause]
8484
| year (y) |
8585
+----------------------------+
8686

87+
**Case Sensitivity Note**:
88+
- ``m`` (lowercase) = minute
89+
- ``M`` (uppercase) = month
90+
91+
.. note::
92+
93+
The ``stats`` command uses **OpenSearch calendar intervals** for time-based aggregations. **Subsecond units** (us, cs, ds) are **not supported** by the stats command due to OpenSearch limitations.
94+
95+
If you need subsecond precision for time binning, use the ``bin`` command instead, which implements time binning using Calcite and supports all time units including subsecond precision.
96+
8797
Configuration
8898
=============
8999
Some aggregation functions require Calcite to be enabled for proper functionality. To enable Calcite, use the following command:

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -984,4 +984,86 @@ public void testStatsWithBinsOnTimeAndTermField_Avg() throws IOException {
984984
rows(50, "us-east", "2024-07-01 00:05:00"),
985985
rows(40.25, "us-west", "2024-07-01 00:01:00"));
986986
}
987+
988+
@Test
989+
public void testBinCaseSensitivity_mon_vs_M() throws IOException {
990+
// Test uppercase 'M' for months - bin by 1 month
991+
JSONObject monthResultM =
992+
executeQuery(
993+
String.format(
994+
"source=%s | bin @timestamp span=1M | fields `@timestamp` | sort `@timestamp` |"
995+
+ " head 1",
996+
TEST_INDEX_TIME_DATA));
997+
verifySchema(monthResultM, schema("@timestamp", null, "string"));
998+
verifyDataRows(monthResultM, rows("2025-07"));
999+
1000+
// Test abbreviation 'mon' for months - should produce same result as 'M'
1001+
JSONObject monthResultMon =
1002+
executeQuery(
1003+
String.format(
1004+
"source=%s | bin @timestamp span=1mon | fields `@timestamp` | sort `@timestamp` |"
1005+
+ " head 1",
1006+
TEST_INDEX_TIME_DATA));
1007+
verifySchema(monthResultMon, schema("@timestamp", null, "string"));
1008+
verifyDataRows(monthResultMon, rows("2025-07"));
1009+
}
1010+
1011+
@Test
1012+
public void testBinWithSubsecondUnits() throws IOException {
1013+
// Test milliseconds (ms) - bin by 100 milliseconds
1014+
JSONObject msResult =
1015+
executeQuery(
1016+
String.format(
1017+
"source=%s | bin @timestamp span=100ms | fields `@timestamp` | sort `@timestamp` |"
1018+
+ " head 3",
1019+
TEST_INDEX_TIME_DATA));
1020+
verifySchema(msResult, schema("@timestamp", null, "timestamp"));
1021+
verifyDataRows(
1022+
msResult,
1023+
rows("2025-07-28 00:15:23"),
1024+
rows("2025-07-28 01:42:15"),
1025+
rows("2025-07-28 02:28:45"));
1026+
1027+
// Test microseconds (us) - bin by 500 microseconds
1028+
JSONObject usResult =
1029+
executeQuery(
1030+
String.format(
1031+
"source=%s | bin @timestamp span=500us | fields `@timestamp` | sort `@timestamp` |"
1032+
+ " head 3",
1033+
TEST_INDEX_TIME_DATA));
1034+
verifySchema(usResult, schema("@timestamp", null, "timestamp"));
1035+
verifyDataRows(
1036+
usResult,
1037+
rows("2025-07-28 00:15:23"),
1038+
rows("2025-07-28 01:42:15"),
1039+
rows("2025-07-28 02:28:45"));
1040+
1041+
// Test centiseconds (cs) - bin by 10 centiseconds (100ms)
1042+
JSONObject csResult =
1043+
executeQuery(
1044+
String.format(
1045+
"source=%s | bin @timestamp span=10cs | fields `@timestamp` | sort `@timestamp` |"
1046+
+ " head 3",
1047+
TEST_INDEX_TIME_DATA));
1048+
verifySchema(csResult, schema("@timestamp", null, "timestamp"));
1049+
verifyDataRows(
1050+
csResult,
1051+
rows("2025-07-28 00:15:23"),
1052+
rows("2025-07-28 01:42:15"),
1053+
rows("2025-07-28 02:28:45"));
1054+
1055+
// Test deciseconds (ds) - bin by 5 deciseconds (500ms)
1056+
JSONObject dsResult =
1057+
executeQuery(
1058+
String.format(
1059+
"source=%s | bin @timestamp span=5ds | fields `@timestamp` | sort `@timestamp` |"
1060+
+ " head 3",
1061+
TEST_INDEX_TIME_DATA));
1062+
verifySchema(dsResult, schema("@timestamp", null, "timestamp"));
1063+
verifyDataRows(
1064+
dsResult,
1065+
rows("2025-07-28 00:15:23"),
1066+
rows("2025-07-28 01:42:15"),
1067+
rows("2025-07-28 02:28:45"));
1068+
}
9871069
}

0 commit comments

Comments
 (0)