@@ -6,6 +6,7 @@ from libc.math cimport (
66 sqrt,
77)
88from libcpp.deque cimport deque
9+ from libcpp.stack cimport stack
910from libcpp.unordered_map cimport unordered_map
1011
1112from pandas._libs.algos cimport TiebreakEnumType
@@ -991,36 +992,24 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
991992# Moving maximum / minimum code taken from Bottleneck
992993# Licence at LICENSES/BOTTLENECK_LICENCE
993994
995+ cdef int64_t bisect_left(
996+ deque[int64_t]& a,
997+ int64_t x,
998+ int64_t lo = 0 ,
999+ int64_t hi = - 1
1000+ ) nogil:
1001+ cdef int64_t mid
1002+ if hi == - 1 :
1003+ hi = a.size()
1004+ while lo < hi:
1005+ mid = (lo + hi) // 2
1006+ if a.at(mid) < x:
1007+ lo = mid + 1
1008+ else :
1009+ hi = mid
1010+ return lo
9941011
995- cdef float64_t init_mm(float64_t ai, Py_ssize_t * nobs, bint is_max) noexcept nogil:
996-
997- if ai == ai:
998- nobs[0 ] = nobs[0 ] + 1
999- elif is_max:
1000- ai = MINfloat64
1001- else :
1002- ai = MAXfloat64
1003-
1004- return ai
1005-
1006-
1007- cdef void remove_mm(float64_t aold, Py_ssize_t * nobs) noexcept nogil:
1008- """ remove a value from the mm calc """
1009- if aold == aold:
1010- nobs[0 ] = nobs[0 ] - 1
1011-
1012-
1013- cdef float64_t calc_mm(int64_t minp, Py_ssize_t nobs,
1014- float64_t value) noexcept nogil:
1015- cdef:
1016- float64_t result
1017-
1018- if nobs >= minp:
1019- result = value
1020- else :
1021- result = NaN
1022-
1023- return result
1012+ from libc.math cimport isnan
10241013
10251014
10261015def roll_max (ndarray[float64_t] values , ndarray[int64_t] start ,
@@ -1068,69 +1057,124 @@ def roll_min(ndarray[float64_t] values, ndarray[int64_t] start,
10681057 return _roll_min_max(values , start , end , minp , is_max = 0 )
10691058
10701059
1071- cdef _roll_min_max(ndarray[float64_t] values ,
1072- ndarray[int64_t] starti ,
1073- ndarray[int64_t] endi ,
1074- int64_t minp ,
1075- bint is_max ):
1060+ def _roll_min_max(
1061+ ndarray[float64_t] values ,
1062+ ndarray[int64_t] start ,
1063+ ndarray[int64_t] end ,
1064+ int64_t minp ,
1065+ bint is_max
1066+ ):
10761067 cdef:
1077- float64_t ai
1078- int64_t curr_win_size, start
1079- Py_ssize_t i, k, nobs = 0 , N = len (starti)
1080- deque Q[int64_t] # min/max always the front
1081- deque W[int64_t] # track the whole window for nobs compute
1068+ Py_ssize_t i, i_next, k, valid_start, last_end, last_start, N = len (start)
1069+ deque Q[int64_t]
1070+ stack Dominators[int64_t]
10821071 ndarray[float64_t, ndim= 1 ] output
10831072
1073+ # ideally want these in the i-loop scope
1074+ Py_ssize_t this_start, this_end, stash_start
1075+ int64_t q_idx
1076+
10841077 output = np.empty(N, dtype = np.float64)
10851078 Q = deque[int64_t]()
1086- W = deque[int64_t]()
1079+ Dominators = stack[int64_t]()
1080+
1081+ # This function was "ported" / translated from sliding_min_max()
1082+ # in /pandas/core/_numba/kernels/min_max_.py. (See there for detailed
1083+ # comments and credits.)
1084+ # Code translation assumptions/rules:
1085+ # - min_periods --> minp
1086+ # - deque[0] --> front()
1087+ # - deque[-1] --> back()
1088+ # - stack[-1] --> top()
1089+ # - bool(stack/deque) --> !empty()
1090+ # - deque.append() --> push_back()
1091+ # - stack.append() --> push()
1092+ # - deque.popleft --> pop_front()
1093+ # - deque.pop() --> pop_back()
10871094
10881095 with nogil:
1096+ if minp < 1 :
1097+ minp = 1
1098+
1099+ if N> 2 :
1100+ i_next = N - 1
1101+ for i in range (N - 2 , - 1 , - 1 ):
1102+ if start[i_next] < start[i] \
1103+ and (
1104+ Dominators.empty()
1105+ or start[Dominators.top()] > start[i_next]
1106+ ):
1107+ Dominators.push(i_next)
1108+ i_next = i
10891109
1090- # This is using a modified version of the C++ code in this
1091- # SO post: https://stackoverflow.com/a/12239580
1092- # The original impl didn't deal with variable window sizes
1093- # So the code was optimized for that
1110+ valid_start = - minp
1111+
1112+ last_end = 0
1113+ last_start = - 1
10941114
1095- # first window's size
1096- curr_win_size = endi[0 ] - starti[0 ]
1097- # GH 32865
1098- # Anchor output index to values index to provide custom
1099- # BaseIndexer support
11001115 for i in range (N):
1116+ this_start = start[i]
1117+ this_end = end[i]
1118+
1119+ if (not Dominators.empty() and Dominators.top() == i):
1120+ Dominators.pop()
11011121
1102- curr_win_size = endi[i] - starti[i]
1103- if i == 0 :
1104- start = starti[i]
1122+ if not (this_end > last_end
1123+ or (this_end == last_end and this_start >= last_start)):
1124+ raise ValueError (
1125+ " Start/End ordering requirement is violated at index {}" .format(i))
1126+
1127+ if Dominators.empty():
1128+ stash_start = this_start
11051129 else :
1106- start = endi[i - 1 ]
1107-
1108- for k in range (start, endi[i]):
1109- ai = init_mm(values[k], & nobs, is_max)
1110- # Discard previous entries if we find new min or max
1111- if is_max:
1112- while not Q.empty() and ((ai >= values[Q.back()]) or
1113- values[Q.back()] != values[Q.back()]):
1114- Q.pop_back()
1115- else :
1116- while not Q.empty() and ((ai <= values[Q.back()]) or
1117- values[Q.back()] != values[Q.back()]):
1118- Q.pop_back()
1119- Q.push_back(k)
1120- W.push_back(k)
1121-
1122- # Discard entries outside and left of current window
1123- while not Q.empty() and Q.front() <= starti[i] - 1 :
1130+ stash_start = min (this_start, start[Dominators.top()])
1131+
1132+ while not Q.empty() and Q.front() < stash_start:
11241133 Q.pop_front()
1125- while not W.empty() and W.front() <= starti[i] - 1 :
1126- remove_mm(values[W.front()], & nobs)
1127- W.pop_front()
11281134
1129- # Save output based on index in input value array
1130- if not Q.empty() and curr_win_size > 0 :
1131- output[i] = calc_mm(minp, nobs, values[Q.front()])
1132- else :
1135+ for k in range (last_end, this_end):
1136+ if not isnan(values[k]):
1137+ valid_start += 1
1138+ while valid_start>= 0 and isnan(values[valid_start]):
1139+ valid_start += 1
1140+
1141+ # Sadly, this runs more than 15% faster than trying to use
1142+ # generic comparison functions.
1143+ # That is, I tried:
1144+ #
1145+ # | cdef inline bint le(float64_t a, float64_t b) nogil:
1146+ # | return a <= b
1147+ # | cdef inline bint ge(float64_t a, float64_t b) nogil:
1148+ # | return a >= b
1149+ # | ctypedef bint (*cmp_func_t) (float64_t a, float64_t b) nogil
1150+ # | ...
1151+ # | cmp_func_t cmp
1152+ # |
1153+ # | if is_max:
1154+ # | cmp = ge
1155+ # | else:
1156+ # | cmp = le
1157+ # and, finally
1158+ # | while not Q.empty() and cmp(values[k], values[Q.back()]):
1159+ # | Q.pop_back()
1160+
1161+ if is_max:
1162+ while not Q.empty() and values[k] >= values[Q.back()]:
1163+ Q.pop_back()
1164+ else :
1165+ while not Q.empty() and values[k] <= values[Q.back()]:
1166+ Q.pop_back()
1167+ Q.push_back(k)
1168+
1169+ if Q.empty() or this_start > valid_start:
11331170 output[i] = NaN
1171+ elif Q.front() >= this_start:
1172+ output[i] = values[Q.front()]
1173+ else :
1174+ q_idx = bisect_left(Q, this_start, lo = 1 )
1175+ output[i] = values[Q[q_idx]]
1176+ last_end = this_end
1177+ last_start = this_start
11341178
11351179 return output
11361180
0 commit comments