@@ -651,30 +651,19 @@ def factorize_(
651651
652652 found_groups .append (np .array (expect ))
653653 else :
654- idx , groups = pd .factorize (flat , sort = sort ) # type: ignore[arg-type]
655654 if expect is not None and reindex :
656- assert sort
657- # https://stackoverflow.com/questions/5036816/numpy-lookup-map-or-point/5036900#5036900
658- # sorter = np.argsort(expect)
659- # groups = expect[(sorter,)] if sort else expect
660- #ii = np.argsort(groups)
661- #C = np.digitize(idx, groups[ii]) - 1
662- #idx = ii[C]
663- # key=np.argsort(groups)
664- # idx=key[groups[key].searchsorted(idx)]
665- inds = np .searchsorted (expect , groups )
666- # print(groups, inds)
667- mask = ~ np .isin (groups , expect ) | (inds == len (expect ))
668- codes_to_nan_out = np .arange (len (groups ))[mask ]
669- print (codes_to_nan_out , groupvar .shape , len (groups ))
670- # codes_to_nan_out, groups, groups[codes_to_nan_out]
671- # key=np.argsort(expect)
672- # key = np.arange(len(expect))
673- # idx=key[groups[key].searchsorted(idx)]
674- idx = idx [ ]
675- idx [np .isin (idx , codes_to_nan_out )] = - 1
676- print (np .unique (idx ))
677-
655+ sorter = np .argsort (expect )
656+ groups = expect [(sorter ,)] if sort else expect
657+ idx = np .searchsorted (expect , flat , sorter = sorter )
658+ mask = ~ np .isin (flat , expect ) | isnull (flat ) | (idx == len (expect ))
659+ if not sort :
660+ # idx is the index into the sorted array.
661+ # if sorting was not requested, map it back to positions in the original (unsorted) expect
662+ idx [(idx == len (expect ),)] = - 1
663+ idx = sorter [(idx ,)]
664+ idx [mask ] = - 1
665+ else :
666+ idx , groups = pd .factorize (flat , sort = sort ) # type: ignore[arg-type]
678667
679668 found_groups .append (np .array (groups ))
680669 factorized .append (idx .reshape (groupvar .shape ))
0 commit comments