|
13 | 13 | import org.apache.lucene.util.SetOnce; |
14 | 14 | import org.elasticsearch.ElasticsearchException; |
15 | 15 | import org.elasticsearch.ExceptionsHelper; |
| 16 | +import org.elasticsearch.TransportVersion; |
16 | 17 | import org.elasticsearch.action.ActionListener; |
17 | 18 | import org.elasticsearch.action.NoShardAvailableActionException; |
18 | 19 | import org.elasticsearch.action.OriginalIndices; |
|
21 | 22 | import org.elasticsearch.action.support.SubscribableListener; |
22 | 23 | import org.elasticsearch.action.support.TransportActions; |
23 | 24 | import org.elasticsearch.cluster.ClusterState; |
| 25 | +import org.elasticsearch.cluster.node.DiscoveryNodes; |
24 | 26 | import org.elasticsearch.common.bytes.BytesReference; |
25 | 27 | import org.elasticsearch.common.io.stream.NamedWriteableRegistry; |
26 | 28 | import org.elasticsearch.common.util.Maps; |
|
30 | 32 | import org.elasticsearch.rest.action.search.SearchResponseMetrics; |
31 | 33 | import org.elasticsearch.search.SearchContextMissingException; |
32 | 34 | import org.elasticsearch.search.SearchPhaseResult; |
| 35 | +import org.elasticsearch.search.SearchService; |
33 | 36 | import org.elasticsearch.search.SearchShardTarget; |
34 | 37 | import org.elasticsearch.search.builder.PointInTimeBuilder; |
| 38 | +import org.elasticsearch.search.builder.SearchSourceBuilder; |
35 | 39 | import org.elasticsearch.search.internal.AliasFilter; |
36 | 40 | import org.elasticsearch.search.internal.SearchContext; |
37 | 41 | import org.elasticsearch.search.internal.ShardSearchContextId; |
|
40 | 44 |
|
41 | 45 | import java.util.ArrayList; |
42 | 46 | import java.util.Arrays; |
| 47 | +import java.util.Collection; |
| 48 | +import java.util.HashMap; |
43 | 49 | import java.util.List; |
44 | 50 | import java.util.Map; |
45 | 51 | import java.util.concurrent.ConcurrentHashMap; |
|
53 | 59 | import java.util.function.Supplier; |
54 | 60 | import java.util.stream.Collectors; |
55 | 61 |
|
| 62 | +import static org.elasticsearch.action.search.TransportClosePointInTimeAction.closeContexts; |
56 | 63 | import static org.elasticsearch.core.Strings.format; |
57 | 64 |
|
58 | 65 | /** |
@@ -94,11 +101,13 @@ abstract class AbstractSearchAsyncAction<Result extends SearchPhaseResult> exten |
94 | 101 | private final Map<String, PendingExecutions> pendingExecutionsPerNode; |
95 | 102 | private final AtomicBoolean requestCancelled = new AtomicBoolean(); |
96 | 103 | private final int skippedCount; |
| 104 | + private final TransportVersion mintransportVersion; |
97 | 105 | protected final SearchResponseMetrics searchResponseMetrics; |
98 | 106 | protected long phaseStartTimeInNanos; |
99 | 107 |
|
100 | 108 | // protected for tests |
101 | 109 | protected final SubscribableListener<Void> doneFuture = new SubscribableListener<>(); |
| 110 | + private final Supplier<DiscoveryNodes> discoveryNodes; |
102 | 111 |
|
103 | 112 | AbstractSearchAsyncAction( |
104 | 113 | String name, |
@@ -153,6 +162,8 @@ abstract class AbstractSearchAsyncAction<Result extends SearchPhaseResult> exten |
153 | 162 | this.nodeIdToConnection = nodeIdToConnection; |
154 | 163 | this.concreteIndexBoosts = concreteIndexBoosts; |
155 | 164 | this.clusterStateVersion = clusterState.version(); |
| 165 | + this.mintransportVersion = clusterState.getMinTransportVersion(); |
| 166 | + this.discoveryNodes = clusterState::nodes; |
156 | 167 | this.aliasFilter = aliasFilter; |
157 | 168 | this.results = resultConsumer; |
158 | 169 | // register the release of the query consumer to free up the circuit breaker memory |
@@ -422,6 +433,7 @@ protected final void onShardFailure(final int shardIndex, SearchShardTarget shar |
422 | 433 | onShardGroupFailure(shardIndex, shard, e); |
423 | 434 | } |
424 | 435 | if (lastShard == false) { |
| 436 | + logger.debug("Retrying shard [{}] with target [{}]", shard.getShardId(), nextShard); |
425 | 437 | performPhaseOnShard(shardIndex, shardIt, nextShard); |
426 | 438 | } else { |
427 | 439 | // count down outstanding shards, we're done with this shard as there's no more copies to try |
@@ -613,10 +625,87 @@ public void sendSearchResponse(SearchResponseSections internalSearchResponse, At |
613 | 625 | } |
614 | 626 |
|
615 | 627 | protected BytesReference buildSearchContextId(ShardSearchFailure[] failures) { |
616 | | - var source = request.source(); |
617 | | - return source != null && source.pointInTimeBuilder() != null && source.pointInTimeBuilder().singleSession() == false |
618 | | - ? source.pointInTimeBuilder().getEncodedId() |
619 | | - : null; |
| 628 | + SearchSourceBuilder source = request.source(); |
| 629 | + // only (re-)build a search context id if we are running a long-lived point-in-time request |
| 630 | + if (source != null && source.pointInTimeBuilder() != null && source.pointInTimeBuilder().singleSession() == false) { |
| 631 | + if (SearchService.PIT_RELOCATION_FEATURE_FLAG.isEnabled()) { |
| 632 | + // we want to change node ids in the PIT id if any shards and its PIT context have moved |
| 633 | + return maybeReEncodeNodeIds( |
| 634 | + source.pointInTimeBuilder(), |
| 635 | + results.getAtomicArray().asList(), |
| 636 | + namedWriteableRegistry, |
| 637 | + mintransportVersion, |
| 638 | + searchTransportService, |
| 639 | + discoveryNodes.get(), |
| 640 | + logger |
| 641 | + ); |
| 642 | + } else { |
| 643 | + return source.pointInTimeBuilder().getEncodedId(); |
| 644 | + } |
| 645 | + } else { |
| 646 | + return null; |
| 647 | + } |
| 648 | + } |
| 649 | + |
| 650 | + static <Result extends SearchPhaseResult> BytesReference maybeReEncodeNodeIds( |
| 651 | + PointInTimeBuilder originalPit, |
| 652 | + List<Result> results, |
| 653 | + NamedWriteableRegistry namedWriteableRegistry, |
| 654 | + TransportVersion mintransportVersion, |
| 655 | + SearchTransportService searchTransportService, |
| 656 | + DiscoveryNodes nodes, |
| 657 | + Logger logger |
| 658 | + ) { |
| 659 | + SearchContextId original = originalPit.getSearchContextId(namedWriteableRegistry); |
| 660 | + // only create the following two collections if we detect an id change |
| 661 | + Map<ShardId, SearchContextIdForNode> updatedShardMap = null; |
| 662 | + Collection<SearchContextIdForNode> contextsToClose = null; |
| 663 | + logger.debug("checking search result shards to detect PIT node changes"); |
| 664 | + for (Result result : results) { |
| 665 | + SearchShardTarget searchShardTarget = result.getSearchShardTarget(); |
| 666 | + ShardId shardId = searchShardTarget.getShardId(); |
| 667 | + SearchContextIdForNode originalShard = original.shards().get(shardId); |
| 668 | + if (originalShard != null && originalShard.getSearchContextId() != null && originalShard.getSearchContextId().isRetryable()) { |
| 669 | + // check if the node is different, if so we need to re-encode the PIT |
| 670 | + String originalNode = originalShard.getNode(); |
| 671 | + if (originalNode != null && originalNode.equals(searchShardTarget.getNodeId()) == false) { |
| 672 | + // the target node for this shard entry in the PIT has changed, we need to update it |
| 673 | + if (updatedShardMap == null) { |
| 674 | + // initialize the map with entries from old map to keep ids for shards that have not responded in this results |
| 675 | + updatedShardMap = new HashMap<>(original.shards()); |
| 676 | + contextsToClose = new ArrayList<>(); |
| 677 | + } |
| 678 | + SearchContextIdForNode updatedId = new SearchContextIdForNode( |
| 679 | + searchShardTarget.getClusterAlias(), |
| 680 | + searchShardTarget.getNodeId(), |
| 681 | + result.getContextId() |
| 682 | + ); |
| 683 | + |
| 684 | + logger.debug("changing node for PIT shard id from [{}] to [{}]", originalShard, updatedId); |
| 685 | + updatedShardMap.put(shardId, updatedId); |
| 686 | + contextsToClose.add(original.shards().get(shardId)); |
| 687 | + |
| 688 | + } |
| 689 | + } |
| 690 | + } |
| 691 | + if (updatedShardMap != null) { |
| 692 | + // we free all old contexts that have moved, just in case we have re-tried them elsewhere |
| 693 | + // but they still exist in the old location |
| 694 | + closeContexts(nodes, searchTransportService, contextsToClose, new ActionListener<Integer>() { |
| 695 | + @Override |
| 696 | + public void onResponse(Integer integer) { |
| 697 | + // ignore |
| 698 | + } |
| 699 | + |
| 700 | + @Override |
| 701 | + public void onFailure(Exception e) { |
| 702 | + logger.trace("Failure while freeing old point in time contexts", e); |
| 703 | + } |
| 704 | + }); |
| 705 | + return SearchContextId.encode(updatedShardMap, original.aliasFilter(), mintransportVersion, ShardSearchFailure.EMPTY_ARRAY); |
| 706 | + } else { |
| 707 | + return originalPit.getEncodedId(); |
| 708 | + } |
620 | 709 | } |
621 | 710 |
|
622 | 711 | /** |
|
0 commit comments