Draft: PIT context relocation on shard relocation #132251
base: main
Changes from all commits: fccadb1, f037bc0, 83a170e, f5e4887, d5759dc, b72487c, 0d1adda, b7e9568, 7b02a00, 015b5d2, f2b20da
PITHelper.java (new file):
@@ -0,0 +1,42 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.action.search;

import org.elasticsearch.TransportVersion;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.index.shard.ShardId;

import java.io.IOException;
import java.util.Base64;
import java.util.Collections;
import java.util.Map;

public class PITHelper {

    public static SearchContextId decodePITId(String id) throws IOException {
        return decodePITId(new BytesArray(Base64.getUrlDecoder().decode(id)));
    }

    public static SearchContextId decodePITId(BytesReference id) throws IOException {
        try (var in = id.streamInput()) {
            final TransportVersion version = TransportVersion.readVersion(in);
            in.setTransportVersion(version);
            final Map<ShardId, SearchContextIdForNode> shards = Collections.unmodifiableMap(
                in.readCollection(Maps::newHashMapWithExpectedSize, (i, map) -> map.put(new ShardId(in), new SearchContextIdForNode(in)))
            );
            return new SearchContextId(shards, Collections.emptyMap());
        } catch (IOException e) {
            assert false : e;
            throw new IllegalArgumentException(e);
        }
    }
}

Review comment on "public class PITHelper": just for debugging atm
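As a usage illustration, here is a minimal sketch of a debugging entry point built on this helper. The PITDebug class and its main method are hypothetical, not part of this PR; the sketch assumes it sits in the same package so the search-internal types are accessible.

package org.elasticsearch.action.search;

import org.elasticsearch.index.shard.ShardId;

import java.util.Map;

// Hypothetical debugging entry point, not part of this PR; it lives in the same
// package as PITHelper so it can use the search-internal types directly.
public class PITDebug {
    public static void main(String[] args) throws Exception {
        // args[0] is the Base64-encoded PIT id returned when opening a point in time.
        SearchContextId contextId = PITHelper.decodePITId(args[0]);
        for (Map.Entry<ShardId, SearchContextIdForNode> entry : contextId.shards().entrySet()) {
            // Show which node each shard's PIT context was recorded on.
            System.out.printf(
                "shard %s -> node %s (context %s)%n",
                entry.getKey(),
                entry.getValue().getNode(),
                entry.getValue().getSearchContextId()
            );
        }
    }
}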
TransportClosePointInTimeAction.java:
@@ -13,45 +13,51 @@
 import org.elasticsearch.action.ActionType;
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.HandledTransportAction;
+import org.elasticsearch.cluster.project.ProjectResolver;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.util.concurrent.EsExecutors;
+import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.injection.guice.Inject;
 import org.elasticsearch.tasks.Task;
 import org.elasticsearch.transport.TransportService;

 import java.util.Collection;
+import java.util.Map;

 public class TransportClosePointInTimeAction extends HandledTransportAction<ClosePointInTimeRequest, ClosePointInTimeResponse> {

     public static final ActionType<ClosePointInTimeResponse> TYPE = new ActionType<>("indices:data/read/close_point_in_time");
     private final ClusterService clusterService;
     private final SearchTransportService searchTransportService;
     private final NamedWriteableRegistry namedWriteableRegistry;
+    private final ProjectResolver projectResolver;

     @Inject
     public TransportClosePointInTimeAction(
         TransportService transportService,
         ClusterService clusterService,
         ActionFilters actionFilters,
         SearchTransportService searchTransportService,
-        NamedWriteableRegistry namedWriteableRegistry
+        NamedWriteableRegistry namedWriteableRegistry,
+        ProjectResolver projectResolver
     ) {
         super(TYPE.name(), transportService, actionFilters, ClosePointInTimeRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE);
         this.clusterService = clusterService;
         this.searchTransportService = searchTransportService;
         this.namedWriteableRegistry = namedWriteableRegistry;
+        this.projectResolver = projectResolver;
     }

     @Override
     protected void doExecute(Task task, ClosePointInTimeRequest request, ActionListener<ClosePointInTimeResponse> listener) {
         final SearchContextId searchContextId = SearchContextId.decode(namedWriteableRegistry, request.getId());
-        final Collection<SearchContextIdForNode> contextIds = searchContextId.shards().values();
+        Map<ShardId, SearchContextIdForNode> shards = searchContextId.shards();
         ClearScrollController.closeContexts(
-            clusterService.state().nodes(),
+            clusterService,
+            projectResolver,
             searchTransportService,
-            contextIds,
-            listener.map(freed -> new ClosePointInTimeResponse(freed == contextIds.size(), freed))
+            shards,
+            listener.map(freed -> new ClosePointInTimeResponse(freed == shards.size(), freed))
         );
     }
 }

Review comment on the new "shards" line: See the changes in ClearScrollController above. We also need to pass in the shard ids now so we can retry if the original node is gone.
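To make the retry idea from that comment concrete, here is a hedged, self-contained sketch of how close targets could be chosen per shard when the recorded node has left the cluster. All names here (CloseRetrySketch, targetsForClose, and the simplified String-based types) are hypothetical; the actual change lives in ClearScrollController, which is not part of this excerpt.

import java.util.List;
import java.util.Map;
import java.util.Set;

// Hypothetical, simplified model: `liveNodeIds` is the set of node ids currently
// in the cluster, `shardCopies` maps a shard id to the node ids holding a copy.
final class CloseRetrySketch {
    static List<String> targetsForClose(
        String shardId,
        String recordedNodeId,
        Set<String> liveNodeIds,
        Map<String, List<String>> shardCopies
    ) {
        if (liveNodeIds.contains(recordedNodeId)) {
            // Fast path: the node encoded in the PIT id is still in the cluster,
            // so the context can be freed there directly.
            return List.of(recordedNodeId);
        }
        // The original node is gone: try every node that currently holds a copy
        // of the shard, since a relocated context must live on one of them.
        return shardCopies.getOrDefault(shardId, List.of());
    }
}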
TransportSearchAction.java:
@@ -371,6 +371,7 @@ void executeRequest(
         );
         frozenIndexCheck(resolvedIndices);
     }
+    logger.info("Executing search request on node [{}] with indices [{}]", clusterService.getNodeName(), resolvedIndices);

     ActionListener<SearchRequest> rewriteListener = listener.delegateFailureAndWrap((delegate, rewritten) -> {
         if (ccsCheckCompatibility) {
@@ -1316,6 +1317,7 @@ private void executeSearch(
     SearchResponse.Clusters clusters,
     SearchPhaseProvider searchPhaseProvider
 ) {
+    logger.info("Executing search locally.");
     if (searchRequest.allowPartialSearchResults() == null) {
         // No user preference defined in search request - apply cluster service default
         searchRequest.allowPartialSearchResults(searchService.defaultAllowPartialSearchResults());
@@ -1905,10 +1907,11 @@ static List<SearchShardIterator> getLocalShardsIteratorFromPointInTime(
     try {
         final ShardIterator shards = OperationRouting.getShards(projectState.routingTable(), shardId);
         // Prefer executing shard requests on nodes that are part of PIT first.
-        if (projectState.cluster().nodes().nodeExists(perNode.getNode())) {
+        boolean nodeExists = projectState.cluster().nodes().nodeExists(perNode.getNode());
+        if (nodeExists) {
             targetNodes.add(perNode.getNode());
         }
-        if (perNode.getSearchContextId().getSearcherId() != null) {
+        if (perNode.getSearchContextId().getSearcherId() != null || nodeExists == false) {
             for (ShardRouting shard : shards) {
                 if (shard.currentNodeId().equals(perNode.getNode()) == false) {
                     targetNodes.add(shard.currentNodeId());

Review comment on the changed "if" condition: This is where we now retry on other shard copies when the original PIT node is gone. Trying every node with a shard copy might be too much in the long run, but without a cluster-wide service that keeps track of where the PIT contexts live, this might be unavoidable. Needs follow-up in terms of altering the PIT id once we have found the new node where the PIT context lives.

Review comment: Note: we need to close the contexts after moving them once the "old" PIT id is used, so if the originally encoded node is gone we try all remaining nodes that currently hold that shard here (regardless of whether a node also holds a PIT context).
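The fallback logic above can be summarized in a small, self-contained sketch. The class and method names are hypothetical and the cluster/routing types are simplified to plain collections; only the branch structure mirrors the diff.

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

// Hypothetical mirror of the target-node selection for one shard of a PIT search.
final class PitTargetNodesSketch {
    static List<String> targetNodes(
        String pitNodeId,            // node id recorded in the PIT id for this shard
        boolean hasSearcherId,       // true if the context can be rebuilt from a searcher id
        Set<String> liveNodeIds,     // nodes currently in the cluster
        List<String> shardCopyNodes  // nodes currently holding a copy of the shard
    ) {
        List<String> targets = new ArrayList<>();
        boolean nodeExists = liveNodeIds.contains(pitNodeId);
        if (nodeExists) {
            // Prefer the node the PIT context was originally created on.
            targets.add(pitNodeId);
        }
        if (hasSearcherId || nodeExists == false) {
            // Fall back to every other node with a shard copy; after a
            // relocation the context should live on one of them.
            for (String nodeId : shardCopyNodes) {
                if (nodeId.equals(pitNodeId) == false) {
                    targets.add(nodeId);
                }
            }
        }
        return targets;
    }
}

The ordering matters: the recorded node, when still alive, stays first in the list so the common case (no relocation) behaves exactly as before and the fan-out only happens on the fallback path.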