-
Notifications
You must be signed in to change notification settings - Fork 6.9k
[doc][rdt] Add the limitations of rdt #58063
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b493941
3baee79
914291f
7613301
d0a7794
cee4ec7
6debf69
3b9aa76
35a488f
3947b87
5d5bd93
b2440dc
2bf832b
d69f7ae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,3 +56,36 @@ def consume_with_nixl(self, refs): | |
| ref1 = receiver.consume_with_nixl.remote(refs) | ||
| print(ray.get(ref1)) | ||
| # __nixl_put__and_get_end__ | ||
|
|
||
|
|
||
| # __nixl_limitations_start__ | ||
| @ray.remote(num_gpus=1) | ||
| class Actor: | ||
| def __init__(self): | ||
| self.tensor1 = torch.tensor([1, 2, 3]) | ||
| self.tensor2 = torch.tensor([4, 5, 6]) | ||
| self.tensor3 = torch.tensor([7, 8, 9]) | ||
|
|
||
| @ray.method(tensor_transport="nixl") | ||
| def send_dict1(self): | ||
| return {"round1-1": self.tensor1, "round1-2": self.tensor2} | ||
|
|
||
| @ray.method(tensor_transport="nixl") | ||
| def send_dict2(self): | ||
| return {"round2-1": self.tensor1, "round2-3": self.tensor3} | ||
|
|
||
| def sum_dict(self, dict): | ||
| return sum(v.sum().item() for v in dict.values()) | ||
|
|
||
|
|
||
| sender, receiver = Actor.remote(), Actor.remote() | ||
| ref1 = sender.send_dict1.remote() | ||
| result1 = receiver.sum_dict.remote(ref1) | ||
| print(ray.get(result1)) | ||
| ref2 = sender.send_dict2.remote() | ||
| result2 = receiver.sum_dict.remote(ref2) | ||
| try: | ||
| print(ray.get(result2)) | ||
Qiaolin-Yu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| except ValueError as e: | ||
| print("Error caught:", e) | ||
| # __nixl_limitations_end__ | ||
Qiaolin-Yu marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Tensor Overlap Timing Issue. The example code expects a `ValueError` to be raised when sending overlapping tensors in different objects, but the timing is incorrect. The code calls … |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -202,6 +202,11 @@ def add_object( | |
| is_primary: Whether the GPU object is the primary copy. | ||
| """ | ||
| with self._object_present_cv: | ||
| for tensor in gpu_object: | ||
| if tensor in self._tensor_to_object_ids: | ||
| raise ValueError( | ||
| f"Tensor already exists in the RDT object store. Free all references to ObjectRef({obj_id}) before storing the tensor again." | ||
| ) | ||
Qiaolin-Yu marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Incorrect Object ID Reference in GPU Object Addition. The … |
||
| for tensor in gpu_object: | ||
| self._tensor_to_object_ids[tensor].add(obj_id) | ||
| # Append to the queue instead of overwriting | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.