Skip to content

Commit 465f715

Browse files
committed
Add OwnedStream type
Signed-off-by: Moritz Hoffmann <[email protected]>
1 parent cb41e11 commit 465f715

35 files changed

+343
-201
lines changed

kafkaesque/src/kafka_source.rs

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
use timely::Data;
2-
use timely::dataflow::{Scope, Stream};
2+
use timely::dataflow::Scope;
33
use timely::dataflow::operators::Capability;
44
use timely::dataflow::operators::generic::OutputHandle;
5-
use timely::dataflow::channels::pushers::Tee;
65

76
use rdkafka::Message;
87
use rdkafka::consumer::{ConsumerContext, BaseConsumer};
8+
use timely::dataflow::channels::pushers::tee::PushOwned;
9+
use timely::dataflow::stream::OwnedStream;
910

1011
/// Constructs a stream of data from a Kafka consumer.
1112
///
@@ -89,14 +90,14 @@ pub fn kafka_source<C, G, D, L>(
8990
name: &str,
9091
consumer: BaseConsumer<C>,
9192
logic: L
92-
) -> Stream<G, D>
93+
) -> OwnedStream<G, Vec<D>>
9394
where
9495
C: ConsumerContext+'static,
9596
G: Scope,
9697
D: Data,
9798
L: Fn(&[u8],
9899
&mut Capability<G::Timestamp>,
99-
&mut OutputHandle<G::Timestamp, D, Tee<G::Timestamp, D>>) -> bool+'static,
100+
&mut OutputHandle<G::Timestamp, D, PushOwned<G::Timestamp, Vec<D>>>) -> bool+'static,
100101
{
101102
use timely::dataflow::operators::generic::source;
102103
source(scope, name, move |capability, info| {
@@ -135,4 +136,4 @@ where
135136
}
136137

137138
})
138-
}
139+
}

timely/examples/bfs.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ fn main() {
133133
});
134134
}
135135
)
136-
.concat(&(0..1).map(|x| (x,x)).to_stream(scope))
136+
.concat((0..1).map(|x| (x,x)).to_stream(scope))
137137
.connect_loop(handle);
138138
});
139139
}).unwrap(); // asserts error-free execution;

timely/examples/loopdemo.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ fn main() {
2929

3030
let step =
3131
stream
32-
.concat(&loop_stream)
32+
.concat(loop_stream)
3333
.map(|x| if x % 2 == 0 { x / 2 } else { 3 * x + 1 })
3434
.filter(|x| x > &1);
3535
step

timely/examples/pingpong.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ fn main() {
1616
(0 .. elements)
1717
.filter(move |&x| (x as usize) % peers == index)
1818
.to_stream(scope)
19-
.concat(&cycle)
19+
.concat(cycle)
2020
.exchange(|&x| x)
2121
.map_in_place(|x| *x += 1)
2222
.branch_when(move |t| t < &iterations).1

timely/examples/unionfind.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use timely::dataflow::*;
99
use timely::dataflow::operators::{Input, Exchange, Probe};
1010
use timely::dataflow::operators::generic::operator::Operator;
1111
use timely::dataflow::channels::pact::Pipeline;
12+
use timely::dataflow::stream::{OwnedStream, StreamLike};
1213

1314
fn main() {
1415

@@ -50,12 +51,12 @@ fn main() {
5051
}).unwrap(); // asserts error-free execution;
5152
}
5253

53-
trait UnionFind {
54-
fn union_find(self) -> Self;
54+
trait UnionFind<G: Scope> {
55+
fn union_find(self) -> OwnedStream<G, Vec<(usize, usize)>>;
5556
}
5657

57-
impl<G: Scope> UnionFind for Stream<G, (usize, usize)> {
58-
fn union_find(self) -> Stream<G, (usize, usize)> {
58+
impl<G: Scope, S: StreamLike<G, Vec<(usize, usize)>>> UnionFind<G> for S {
59+
fn union_find(self) -> OwnedStream<G, Vec<(usize, usize)>> {
5960

6061
self.unary(Pipeline, "UnionFind", |_,_| {
6162

timely/src/dataflow/channels/pushers/tee.rs

+45-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! A `Push` implementor with a list of `Box<Push>` to forward pushes to.
22
33
use std::cell::RefCell;
4-
use std::fmt::{self, Debug};
4+
use std::fmt::{self, Debug, Formatter};
55
use std::rc::Rc;
66

77
use crate::dataflow::channels::{BundleCore, Message};
@@ -10,6 +10,39 @@ use crate::communication::Push;
1010
use crate::{Container, Data};
1111

1212
type PushList<T, D> = Rc<RefCell<Vec<Box<dyn Push<BundleCore<T, D>>>>>>;
13+
/// A cloneable handle to a shared slot holding at most one boxed `Push` target; the slot starts out empty.
14+
pub struct PushOwned<T, D>(Rc<RefCell<Option<Box<dyn Push<BundleCore<T, D>>>>>>);
15+
16+
impl<T, D> PushOwned<T, D> {
17+
/// Creates two handles that share a single, initially empty, pusher slot.
18+
pub fn new() -> (Self, Self) {
19+
let zelf = Self(Rc::new(RefCell::new(None)));
20+
(zelf.clone(), zelf)
21+
}
22+
23+
/// Stores `pusher` in the shared slot, replacing any pusher that was set before.
24+
pub fn set<P: Push<BundleCore<T, D>> + 'static>(&self, pusher: P) {
25+
*self.0.borrow_mut() = Some(Box::new(pusher));
26+
}
27+
}
28+
29+
impl<T, D> Default for PushOwned<T, D> {
30+
fn default() -> Self {
31+
Self(Rc::new(RefCell::new(None)))
32+
}
33+
}
34+
35+
impl<T, D> Debug for PushOwned<T, D> {
36+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
37+
f.debug_struct("PushOwned").finish_non_exhaustive()
38+
}
39+
}
40+
41+
impl<T, D> Clone for PushOwned<T, D> {
42+
fn clone(&self) -> Self {
43+
Self(Rc::clone(&self.0))
44+
}
45+
}
1346

1447
/// Wraps a shared list of `Box<Push>` to forward pushes to. Owned by `Stream`.
1548
pub struct TeeCore<T, D> {
@@ -20,6 +53,17 @@ pub struct TeeCore<T, D> {
2053
/// [TeeCore] specialized to `Vec`-based container.
2154
pub type Tee<T, D> = TeeCore<T, Vec<D>>;
2255

56+
impl<T: Data, D: Container> Push<BundleCore<T, D>> for PushOwned<T, D> {
57+
#[inline]
58+
fn push(&mut self, message: &mut Option<BundleCore<T, D>>) {
59+
let mut pusher = self.0.borrow_mut();
60+
if let Some(pusher) = pusher.as_mut() {
61+
pusher.push(message);
62+
}
63+
}
64+
}
65+
66+
2367
impl<T: Data, D: Container> Push<BundleCore<T, D>> for TeeCore<T, D> {
2468
#[inline]
2569
fn push(&mut self, message: &mut Option<BundleCore<T, D>>) {

timely/src/dataflow/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
//! });
1414
//! ```
1515
16-
pub use self::stream::{StreamCore, Stream};
16+
pub use self::stream::{StreamCore, Stream, StreamLike, OwnedStream};
1717
pub use self::scopes::{Scope, ScopeParent};
1818

1919
pub use self::operators::input::HandleCore as InputHandleCore;

timely/src/dataflow/operators/aggregation/aggregate.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::{Data, ExchangeData};
66
use crate::dataflow::{Stream, Scope};
77
use crate::dataflow::operators::generic::operator::Operator;
88
use crate::dataflow::channels::pact::Exchange;
9+
use crate::dataflow::stream::OwnedStream;
910

1011
/// Generic intra-timestamp aggregation
1112
///
@@ -64,7 +65,7 @@ pub trait Aggregate<S: Scope, K: ExchangeData+Hash, V: ExchangeData> {
6465
self,
6566
fold: F,
6667
emit: E,
67-
hash: H) -> Stream<S, R> where S::Timestamp: Eq;
68+
hash: H) -> OwnedStream<S, Vec<R>> where S::Timestamp: Eq;
6869
}
6970

7071
impl<S: Scope, K: ExchangeData+Hash+Eq, V: ExchangeData> Aggregate<S, K, V> for Stream<S, (K, V)> {
@@ -73,7 +74,7 @@ impl<S: Scope, K: ExchangeData+Hash+Eq, V: ExchangeData> Aggregate<S, K, V> for
7374
self,
7475
fold: F,
7576
emit: E,
76-
hash: H) -> Stream<S, R> where S::Timestamp: Eq {
77+
hash: H) -> OwnedStream<S, Vec<R>> where S::Timestamp: Eq {
7778

7879
let mut aggregates = HashMap::new();
7980
let mut vector = Vec::new();

timely/src/dataflow/operators/aggregation/state_machine.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::{Data, ExchangeData};
66
use crate::dataflow::{Stream, Scope};
77
use crate::dataflow::operators::generic::operator::Operator;
88
use crate::dataflow::channels::pact::Exchange;
9+
use crate::dataflow::stream::OwnedStream;
910

1011
/// Generic state-transition machinery: each key has a state, and receives a sequence of events.
1112
/// Events are applied in time-order, but no other promises are made. Each state transition can
@@ -51,7 +52,7 @@ pub trait StateMachine<S: Scope, K: ExchangeData+Hash+Eq, V: ExchangeData> {
5152
I: IntoIterator<Item=R>, // type of output iterator
5253
F: Fn(&K, V, &mut D)->(bool, I)+'static, // state update logic
5354
H: Fn(&K)->u64+'static, // "hash" function for keys
54-
>(self, fold: F, hash: H) -> Stream<S, R> where S::Timestamp : Hash+Eq ;
55+
>(self, fold: F, hash: H) -> OwnedStream<S, Vec<R>> where S::Timestamp : Hash+Eq ;
5556
}
5657

5758
impl<S: Scope, K: ExchangeData+Hash+Eq, V: ExchangeData> StateMachine<S, K, V> for Stream<S, (K, V)> {
@@ -61,7 +62,7 @@ impl<S: Scope, K: ExchangeData+Hash+Eq, V: ExchangeData> StateMachine<S, K, V> f
6162
I: IntoIterator<Item=R>, // type of output iterator
6263
F: Fn(&K, V, &mut D)->(bool, I)+'static, // state update logic
6364
H: Fn(&K)->u64+'static, // "hash" function for keys
64-
>(self, fold: F, hash: H) -> Stream<S, R> where S::Timestamp : Hash+Eq {
65+
>(self, fold: F, hash: H) -> OwnedStream<S, Vec<R>> where S::Timestamp : Hash+Eq {
6566

6667
let mut pending: HashMap<_, Vec<(K, V)>> = HashMap::new(); // times -> (keys -> state)
6768
let mut states = HashMap::new(); // keys -> state

timely/src/dataflow/operators/branch.rs

+7-6
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use crate::dataflow::channels::pact::Pipeline;
44
use crate::dataflow::operators::generic::builder_rc::OperatorBuilder;
55
use crate::dataflow::{Scope, Stream, StreamCore};
66
use crate::{Container, Data};
7+
use crate::dataflow::stream::{OwnedStream, StreamLike};
78

89
/// Extension trait for `Stream`.
910
pub trait Branch<S: Scope, D: Data> {
@@ -31,14 +32,14 @@ pub trait Branch<S: Scope, D: Data> {
3132
fn branch(
3233
self,
3334
condition: impl Fn(&S::Timestamp, &D) -> bool + 'static,
34-
) -> (Stream<S, D>, Stream<S, D>);
35+
) -> (OwnedStream<S, Vec<D>>, OwnedStream<S, Vec<D>>);
3536
}
3637

3738
impl<S: Scope, D: Data> Branch<S, D> for Stream<S, D> {
3839
fn branch(
3940
self,
4041
condition: impl Fn(&S::Timestamp, &D) -> bool + 'static,
41-
) -> (Stream<S, D>, Stream<S, D>) {
42+
) -> (OwnedStream<S, Vec<D>>, OwnedStream<S, Vec<D>>) {
4243
let mut builder = OperatorBuilder::new("Branch".to_owned(), self.scope());
4344

4445
let mut input = builder.new_input(self, Pipeline);
@@ -71,7 +72,7 @@ impl<S: Scope, D: Data> Branch<S, D> for Stream<S, D> {
7172
}
7273

7374
/// Extension trait for `Stream`.
74-
pub trait BranchWhen<T>: Sized {
75+
pub trait BranchWhen<G: Scope, C: Container>: Sized {
7576
/// Takes one input stream and splits it into two output streams.
7677
/// For each time, the supplied closure is called. If it returns true,
7778
/// the records for that will be sent to the second returned stream, otherwise
@@ -91,11 +92,11 @@ pub trait BranchWhen<T>: Sized {
9192
/// after_five.inspect(|x| println!("Times 5 and later: {:?}", x));
9293
/// });
9394
/// ```
94-
fn branch_when(self, condition: impl Fn(&T) -> bool + 'static) -> (Self, Self);
95+
fn branch_when(self, condition: impl Fn(&G::Timestamp) -> bool + 'static) -> (OwnedStream<G, C>, OwnedStream<G, C>);
9596
}
9697

97-
impl<S: Scope, C: Container> BranchWhen<S::Timestamp> for StreamCore<S, C> {
98-
fn branch_when(self, condition: impl Fn(&S::Timestamp) -> bool + 'static) -> (Self, Self) {
98+
impl<G: Scope, C: Container, S: StreamLike<G, C>> BranchWhen<G, C> for S {
99+
fn branch_when(self, condition: impl Fn(&G::Timestamp) -> bool + 'static) -> (OwnedStream<G, C>, OwnedStream<G, C>) {
99100
let mut builder = OperatorBuilder::new("Branch".to_owned(), self.scope());
100101

101102
let mut input = builder.new_input(self, Pipeline);

timely/src/dataflow/operators/broadcast.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
use crate::ExchangeData;
44
use crate::dataflow::{Stream, Scope};
55
use crate::dataflow::operators::{Map, Exchange};
6+
use crate::dataflow::stream::{OwnedStream, StreamLike};
67

78
/// Broadcast records to all workers.
8-
pub trait Broadcast<D: ExchangeData> {
9+
pub trait Broadcast<G: Scope, D: ExchangeData> {
910
/// Broadcast records to all workers.
1011
///
1112
/// # Examples
@@ -18,11 +19,11 @@ pub trait Broadcast<D: ExchangeData> {
1819
/// .inspect(|x| println!("seen: {:?}", x));
1920
/// });
2021
/// ```
21-
fn broadcast(self) -> Self;
22+
fn broadcast(self) -> OwnedStream<G, Vec<D>>;
2223
}
2324

24-
impl<G: Scope, D: ExchangeData> Broadcast<D> for Stream<G, D> {
25-
fn broadcast(self) -> Stream<G, D> {
25+
impl<G: Scope, D: ExchangeData, S: StreamLike<G, Vec<D>>> Broadcast<G, D> for S {
26+
fn broadcast(self) -> OwnedStream<G, Vec<D>> {
2627

2728
// NOTE: Simplified implementation due to underlying motion
2829
// in timely dataflow internals. Optimize once they have

timely/src/dataflow/operators/capture/capture.rs

+7-6
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,14 @@ use crate::dataflow::channels::pullers::Counter as PullCounter;
1111
use crate::dataflow::operators::generic::builder_raw::OperatorBuilder;
1212

1313
use crate::Container;
14+
use crate::dataflow::stream::StreamLike;
1415
use crate::progress::ChangeBatch;
1516
use crate::progress::Timestamp;
1617

1718
use super::{EventCore, EventPusherCore};
1819

1920
/// Capture a stream of timestamped data for later replay.
20-
pub trait Capture<T: Timestamp, D: Container>: Sized {
21+
pub trait Capture<G: Scope, D: Container>: Sized {
2122
/// Captures a stream of timestamped data for later replay.
2223
///
2324
/// # Examples
@@ -103,18 +104,18 @@ pub trait Capture<T: Timestamp, D: Container>: Sized {
103104
///
104105
/// assert_eq!(recv0.extract()[0].1, (0..10).collect::<Vec<_>>());
105106
/// ```
106-
fn capture_into<P: EventPusherCore<T, D>+'static>(self, pusher: P);
107+
fn capture_into<P: EventPusherCore<G::Timestamp, D>+'static>(self, pusher: P);
107108

108109
/// Captures a stream using Rust's MPSC channels.
109-
fn capture(self) -> ::std::sync::mpsc::Receiver<EventCore<T, D>> {
110+
fn capture(self) -> ::std::sync::mpsc::Receiver<EventCore<G::Timestamp, D>> {
110111
let (send, recv) = ::std::sync::mpsc::channel();
111112
self.capture_into(send);
112113
recv
113114
}
114115
}
115116

116-
impl<S: Scope, D: Container> Capture<S::Timestamp, D> for StreamCore<S, D> {
117-
fn capture_into<P: EventPusherCore<S::Timestamp, D>+'static>(self, mut event_pusher: P) {
117+
impl<G: Scope, D: Container, S: StreamLike<G, D>> Capture<G, D> for S {
118+
fn capture_into<P: EventPusherCore<G::Timestamp, D>+'static>(self, mut event_pusher: P) {
118119

119120
let mut builder = OperatorBuilder::new("Capture".to_owned(), self.scope());
120121
let mut input = PullCounter::new(builder.new_input(self, Pipeline));
@@ -125,7 +126,7 @@ impl<S: Scope, D: Container> Capture<S::Timestamp, D> for StreamCore<S, D> {
125126

126127
if !started {
127128
// discard initial capability.
128-
progress.frontiers[0].update(S::Timestamp::minimum(), -1);
129+
progress.frontiers[0].update(Timestamp::minimum(), -1);
129130
started = true;
130131
}
131132
if !progress.frontiers[0].is_empty() {

timely/src/dataflow/operators/capture/replay.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -47,25 +47,26 @@ use crate::progress::Timestamp;
4747
use super::EventCore;
4848
use super::event::EventIteratorCore;
4949
use crate::Container;
50+
use crate::dataflow::stream::OwnedStream;
5051

5152
/// Replay a capture stream into a scope with the same timestamp.
5253
pub trait Replay<T: Timestamp, C> : Sized {
5354
/// Replays `self` into the provided scope, as an `OwnedStream<S, C>`.
54-
fn replay_into<S: Scope<Timestamp=T>>(self, scope: &mut S) -> StreamCore<S, C> {
55+
fn replay_into<S: Scope<Timestamp=T>>(self, scope: &mut S) -> OwnedStream<S, C> {
5556
self.replay_core(scope, Some(std::time::Duration::new(0, 0)))
5657
}
5758
/// Replays `self` into the provided scope, as an `OwnedStream<S, C>`.
5859
///
5960
/// The `period` argument allows the specification of a re-activation period, where the operator
6061
/// will re-activate itself every so often. The `None` argument instructs the operator not to
6162
/// re-activate itself.
62-
fn replay_core<S: Scope<Timestamp=T>>(self, scope: &mut S, period: Option<std::time::Duration>) -> StreamCore<S, C>;
63+
fn replay_core<S: Scope<Timestamp=T>>(self, scope: &mut S, period: Option<std::time::Duration>) -> OwnedStream<S, C>;
6364
}
6465

6566
impl<T: Timestamp, C: Container, I> Replay<T, C> for I
6667
where I : IntoIterator,
6768
<I as IntoIterator>::Item: EventIteratorCore<T, C>+'static {
68-
fn replay_core<S: Scope<Timestamp=T>>(self, scope: &mut S, period: Option<std::time::Duration>) -> StreamCore<S, C>{
69+
fn replay_core<S: Scope<Timestamp=T>>(self, scope: &mut S, period: Option<std::time::Duration>) -> OwnedStream<S, C>{
6970

7071
let mut builder = OperatorBuilder::new("Replay".to_owned(), scope.clone());
7172

0 commit comments

Comments
 (0)