Skip to content

Commit 5390b43

Browse files
authored
Merge pull request #180 from madsbk/tiny_bindings
New python bindings - there is still a problem with TCP and multiple DASK processes.
2 parents c93687a + 31e2990 commit 5390b43

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+1808
-3113
lines changed

.flake8

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[flake8]
2+
filename = *.pyx,*.px*
3+
exclude = .eggs,*.egg,build
4+
ignore = E901,E225,E226,E227,E999
5+
max-line-length = 88

Makefile

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
CC = gcc
22
UCX_PATH ?= "$(abspath $(shell pwd))/../ucx/install"
33
CUDA_PATH ?= "/usr/local/cuda"
4+
PYTHON ?= python3
45

56
CFLAGS = "-I$(UCX_PATH)/include -I$(CUDA_PATH)/include"
67
LDFLAGS = "-L$(UCX_PATH)/lib -L$(CUDA_PATH)/lib64"
78

89
install:
9-
LDFLAGS=$(LDFLAGS) CFLAGS=$(CFLAGS) python3 setup.py build_ext -i --with-cuda
10-
python3 -m pip install -e .
10+
LDFLAGS=$(LDFLAGS) CFLAGS=$(CFLAGS) $(PYTHON) setup.py build_ext -i --with-cuda
11+
$(PYTHON) -m pip install -e .
1112

1213
install-cpu:
13-
LDFLAGS=$(LDFLAGS) CFLAGS=$(CFLAGS) python3 setup.py build_ext -i
14-
python3 -m pip install -e .
14+
LDFLAGS=$(LDFLAGS) CFLAGS=$(CFLAGS) $(PYTHON) setup.py build_ext -i
15+
$(PYTHON) -m pip install -e .
1516

1617
conda-install:
1718
LDFLAGS=$(LDFLAGS) CFLAGS=$(CFLAGS) $(PYTHON) setup.py build_ext -i --with-cuda install
@@ -27,7 +28,7 @@ clean:
2728
rm -rf *.egg-info
2829

2930
test:
30-
python3 -m pytest tests
31+
$(PYTHON) -m pytest tests
3132

3233
conda-packages:
3334
conda build --numpy=1.14 --python=3.7 ucx

README.md

+20
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,23 @@ These three libraries provide a powerful combination of HPC message passing tool
7878
You should be done! Test the result of your build with
7979

8080
pytest -v
81+
82+
### TCP Support
83+
84+
In order to use TCP add `tcp` to `UCX_TLS` and set `UCXPY_IFNAME` to the network interface you want to use. Some setup examples:
85+
86+
# TCP using "eth0" and CUDA support
87+
export UCX_TLS=tcp,sockcm,cuda_copy,cuda_ipc
88+
export UCX_SOCKADDR_TLS_PRIORITY=sockcm
89+
export UCXPY_IFNAME="eth0"
90+
91+
# InfiniBand using "ib0" and CUDA support
92+
export UCX_TLS=sockcm,cuda_copy,cuda_ipc
93+
export UCX_SOCKADDR_TLS_PRIORITY=sockcm
94+
export UCXPY_IFNAME="ib0"
95+
96+
# TCP using "eno0" and no CUDA support
97+
export UCX_TLS=tcp,sockcm
98+
export UCX_SOCKADDR_TLS_PRIORITY=sockcm
99+
export UCXPY_IFNAME="eno0"
100+

benchmarks/bibw-asyncio.py

+16-11
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
max_iters = 256
1717
window_size = 64
1818

19+
1920
async def talk_to_client(client_ep):
2021

2122
global args
@@ -27,7 +28,7 @@ async def talk_to_client(client_ep):
2728
send_buffer_region = ucp.buffer_region()
2829
recv_buffer_region = ucp.buffer_region()
2930

30-
if args.mem_type == 'cuda':
31+
if args.mem_type == "cuda":
3132
send_buffer_region.alloc_cuda(1 << msg_log)
3233
recv_buffer_region.alloc_cuda(1 << msg_log)
3334
else:
@@ -65,10 +66,10 @@ async def talk_to_client(client_ep):
6566
end = time.time()
6667
lat = end - start
6768
bw = (iters * window_size * msg_len * 2) / lat
68-
bw = bw / 1e9 #GB/s
69+
bw = bw / 1e9 # GB/s
6970
print("{}\t\t{}".format(msg_len, bw))
7071

71-
if args.mem_type == 'cuda':
72+
if args.mem_type == "cuda":
7273
send_buffer_region.free_cuda()
7374
recv_buffer_region.free_cuda()
7475
else:
@@ -78,6 +79,7 @@ async def talk_to_client(client_ep):
7879
ucp.destroy_ep(client_ep)
7980
ucp.stop_server()
8081

82+
8183
async def talk_to_server(ip, port):
8284

8385
global args
@@ -91,7 +93,7 @@ async def talk_to_server(ip, port):
9193
send_buffer_region = ucp.buffer_region()
9294
recv_buffer_region = ucp.buffer_region()
9395

94-
if args.mem_type == 'cuda':
96+
if args.mem_type == "cuda":
9597
send_buffer_region.alloc_cuda(1 << msg_log)
9698
recv_buffer_region.alloc_cuda(1 << msg_log)
9799
else:
@@ -126,9 +128,9 @@ async def talk_to_server(ip, port):
126128
await asyncio.wait(pending_list)
127129
end = time.time()
128130
lat = end - start
129-
lat = ((lat/2) / iters)* 1000000
131+
lat = ((lat / 2) / iters) * 1000000
130132

131-
if args.mem_type == 'cuda':
133+
if args.mem_type == "cuda":
132134
send_buffer_region.free_cuda()
133135
recv_buffer_region.free_cuda()
134136
else:
@@ -137,11 +139,14 @@ async def talk_to_server(ip, port):
137139

138140
ucp.destroy_ep(server_ep)
139141

142+
140143
parser = argparse.ArgumentParser()
141-
parser.add_argument('-s','--server', help='enter server ip', required=False)
142-
parser.add_argument('-p','--port', help='enter server port number', required=False)
143-
parser.add_argument('-i','--intra_node', action='store_true')
144-
parser.add_argument('-m','--mem_type', help='host/cuda (default = host)', required=False)
144+
parser.add_argument("-s", "--server", help="enter server ip", required=False)
145+
parser.add_argument("-p", "--port", help="enter server port number", required=False)
146+
parser.add_argument("-i", "--intra_node", action="store_true")
147+
parser.add_argument(
148+
"-m", "--mem_type", help="host/cuda (default = host)", required=False
149+
)
145150
args = parser.parse_args()
146151

147152
## initiate ucp
@@ -159,7 +164,7 @@ async def talk_to_server(ip, port):
159164
if server:
160165
if args.intra_node:
161166
ucp.set_cuda_dev(1)
162-
coro = ucp.start_server(talk_to_client, is_coroutine = True)
167+
coro = ucp.start_server(talk_to_client, is_coroutine=True)
163168
else:
164169
coro = talk_to_server(init_str.encode(), int(args.port))
165170

benchmarks/bibw-future.py

+16-11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
max_iters = 256
1313
window_size = 64
1414

15+
1516
def talk_to_client(client_ep):
1617

1718
global args
@@ -24,7 +25,7 @@ def talk_to_client(client_ep):
2425
send_buffer_region = ucp.buffer_region()
2526
recv_buffer_region = ucp.buffer_region()
2627

27-
if args.mem_type == 'cuda':
28+
if args.mem_type == "cuda":
2829
send_buffer_region.alloc_cuda(1 << msg_log)
2930
recv_buffer_region.alloc_cuda(1 << msg_log)
3031
else:
@@ -68,10 +69,10 @@ def talk_to_client(client_ep):
6869
end = time.time()
6970
lat = end - start
7071
bw = (iters * window_size * msg_len * 2) / lat
71-
bw = bw / 1e9 #GB/s
72+
bw = bw / 1e9 # GB/s
7273
print("{}\t\t{}".format(msg_len, bw))
7374

74-
if args.mem_type == 'cuda':
75+
if args.mem_type == "cuda":
7576
send_buffer_region.free_cuda()
7677
recv_buffer_region.free_cuda()
7778
else:
@@ -82,6 +83,7 @@ def talk_to_client(client_ep):
8283
cb_not_done = False
8384
ucp.stop_server()
8485

86+
8587
def talk_to_server(ip, port):
8688

8789
global args
@@ -95,7 +97,7 @@ def talk_to_server(ip, port):
9597
send_buffer_region = ucp.buffer_region()
9698
recv_buffer_region = ucp.buffer_region()
9799

98-
if args.mem_type == 'cuda':
100+
if args.mem_type == "cuda":
99101
send_buffer_region.alloc_cuda(1 << msg_log)
100102
recv_buffer_region.alloc_cuda(1 << msg_log)
101103
else:
@@ -136,9 +138,9 @@ def talk_to_server(ip, port):
136138
pending_list.remove(ft)
137139
end = time.time()
138140
lat = end - start
139-
lat = ((lat/2) / iters)* 1000000
141+
lat = ((lat / 2) / iters) * 1000000
140142

141-
if args.mem_type == 'cuda':
143+
if args.mem_type == "cuda":
142144
send_buffer_region.free_cuda()
143145
recv_buffer_region.free_cuda()
144146
else:
@@ -147,11 +149,14 @@ def talk_to_server(ip, port):
147149

148150
ucp.destroy_ep(server_ep)
149151

152+
150153
parser = argparse.ArgumentParser()
151-
parser.add_argument('-s','--server', help='enter server ip', required=False)
152-
parser.add_argument('-p','--port', help='enter server port number', required=False)
153-
parser.add_argument('-i','--intra_node', action='store_true')
154-
parser.add_argument('-m','--mem_type', help='host/cuda (default = host)', required=False)
154+
parser.add_argument("-s", "--server", help="enter server ip", required=False)
155+
parser.add_argument("-p", "--port", help="enter server port number", required=False)
156+
parser.add_argument("-i", "--intra_node", action="store_true")
157+
parser.add_argument(
158+
"-m", "--mem_type", help="host/cuda (default = host)", required=False
159+
)
155160
args = parser.parse_args()
156161

157162
## initiate ucp
@@ -168,7 +173,7 @@ def talk_to_server(ip, port):
168173
if server:
169174
if args.intra_node:
170175
ucp.set_cuda_dev(1)
171-
ucp.start_server(talk_to_client, is_coroutine = False)
176+
ucp.start_server(talk_to_client, is_coroutine=False)
172177
while cb_not_done:
173178
ucp.progress()
174179
else:

benchmarks/lat-asyncio-blind-recv.py

+16-11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
max_msg_log = 23
1313
max_iters = 1000
1414

15+
1516
async def talk_to_client(client_ep):
1617

1718
global args
@@ -21,7 +22,7 @@ async def talk_to_client(client_ep):
2122

2223
send_buffer_region = ucp.buffer_region()
2324

24-
if args.mem_type == 'cuda':
25+
if args.mem_type == "cuda":
2526
send_buffer_region.alloc_cuda(1 << msg_log)
2627
else:
2728
send_buffer_region.alloc_host(1 << msg_log)
@@ -46,17 +47,18 @@ async def talk_to_client(client_ep):
4647
await recv_req
4748
end = time.time()
4849
lat = end - start
49-
lat = ((lat/2) / iters)* 1000000
50+
lat = ((lat / 2) / iters) * 1000000
5051
print("{}\t\t{}".format(msg_len, lat))
5152

52-
if args.mem_type == 'cuda':
53+
if args.mem_type == "cuda":
5354
send_buffer_region.free_cuda()
5455
else:
5556
send_buffer_region.free_host()
5657

5758
ucp.destroy_ep(client_ep)
5859
ucp.stop_server()
5960

61+
6062
async def talk_to_server(ip, port):
6163

6264
global args
@@ -68,7 +70,7 @@ async def talk_to_server(ip, port):
6870

6971
send_buffer_region = ucp.buffer_region()
7072

71-
if args.mem_type == 'cuda':
73+
if args.mem_type == "cuda":
7274
send_buffer_region.alloc_cuda(1 << msg_log)
7375
else:
7476
send_buffer_region.alloc_host(1 << msg_log)
@@ -91,20 +93,23 @@ async def talk_to_server(ip, port):
9193
send_req = await server_ep.send(send_msg, msg_len)
9294
end = time.time()
9395
lat = end - start
94-
lat = ((lat/2) / iters)* 1000000
96+
lat = ((lat / 2) / iters) * 1000000
9597

96-
if args.mem_type == 'cuda':
98+
if args.mem_type == "cuda":
9799
send_buffer_region.free_cuda()
98100
else:
99101
send_buffer_region.free_host()
100102

101103
ucp.destroy_ep(server_ep)
102104

105+
103106
parser = argparse.ArgumentParser()
104-
parser.add_argument('-s','--server', help='enter server ip', required=False)
105-
parser.add_argument('-p','--port', help='enter server port number', required=False)
106-
parser.add_argument('-i','--intra_node', action='store_true')
107-
parser.add_argument('-m','--mem_type', help='host/cuda (default = host)', required=False)
107+
parser.add_argument("-s", "--server", help="enter server ip", required=False)
108+
parser.add_argument("-p", "--port", help="enter server port number", required=False)
109+
parser.add_argument("-i", "--intra_node", action="store_true")
110+
parser.add_argument(
111+
"-m", "--mem_type", help="host/cuda (default = host)", required=False
112+
)
108113
args = parser.parse_args()
109114

110115
## initiate ucp
@@ -122,7 +127,7 @@ async def talk_to_server(ip, port):
122127
if server:
123128
if args.intra_node:
124129
ucp.set_cuda_dev(1)
125-
coro = ucp.start_server(talk_to_client, is_coroutine = True)
130+
coro = ucp.start_server(talk_to_client, is_coroutine=True)
126131
else:
127132
coro = talk_to_server(init_str.encode(), int(args.port))
128133

0 commit comments

Comments
 (0)