Skip to content

Commit db27dbb

Browse files
lib/libmpi/MpiHost: start remote processes in parallel in MpiHost::startProcesses()
1 parent fabaae0 commit db27dbb

File tree

2 files changed

+114
-56
lines changed

2 files changed

+114
-56
lines changed

lib/libmpi/MpiHost.cpp

+102-54
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,13 @@ MpiHost::Result MpiHost::terminate()
108108
for (Size i = 1; i < m_nodes.count(); i++)
109109
{
110110
static u8 packet[MpiProxy::MaximumPacketSize];
111-
Size packetSize = sizeof(packet);
111+
Size packetSize = sizeof(MpiProxy::Header);
112112

113113
// Send terminate request to the remote node
114114
MpiProxy::Header *hdr = (MpiProxy::Header *) packet;
115115
hdr->operation = MpiProxy::MpiOpTerminate;
116+
hdr->coreId = m_nodes[i]->coreId;
117+
hdr->rankId = i;
116118

117119
// Send the packet
118120
const Result sendResult = sendPacket(i, packet, sizeof(MpiProxy::Header));
@@ -130,7 +132,7 @@ MpiHost::Result MpiHost::terminate()
130132
return recvResult;
131133
}
132134

133-
// The packet must be a data response
135+
// The packet must be a terminate response
134136
const MpiProxy::Header *header = (const MpiProxy::Header *) packet;
135137
if (header->operation != MpiProxy::MpiOpTerminate)
136138
{
@@ -212,6 +214,7 @@ MpiHost::Result MpiHost::send(const void *buf,
212214
MpiProxy::Header *hdr = (MpiProxy::Header *) packet;
213215
hdr->operation = MpiProxy::MpiOpSend;
214216
hdr->result = 0;
217+
hdr->coreId = node->coreId;
215218
hdr->rankId = dest;
216219
hdr->datatype = datatype;
217220
hdr->datacount = count;
@@ -251,6 +254,7 @@ MpiHost::Result MpiHost::receive(void *buf,
251254
// Send receive data request to the remote node
252255
MpiProxy::Header *hdr = (MpiProxy::Header *) packet;
253256
hdr->operation = MpiProxy::MpiOpRecv;
257+
hdr->coreId = node->coreId;
254258
hdr->rankId = source;
255259
hdr->datatype = datatype;
256260
hdr->datacount = count;
@@ -414,7 +418,11 @@ MpiHost::Result MpiHost::parseHostsFile(const char *hostsfile)
414418
MpiHost::Result MpiHost::startProcesses(int argc,
415419
char **argv)
416420
{
421+
const Size NumOfParallelStart = 32;
422+
static u8 packet[MpiProxy::MaximumPacketSize];
423+
MpiProxy::Header *hdr = (MpiProxy::Header *) packet;
417424
String cmdline;
425+
Size startIndex = 1, startCount = 0;
418426

419427
DEBUG("argc = " << argc);
420428

@@ -439,45 +447,57 @@ MpiHost::Result MpiHost::startProcesses(int argc,
439447
// Start remote processes with the constructed command line
440448
NOTICE("cmdline = " << *cmdline);
441449

442-
for (Size i = 1; i < m_nodes.count(); i++)
450+
// Send out packets to all the hosts
451+
while (startIndex < m_nodes.count())
443452
{
444-
in_addr nodeAddr;
445-
nodeAddr.s_addr = m_nodes[i]->ipAddress;
446-
447-
NOTICE("nodes[" << i << "] = " << inet_ntoa(nodeAddr) <<
448-
":" << m_nodes[i]->udpPort << ":" << m_nodes[i]->coreId);
453+
const Size receiveIndex = startIndex;
449454

450-
// Construct packet to send
451-
u8 packet[MpiProxy::MaximumPacketSize];
452-
MpiProxy::Header *hdr = (MpiProxy::Header *) packet;
453-
hdr->operation = MpiProxy::MpiOpExec;
454-
hdr->result = 0;
455-
hdr->rankId = i;
456-
hdr->coreId = m_nodes[i]->coreId;
457-
458-
hdr->coreCount = m_nodes.count();
459-
460-
// Append command-line after the header
461-
MemoryBlock::copy(packet + sizeof(MpiProxy::Header), *cmdline,
462-
sizeof(packet) - sizeof(MpiProxy::Header));
463-
464-
// Send the packet
465-
const Result sendResult = sendPacket(i, packet, sizeof(MpiProxy::Header) + cmdline.length());
466-
if (sendResult != MPI_SUCCESS)
455+
// Limit the number of parallel requests
456+
while (startIndex < m_nodes.count() && startCount < NumOfParallelStart)
467457
{
468-
ERROR("failed to send packet to nodeId " << i << ": result = " << (int) sendResult);
469-
return sendResult;
458+
in_addr nodeAddr;
459+
nodeAddr.s_addr = m_nodes[startIndex]->ipAddress;
460+
461+
NOTICE("nodes[" << startIndex << "] = " << inet_ntoa(nodeAddr) <<
462+
":" << m_nodes[startIndex]->udpPort << ":" << m_nodes[startIndex]->coreId);
463+
464+
// Construct packet to send
465+
hdr->operation = MpiProxy::MpiOpExec;
466+
hdr->result = 0;
467+
hdr->rankId = startIndex;
468+
hdr->coreId = m_nodes[startIndex]->coreId;
469+
hdr->coreCount = m_nodes.count();
470+
471+
// Append command-line after the header
472+
MemoryBlock::copy((char *)packet + sizeof(MpiProxy::Header), *cmdline,
473+
sizeof(packet) - sizeof(MpiProxy::Header));
474+
475+
// Send the packet
476+
const Result sendResult = sendPacket(startIndex, packet, sizeof(MpiProxy::Header) + cmdline.length());
477+
if (sendResult != MPI_SUCCESS)
478+
{
479+
ERROR("failed to send packet to nodeId " << startIndex << ": result = " << (int) sendResult);
480+
return sendResult;
481+
}
482+
startIndex++;
483+
startCount++;
470484
}
471485

472-
// Wait for acknowledge
473-
Size sz;
474-
const Result recvResult = receivePacket(i, MpiProxy::MpiOpExec, packet, sz);
475-
if (recvResult != MPI_SUCCESS)
486+
// Wait for acknowledge of each node
487+
for (Size i = receiveIndex; i < startIndex; i++)
476488
{
477-
ERROR("failed to receive acknowledge for MpiOpExec from nodeId " <<
478-
i << ": result = " << (int) recvResult);
479-
return recvResult;
489+
Size sz;
490+
sz = sizeof(MpiProxy::Header);
491+
492+
const Result recvResult = receivePacket(i, MpiProxy::MpiOpExec, &packet, sz);
493+
if (recvResult != MPI_SUCCESS)
494+
{
495+
ERROR("failed to receive acknowledge for MpiOpExec from nodeId " <<
496+
i << ": result = " << (int) recvResult);
497+
return recvResult;
498+
}
480499
}
500+
startCount = 0;
481501
}
482502

483503
return MPI_SUCCESS;
@@ -529,7 +549,8 @@ MpiHost::Result MpiHost::receivePacket(const Size nodeId,
529549

530550
in_addr nodeAddr;
531551
nodeAddr.s_addr = node->ipAddress;
532-
DEBUG("node = " << inet_ntoa(nodeAddr) << " operation = " << (int) operation);
552+
DEBUG("nodeId = " << nodeId << " addr = " << inet_ntoa(nodeAddr) <<
553+
" operation = " << (int) operation << " size = " << size);
533554

534555
// Process buffered packets first
535556
for (ListIterator<Packet *> i(m_packetBuffers[nodeId]); i.hasCurrent(); i++)
@@ -539,7 +560,6 @@ MpiHost::Result MpiHost::receivePacket(const Size nodeId,
539560

540561
if (hdr->operation == operation)
541562
{
542-
DEBUG("buffered packet: " << pkt->size << " bytes");
543563
MemoryBlock::copy(packet, pkt->data, pkt->size);
544564
size = pkt->size;
545565
delete[] pkt->data;
@@ -555,25 +575,28 @@ MpiHost::Result MpiHost::receivePacket(const Size nodeId,
555575
{
556576
struct sockaddr_in addr;
557577
socklen_t len = sizeof(addr);
578+
const Size recvSize = size;
558579

559580
// Receive UDP datagram
560-
int r = recvfrom(m_sock, packet, size, 0,
581+
int r = recvfrom(m_sock, packet, recvSize, 0,
561582
(struct sockaddr *) &addr, &len);
562583
if (r < 0)
563584
{
564-
ERROR("failed to receive UDP datagram: " << strerror(errno));
585+
ERROR("failed to receive UDP datagram on socket " << m_sock << ": " << strerror(errno));
565586
return MPI_ERR_IO;
566587
}
567588

568-
size = r;
589+
const MpiProxy::Header *hdr = (const MpiProxy::Header *) packet;
590+
569591
DEBUG("received " << r << " bytes from " << inet_ntoa(addr.sin_addr) <<
570-
" at port " << htons(addr.sin_port));
592+
":" << htons(addr.sin_port) << " with coreId = " << hdr->coreId <<
593+
" rankId = " << hdr->rankId);
571594

572595
// Is this packet targeted for the given node?
573-
if (addr.sin_addr.s_addr == node->ipAddress && htons(addr.sin_port) == node->udpPort)
596+
if (addr.sin_addr.s_addr == node->ipAddress &&
597+
htons(addr.sin_port) == node->udpPort &&
598+
hdr->coreId == node->coreId)
574599
{
575-
const MpiProxy::Header *hdr = (const MpiProxy::Header *) packet;
576-
577600
// Verify the MPI operation
578601
if (hdr->operation != operation)
579602
{
@@ -583,27 +606,52 @@ MpiHost::Result MpiHost::receivePacket(const Size nodeId,
583606
return MPI_ERR_IO;
584607
}
585608

609+
DEBUG("done");
610+
size = r;
586611
return MPI_SUCCESS;
587612
}
588613
// Add the packet to internal buffers for later retrieval
589614
else
590615
{
591-
Packet *pkt = new Packet;
592-
if (!pkt)
616+
Size otherNodeId = 0;
617+
618+
// Find the corresponding node
619+
for (Size i = 0; i < m_nodes.count(); i++)
593620
{
594-
ERROR("failed to allocate Packet struct for buffering: " << strerror(errno));
595-
return MPI_ERR_NO_MEM;
621+
if (addr.sin_addr.s_addr == m_nodes[i]->ipAddress &&
622+
htons(addr.sin_port) == m_nodes[i]->udpPort &&
623+
hdr->coreId == m_nodes[i]->coreId)
624+
{
625+
otherNodeId = i;
626+
break;
627+
}
596628
}
597629

598-
pkt->data = new u8[size];
599-
if (!pkt->data)
630+
if (otherNodeId == 0)
600631
{
601-
ERROR("failed to allocate memory for buffered packet: " << strerror(errno));
602-
return MPI_ERR_NO_MEM;
632+
ERROR("nodeId not found for packet from " << inet_ntoa(addr.sin_addr) <<
633+
" at port " << htons(addr.sin_port));
634+
}
635+
else
636+
{
637+
Packet *pkt = new Packet;
638+
if (!pkt)
639+
{
640+
ERROR("failed to allocate Packet struct for buffering: " << strerror(errno));
641+
return MPI_ERR_NO_MEM;
642+
}
643+
644+
pkt->data = new u8[r];
645+
if (!pkt->data)
646+
{
647+
ERROR("failed to allocate memory for buffered packet: " << strerror(errno));
648+
return MPI_ERR_NO_MEM;
649+
}
650+
651+
MemoryBlock::copy(pkt->data, hdr, r);
652+
pkt->size = r;
653+
m_packetBuffers[otherNodeId]->append(pkt);
603654
}
604-
605-
pkt->size = size;
606-
m_packetBuffers[nodeId]->append(pkt);
607655
}
608656
}
609657

server/mpiproxy/MpiProxy.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@
3535
#include <mpi.h>
3636
#include "MpiProxy.h"
3737

38+
#pragma clang optimize off
39+
#pragma GCC push_options
40+
#pragma GCC optimize ("O0")
41+
3842
MpiProxy::MpiProxy(int argc, char **argv)
3943
: POSIXApplication(argc, argv)
4044
, m_sock(-1)
@@ -320,6 +324,8 @@ MpiProxy::Result MpiProxy::processRecv(const Header *header,
320324
// Prepare header
321325
hdr->operation = MpiOpRecv;
322326
hdr->result = MPI_SUCCESS;
327+
hdr->coreId = header->coreId;
328+
hdr->rankId = header->rankId;
323329
hdr->datatype = header->datatype;
324330
hdr->datacount = 0;
325331

@@ -412,7 +418,9 @@ MpiProxy::Result MpiProxy::processExec(const Header *header,
412418
Header *hdr = (Header *) pkt;
413419
hdr->operation = MpiOpExec;
414420
hdr->result = result == Success ? MPI_SUCCESS : MPI_ERR_IO;
415-
Size pktSize = sizeof(hdr);
421+
hdr->coreId = header->coreId;
422+
hdr->rankId = header->rankId;
423+
Size pktSize = sizeof(*hdr);
416424

417425
const Result sendResult = udpSend(pkt, pktSize, addr);
418426
if (sendResult != Success)
@@ -433,7 +441,7 @@ MpiProxy::Result MpiProxy::processTerminate(const Header *header,
433441
Header *hdr = (Header *) pkt;
434442
Size pktSize = sizeof(Header);
435443

436-
NOTICE("");
444+
NOTICE("size = " << size);
437445

438446
// Loop PIDs of active processes and wait for each to terminate
439447
for (Size i = 0; i < m_pids.size(); i++)
@@ -471,6 +479,8 @@ MpiProxy::Result MpiProxy::processTerminate(const Header *header,
471479
// Prepare header for response
472480
hdr->operation = MpiOpTerminate;
473481
hdr->result = MPI_SUCCESS;
482+
hdr->rankId = header->rankId;
483+
hdr->coreId = header->coreId;
474484

475485
// UDP send
476486
const Result sendResult = udpSend(pkt, pktSize, addr);

0 commit comments

Comments
 (0)