Added guards for corner cases (bounds checks on the SCKL send/recv peer lists)

This commit is contained in:
Saeed Maleki 2021-03-17 19:37:57 +00:00
Родитель cae8c88a87
Коммит ee8e4c9d12
2 изменённых файла: 11 добавлений и 3 удаления

Просмотреть файл

@@ -24,7 +24,7 @@ class ncclFunction<ncclFuncAllToAll, ALGO, PROTO, FUNC, T, UNROLL> {
const int nranks = comm->nRanks;
const ssize_t loopSize = nChannels*(ssize_t)chunkSize;
const ssize_t size = args->coll.count;
return;
// Compute pointers
const T * __restrict__ thisInput = (const T*)args->sendbuff;
T * __restrict__ thisOutput = (T*)args->recvbuff;

Просмотреть файл

@@ -649,9 +649,17 @@ ncclResult_t scklGetTopoFromXMLAndSetChannels(struct ncclComm* comm) {
// SCKL generates the same scklGraph for all channels for now. This will change in the future
for (int c=0; c<comm->nChannels; c++){
if (isRecv) {
comm->channels[c].sGraph.recv[comm->channels[c].sGraph.nRecvPeers++] = peerId;
if (comm->channels[c].sGraph.nRecvPeers < SCKL_MAX_NUM_CONN){
comm->channels[c].sGraph.recv[comm->channels[c].sGraph.nRecvPeers++] = peerId;
} else {
WARN("Too many recv connections for device %d channel %d -- connection to %d is ignored. This may cause deadlock in initialization.", rank, c, peerId);
}
} else if (isSend){
comm->channels[c].sGraph.send[comm->channels[c].sGraph.nSendPeers++] = peerId;
if (comm->channels[c].sGraph.nSendPeers < SCKL_MAX_NUM_CONN){
comm->channels[c].sGraph.send[comm->channels[c].sGraph.nSendPeers++] = peerId;
} else {
WARN("Too many recv connections for device %d channel %d -- connection to %d is ignored. This may cause deadlock in initialization.", rank, c, peerId);
}
}
}
}