This commit is contained in:
Saeed Maleki 2021-03-23 06:48:26 +00:00
Родитель 62169be637
Коммит 08d3ea9909
9 изменённых файлов: 113 добавлений и 73 удалений

Просмотреть файл

@ -607,66 +607,72 @@ ncclResult_t ncclTopoGetLocalNet(struct ncclTopoSystem* system, int rank, int64_
return ncclSuccess; return ncclSuccess;
} }
ncclResult_t scklGetTopoFromXMLAndSetChannels(struct ncclComm* comm) { ncclResult_t scklGetAlgoFromXMLAndSetComm(struct ncclComm* comm) {
char* str = getenv("SCKL_XML_FILE"); char* str = getenv("SCKL_XML_FILE");
if (str){ if (str){
INFO(NCCL_ENV, "SCKL_XML_FILE set by environment to %s", str); INFO(NCCL_ENV, "SCKL_XML_FILE set by environment to %s", str);
struct ncclXml* xml; struct ncclXml* xml;
NCCLCHECK(ncclCalloc(&xml, 1)); NCCLCHECK(ncclCalloc(&xml, 1));
NCCLCHECK(scklTopoGetXmlGraphFromFile(str, xml)); NCCLCHECK(scklGetXmlAlgoFromFile(str, xml));
int rank = comm->rank; int rank = comm->rank;
for (int c=0; c<comm->nChannels; c++){ struct scklAglorithm* scklAlgo = &comm->scklAlgo;
comm->channels[c].sGraph.nRecvPeers = 0; // zeroing out all entries.
comm->channels[c].sGraph.nSendPeers = 0; memset(scklAlgo, 0, sizeof(struct scklAlgorithm));
}
struct ncclXmlNode* topNode; struct ncclXmlNode* topNode;
NCCLCHECK(xmlFindTag(xml, "system", &topNode)); NCCLCHECK(xmlFindTag(xml, "algo", &topNode));
for (int s=0; s<topNode->nSubs; s++) { for (int s=0; s<topNode->nSubs; s++) {
struct ncclXmlNode* node = topNode->subs[s]; struct ncclXmlNode* node = topNode->subs[s];
if (strcmp(node->name, "gpu") == 0){ if (strcmp(node->name, "gpu") == 0){
int id; int id;
NCCLCHECK(xmlGetAttrInt(node, "id", &id)); NCCLCHECK(xmlGetAttrInt(node, "id", &id));
if (id == rank){ if (id == rank){
for (int p=0; p<node->nSubs; p++) { scklAlgo->nBlocks = 0;
struct ncclXmlNode* typeOfComm = node->subs[p]; for (int t=0; t<node->nSubs; t++) {
if (strcmp(typeOfComm->name, "conn") == 0){ struct ncclXmlNode* threadblockNode = node->subs[t];
if (strcmp(threadblockNode->name, "threadblock") == 0){
int rbid, peer;
const char* type; const char* type;
NCCLCHECK(xmlGetAttrStr(typeOfComm, "type", &type)); NCCLCHECK(xmlGetAttrInt(threadblockNode, "rbid", &rbid));
NCCLCHECK(xmlGetAttrInt(threadblockNode, "peer", &peer));
bool isRecv = false; NCCLCHECK(xmlGetAttrStr(threadblockNode, "type", &type));
bool isSend = false; if (rbid >= SCKL_MAX_NUM_THREAD_BLOCKS){
if (strcmp(type, "recv") == 0){ WARN("Too many thread blocks are requested. Max thread blocks: %d, requested: %d", SCKL_MAX_NUM_THREAD_BLOCKS, rbid+1);
isRecv = true; return ncclInternalError;
} else if (strcmp(type, "send") == 0){
isSend = true;
} }
for (int p=0; p<typeOfComm->nSubs; p++) { if (rbid < 0){
struct ncclXmlNode* peer = typeOfComm->subs[p]; WARN("rbid must be positive. rbid: %d", rbid);
int peerId; return ncclInternalError;
NCCLCHECK(xmlGetAttrInt(peer, "id", &peerId)); }
// SCKL generates the same scklAlgoState for all channels for now. This will change in the future scklAlgo->nBlocks = std::max(comm->scklAlgo.nBlocks, rbid+1);
for (int c=0; c<comm->nChannels; c++){ struct scklThreadBlock* sTB = scklAlgo->scklTB[rbid];
if (isRecv) { sTB->nsteps = 0;
if (comm->channels[c].sGraph.nRecvPeers < SCKL_MAX_NUM_CONN){ sTB->peer = peer;
int index = comm->channels[c].sGraph.nRecvPeers; if (strcmp(type, "send") == 0){
comm->channels[c].sGraph.recv[index] = peerId; sTB->type = SCKL_SEND;
// comm->channels[c].sGraph.recv[index].nChunks = 1; } else if (strcmp(type, "recv") == 0) {
comm->channels[c].sGraph.nRecvPeers++; sTB->type = SCKL_RECV;
} else { } else {
WARN("Too many recv connections for device %d channel %d -- connection to %d is ignored. This may cause deadlock in initialization.", rank, c, peerId); WARN("type of transfer is not supported: %s", type);
} return ncclInternalError;
} else if (isSend){ }
if (comm->channels[c].sGraph.nSendPeers < SCKL_MAX_NUM_CONN){
int index = comm->channels[c].sGraph.nSendPeers; for (int st=0; st<threadblockNode->nSubs; st++) {
comm->channels[c].sGraph.send[index] = peerId; struct ncclXmlNode* stepNode = threadblockNode->subs[st];
// comm->channels[c].sGraph.send[index].nChunks = 1; if (strcmp(stepNode->name, "step") == 0){
comm->channels[c].sGraph.nSendPeers++; int s, chunkId;
} else { NCCLCHECK(xmlGetAttrInt(stepNode, "s", &s));
WARN("Too many recv connections for device %d channel %d -- connection to %d is ignored. This may cause deadlock in initialization.", rank, c, peerId); NCCLCHECK(xmlGetAttrInt(stepNode, "chunkId", &chunkId));
} if (s >= SCKL_MAX_NUM_STEPS){
WARN("Too many steps are requested. Max number of steps: %d, requested: %d", SCKL_MAX_NUM_STEPS, s+1);
return ncclInternalError;
} }
if (s < 0){
WARN("step must be positive: step %d", s);
return ncclInternalError;
}
sTB->nsteps = std::max(sTB->nsteps, s+1);
sTB->transfers[s] = chunkId;
} }
} }
} }

Просмотреть файл

@ -806,43 +806,47 @@ ncclResult_t ncclTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXm
return ncclSuccess; return ncclSuccess;
} }
ncclResult_t scklTopoXmlPeerLoad(FILE* file, struct ncclXml* xml, struct ncclXmlNode* head) { ncclResult_t scklAlgoXmlStep(FILE* file, struct ncclXml* xml, struct ncclXmlNode* head) {
int id; int s, chunkId;
NCCLCHECK(xmlGetAttrInt(head, "id", &id)); NCCLCHECK(xmlGetAttrInt(head, "s", &s));
NCCLCHECK(xmlGetAttrInt(head, "chunkId", &chunkId));
struct xmlHandler handlers[] = { }; struct xmlHandler handlers[] = { };
NCCLCHECK(xmlLoadSub(file, xml, head, handlers, 1)); NCCLCHECK(xmlLoadSub(file, xml, head, handlers, 1));
return ncclSuccess; return ncclSuccess;
} }
ncclResult_t scklTopoXmlConnLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) { ncclResult_t scklAlgoXmlthreadblock(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
int rbid, peer;
const char* type; const char* type;
NCCLCHECK(xmlGetAttrInt(head, "rbid", &id));
NCCLCHECK(xmlGetAttrInt(head, "peer", &peer));
NCCLCHECK(xmlGetAttrStr(head, "type", &type)); NCCLCHECK(xmlGetAttrStr(head, "type", &type));
struct xmlHandler handlers[] = { { "peer", scklTopoXmlPeerLoad } }; struct xmlHandler handlers[] = { { "step", scklAlgoXmlStep } };
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1)); NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
return ncclSuccess; return ncclSuccess;
} }
ncclResult_t scklTopoXmlGraphLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) { ncclResult_t scklAlgoXmlGpu(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
int id; int id;
NCCLCHECK(xmlGetAttrInt(head, "id", &id)); NCCLCHECK(xmlGetAttrInt(head, "id", &id));
struct xmlHandler handlers[] = { { "conn", scklTopoXmlConnLoad } }; struct xmlHandler handlers[] = { { "threadblock", scklAlgoXmlthreadblock } };
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1)); NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
return ncclSuccess; return ncclSuccess;
} }
ncclResult_t scklTopoXmlSystemLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) { ncclResult_t scklAlgoXmlLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
struct xmlHandler handlers[] = { { "gpu", scklTopoXmlGraphLoad } }; struct xmlHandler handlers[] = { { "gpu", scklAlgoXmlGpu } };
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1)); NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
return ncclSuccess; return ncclSuccess;
} }
ncclResult_t scklTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml) { ncclResult_t scklGetXmlAlgoFromFile(const char* xmlGraphFile, struct ncclXml* xml) {
FILE* file = fopen(xmlGraphFile, "r"); FILE* file = fopen(xmlGraphFile, "r");
if (file == NULL) { if (file == NULL) {
WARN("Could not open XML SCKL graph file %s : %s", xmlGraphFile, strerror(errno)); WARN("Could not open XML SCKL graph file %s : %s", xmlGraphFile, strerror(errno));
return ncclSystemError; return ncclSystemError;
} }
struct xmlHandler handlers[] = { { "system", scklTopoXmlSystemLoad } }; struct xmlHandler handlers[] = { { "algo", scklAlgoXmlLoad } };
xml->maxIndex = 0; xml->maxIndex = 0;
NCCLCHECK(xmlLoadSub(file, xml, NULL, handlers, 1)); NCCLCHECK(xmlLoadSub(file, xml, NULL, handlers, 1));
fclose(file); fclose(file);

Просмотреть файл

@ -43,7 +43,7 @@ ncclResult_t ncclTopoGetXmlFromFile(const char* xmlTopoFile, struct ncclXml* xml
ncclResult_t ncclTopoDumpXmlToFile(const char* xmlTopoFile, struct ncclXml* xml); ncclResult_t ncclTopoDumpXmlToFile(const char* xmlTopoFile, struct ncclXml* xml);
#define NCCL_GRAPH_XML_VERSION 1 #define NCCL_GRAPH_XML_VERSION 1
ncclResult_t ncclTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml); ncclResult_t ncclTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml);
ncclResult_t scklTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml); ncclResult_t scklGetXmlAlgoFromFile(const char* xmlGraphFile, struct ncclXml* xml);
/* Auto-detect functions */ /* Auto-detect functions */
ncclResult_t ncclTopoFillGpu(struct ncclXml* xml, const char* busId, struct ncclXmlNode** gpuNode); ncclResult_t ncclTopoFillGpu(struct ncclXml* xml, const char* busId, struct ncclXmlNode** gpuNode);

Просмотреть файл

@ -58,6 +58,7 @@ struct ncclRecvMem {
struct ncclComm { struct ncclComm {
struct ncclChannel channels[MAXCHANNELS]; struct ncclChannel channels[MAXCHANNELS];
struct scklAlgorithm scklAlgo;
struct ncclPeerInfo* peerInfo; struct ncclPeerInfo* peerInfo;
struct ncclTopoSystem* topo; struct ncclTopoSystem* topo;

Просмотреть файл

@ -117,20 +117,25 @@ struct ncclRing {
int* devUserRanks; int* devUserRanks;
}; };
#define SCKL_MAX_NUM_CONN 16 #define SCKL_MAX_NUM_STEPS 16
#define SCKL_MAX_NUM_THREAD_BLOCKS 16
// struct scklConn { #define SCKL_SEND 0
// int peer; #define SCKL_RECV 1
// int nChunks;
// };
struct scklAlgoState { struct scklThreadBlock {
int nRecvPeers; uint8_t peer;
int nSendPeers; uint8_t type; // follow SCKL_SEND and SCKL_RECV macros
int recv[SCKL_MAX_NUM_CONN]; uint8_t nsteps;
int send[SCKL_MAX_NUM_CONN]; // step is used to index into this array. transfers[step] is the chunkId to transfer.
// struct scklConn recv[SCKL_MAX_NUM_CONN]; uint16_t transfers[SCKL_MAX_NUM_STEPS];
// struct scklConn send[SCKL_MAX_NUM_CONN]; };
// gpuId is the one that is in comm->rank
struct scklAlgorithm {
int nBlocks;
// rbid is used as an index into this array
struct scklThreadBlock scklTB[SCKL_MAX_NUM_THREAD_BLOCKS];
}; };
#define NCCL_MAX_TREE_ARITY 3 #define NCCL_MAX_TREE_ARITY 3
@ -193,7 +198,6 @@ struct ncclChannel {
struct ncclRing ring; struct ncclRing ring;
struct ncclTree tree; struct ncclTree tree;
struct ncclTree collTree; struct ncclTree collTree;
struct scklAlgoState sGraph;
int id; int id;

Просмотреть файл

@ -35,8 +35,8 @@ ncclResult_t ncclTopoCheckGdr(struct ncclTopoSystem* topo, int64_t busId, int ne
// Set CPU affinity // Set CPU affinity
ncclResult_t ncclTopoSetAffinity(struct ncclTopoSystem* system, int rank); ncclResult_t ncclTopoSetAffinity(struct ncclTopoSystem* system, int rank);
// SCKL setup peers // SCKL get alirthm from XML file and set the communicator
ncclResult_t scklGetTopoFromXMLAndSetChannels(struct ncclComm* comm); ncclResult_t scklGetAlgoFromXMLAndSetComm(struct ncclComm* comm);
#define NCCL_TOPO_CPU_ARCH_X86 1 #define NCCL_TOPO_CPU_ARCH_X86 1
#define NCCL_TOPO_CPU_ARCH_POWER 2 #define NCCL_TOPO_CPU_ARCH_POWER 2

Просмотреть файл

@ -55,6 +55,7 @@ struct ncclTransport {
}; };
ncclResult_t ncclTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel, int nrecv, int* peerRecv, int nsend, int* peerSend); ncclResult_t ncclTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel, int nrecv, int* peerRecv, int nsend, int* peerSend);
ncclResult_t scklTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel);
ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph* graph); ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph* graph);
#endif #endif

Просмотреть файл

@ -824,16 +824,16 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, ncclUniqueId* comm
INFO(NCCL_INIT, "Connected all trees"); INFO(NCCL_INIT, "Connected all trees");
// NetSharedBuffers needs to be set for this to work across nodes. // NetSharedBuffers needs to be set for this to work across nodes.
NCCLCHECK(scklGetTopoFromXMLAndSetChannels(comm)); NCCLCHECK(scklGetAlgoFromXMLAndSetComm(comm));
// Connect SCKL graph // Connect SCKL graph
for (int c=0; c<comm->nChannels; c++) { for (int c=0; c<comm->nChannels; c++) {
struct ncclChannel* channel = comm->channels+c; struct ncclChannel* channel = comm->channels+c;
if (comm->nRanks == 1) continue; if (comm->nRanks == 1) continue;
NCCLCHECKGOTO(ncclTransportP2pConnect(comm, channel, channel->sGraph.nRecvPeers, channel->sGraph.recv, channel->sGraph.nSendPeers, channel->sGraph.send), ret, affinity_restore); NCCLCHECKGOTO(scklTransportP2pConnect(comm, channel), ret, affinity_restore);
} }
// It appears that graph is not really needed for P2pSetup. The only place that actually uses it is in ncclTopoGetNetDev which has a bypass for when it is set to NULL. // It appears that graph is not really needed for P2pSetup. The only place that actually uses it is in ncclTopoGetNetDev which has a bypass for when it is set to NULL.
NCCLCHECKGOTO(ncclTransportP2pSetup(comm, NULL), ret, affinity_restore); NCCLCHECKGOTO(ncclTransportP2pSetup(comm, NULL), ret, affinity_restore);
INFO(NCCL_INIT, "Connected SCKL graph"); INFO(NCCL_INIT, "Connected SCKL algorithm");
// Check if we can setup CollNet // Check if we can setup CollNet
if (comm->nNodes > 1 && if (comm->nNodes > 1 &&

Просмотреть файл

@ -51,6 +51,30 @@ ncclResult_t ncclTransportP2pConnect(struct ncclComm* comm, struct ncclChannel*
return ncclSuccess; return ncclSuccess;
} }
// SCKL needs to traverse the algorithm to find the peers
ncclResult_t scklTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel) {
uint32_t mask = 1 << channel->id;
struct scklAlgorithm* scklAlgo = &comm->scklAlgo;
int nrecv = 0;
int nsend = 0;
for (int i=0; i<scklAlgo->nBlocks; i++){
int peer = scklAlgo->scklTB[i].peer;
int type = scklAlgo->scklTB[i].type; // 0 for send, 1 for recv
if (peer == -1 || peer >= comm->nRanks || peer == comm->rank) continue;
if (type == SCKL_SEND){
if (channel->peers[peer].send.connected) continue;
comm->connectSend[peer] |= mask;
nsend++;
} else if (type == SCKL_RECV) {
if (channel->peers[peer].recv.connected) continue;
comm->connectRecv[peer] |= mask;
nrecv++;
}
}
TRACE(NCCL_INIT, "sckl nsend %d nrecv %d", nsend, nrecv);
return ncclSuccess;
}
void dumpData(struct ncclConnect* data, int ndata) { void dumpData(struct ncclConnect* data, int ndata) {
for (int n=0; n<ndata; n++) { for (int n=0; n<ndata; n++) {
printf("[%d] ", n); printf("[%d] ", n);