зеркало из https://github.com/microsoft/msccl.git
not compilable
This commit is contained in:
Родитель
62169be637
Коммит
08d3ea9909
|
@ -607,66 +607,72 @@ ncclResult_t ncclTopoGetLocalNet(struct ncclTopoSystem* system, int rank, int64_
|
|||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t scklGetTopoFromXMLAndSetChannels(struct ncclComm* comm) {
|
||||
ncclResult_t scklGetAlgoFromXMLAndSetComm(struct ncclComm* comm) {
|
||||
char* str = getenv("SCKL_XML_FILE");
|
||||
if (str){
|
||||
INFO(NCCL_ENV, "SCKL_XML_FILE set by environment to %s", str);
|
||||
struct ncclXml* xml;
|
||||
NCCLCHECK(ncclCalloc(&xml, 1));
|
||||
NCCLCHECK(scklTopoGetXmlGraphFromFile(str, xml));
|
||||
NCCLCHECK(scklGetXmlAlgoFromFile(str, xml));
|
||||
int rank = comm->rank;
|
||||
|
||||
for (int c=0; c<comm->nChannels; c++){
|
||||
comm->channels[c].sGraph.nRecvPeers = 0;
|
||||
comm->channels[c].sGraph.nSendPeers = 0;
|
||||
}
|
||||
|
||||
struct scklAglorithm* scklAlgo = &comm->scklAlgo;
|
||||
// zeroing out all entries.
|
||||
memset(scklAlgo, 0, sizeof(struct scklAlgorithm));
|
||||
struct ncclXmlNode* topNode;
|
||||
NCCLCHECK(xmlFindTag(xml, "system", &topNode));
|
||||
NCCLCHECK(xmlFindTag(xml, "algo", &topNode));
|
||||
for (int s=0; s<topNode->nSubs; s++) {
|
||||
struct ncclXmlNode* node = topNode->subs[s];
|
||||
if (strcmp(node->name, "gpu") == 0){
|
||||
int id;
|
||||
NCCLCHECK(xmlGetAttrInt(node, "id", &id));
|
||||
if (id == rank){
|
||||
for (int p=0; p<node->nSubs; p++) {
|
||||
struct ncclXmlNode* typeOfComm = node->subs[p];
|
||||
if (strcmp(typeOfComm->name, "conn") == 0){
|
||||
scklAlgo->nBlocks = 0;
|
||||
for (int t=0; t<node->nSubs; t++) {
|
||||
struct ncclXmlNode* threadblockNode = node->subs[t];
|
||||
if (strcmp(threadblockNode->name, "threadblock") == 0){
|
||||
int rbid, peer;
|
||||
const char* type;
|
||||
NCCLCHECK(xmlGetAttrStr(typeOfComm, "type", &type));
|
||||
NCCLCHECK(xmlGetAttrInt(threadblockNode, "rbid", &rbid));
|
||||
NCCLCHECK(xmlGetAttrInt(threadblockNode, "peer", &peer));
|
||||
NCCLCHECK(xmlGetAttrStr(threadblockNode, "type", &type));
|
||||
if (rbid >= SCKL_MAX_NUM_THREAD_BLOCKS){
|
||||
WARN("Too many thread blocks are requested. Max thread blocks: %d, requested: %d", SCKL_MAX_NUM_THREAD_BLOCKS, rbid+1);
|
||||
return ncclInternalError;
|
||||
}
|
||||
if (rbid < 0){
|
||||
WARN("rbid must be positive. rbid: %d", rbid);
|
||||
return ncclInternalError;
|
||||
}
|
||||
scklAlgo->nBlocks = std::max(comm->scklAlgo.nBlocks, rbid+1);
|
||||
struct scklThreadBlock* sTB = scklAlgo->scklTB[rbid];
|
||||
sTB->nsteps = 0;
|
||||
sTB->peer = peer;
|
||||
if (strcmp(type, "send") == 0){
|
||||
sTB->type = SCKL_SEND;
|
||||
} else if (strcmp(type, "recv") == 0) {
|
||||
sTB->type = SCKL_RECV;
|
||||
} else {
|
||||
WARN("type of transfer is not supported: %s", type);
|
||||
return ncclInternalError;
|
||||
}
|
||||
|
||||
bool isRecv = false;
|
||||
bool isSend = false;
|
||||
if (strcmp(type, "recv") == 0){
|
||||
isRecv = true;
|
||||
} else if (strcmp(type, "send") == 0){
|
||||
isSend = true;
|
||||
}
|
||||
for (int p=0; p<typeOfComm->nSubs; p++) {
|
||||
struct ncclXmlNode* peer = typeOfComm->subs[p];
|
||||
int peerId;
|
||||
NCCLCHECK(xmlGetAttrInt(peer, "id", &peerId));
|
||||
// SCKL generates the same scklAlgoState for all channels for now. This will change in the future
|
||||
for (int c=0; c<comm->nChannels; c++){
|
||||
if (isRecv) {
|
||||
if (comm->channels[c].sGraph.nRecvPeers < SCKL_MAX_NUM_CONN){
|
||||
int index = comm->channels[c].sGraph.nRecvPeers;
|
||||
comm->channels[c].sGraph.recv[index] = peerId;
|
||||
// comm->channels[c].sGraph.recv[index].nChunks = 1;
|
||||
comm->channels[c].sGraph.nRecvPeers++;
|
||||
} else {
|
||||
WARN("Too many recv connections for device %d channel %d -- connection to %d is ignored. This may cause deadlock in initialization.", rank, c, peerId);
|
||||
}
|
||||
} else if (isSend){
|
||||
if (comm->channels[c].sGraph.nSendPeers < SCKL_MAX_NUM_CONN){
|
||||
int index = comm->channels[c].sGraph.nSendPeers;
|
||||
comm->channels[c].sGraph.send[index] = peerId;
|
||||
// comm->channels[c].sGraph.send[index].nChunks = 1;
|
||||
comm->channels[c].sGraph.nSendPeers++;
|
||||
} else {
|
||||
WARN("Too many recv connections for device %d channel %d -- connection to %d is ignored. This may cause deadlock in initialization.", rank, c, peerId);
|
||||
for (int st=0; st<threadblockNode->nSubs; st++) {
|
||||
struct ncclXmlNode* stepNode = threadblockNode->subs[st];
|
||||
if (strcmp(stepNode->name, "step") == 0){
|
||||
int s, chunkId;
|
||||
NCCLCHECK(xmlGetAttrInt(stepNode, "s", &s));
|
||||
NCCLCHECK(xmlGetAttrInt(stepNode, "chunkId", &chunkId));
|
||||
if (s >= SCKL_MAX_NUM_STEPS){
|
||||
WARN("Too many steps are requested. Max number of steps: %d, requested: %d", SCKL_MAX_NUM_STEPS, s+1);
|
||||
return ncclInternalError;
|
||||
}
|
||||
if (s < 0){
|
||||
WARN("step must be positive: step %d", s);
|
||||
return ncclInternalError;
|
||||
}
|
||||
sTB->nsteps = std::max(sTB->nsteps, s+1);
|
||||
sTB->transfers[s] = chunkId;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -806,43 +806,47 @@ ncclResult_t ncclTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXm
|
|||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t scklTopoXmlPeerLoad(FILE* file, struct ncclXml* xml, struct ncclXmlNode* head) {
|
||||
int id;
|
||||
NCCLCHECK(xmlGetAttrInt(head, "id", &id));
|
||||
ncclResult_t scklAlgoXmlStep(FILE* file, struct ncclXml* xml, struct ncclXmlNode* head) {
|
||||
int s, chunkId;
|
||||
NCCLCHECK(xmlGetAttrInt(head, "s", &s));
|
||||
NCCLCHECK(xmlGetAttrInt(head, "chunkId", &chunkId));
|
||||
struct xmlHandler handlers[] = { };
|
||||
NCCLCHECK(xmlLoadSub(file, xml, head, handlers, 1));
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t scklTopoXmlConnLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
||||
ncclResult_t scklAlgoXmlthreadblock(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
||||
int rbid, peer;
|
||||
const char* type;
|
||||
NCCLCHECK(xmlGetAttrInt(head, "rbid", &id));
|
||||
NCCLCHECK(xmlGetAttrInt(head, "peer", &peer));
|
||||
NCCLCHECK(xmlGetAttrStr(head, "type", &type));
|
||||
struct xmlHandler handlers[] = { { "peer", scklTopoXmlPeerLoad } };
|
||||
struct xmlHandler handlers[] = { { "step", scklAlgoXmlStep } };
|
||||
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t scklTopoXmlGraphLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
||||
ncclResult_t scklAlgoXmlGpu(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
||||
int id;
|
||||
NCCLCHECK(xmlGetAttrInt(head, "id", &id));
|
||||
struct xmlHandler handlers[] = { { "conn", scklTopoXmlConnLoad } };
|
||||
struct xmlHandler handlers[] = { { "threadblock", scklAlgoXmlthreadblock } };
|
||||
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t scklTopoXmlSystemLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
||||
struct xmlHandler handlers[] = { { "gpu", scklTopoXmlGraphLoad } };
|
||||
ncclResult_t scklAlgoXmlLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
||||
struct xmlHandler handlers[] = { { "gpu", scklAlgoXmlGpu } };
|
||||
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t scklTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml) {
|
||||
ncclResult_t scklGetXmlAlgoFromFile(const char* xmlGraphFile, struct ncclXml* xml) {
|
||||
FILE* file = fopen(xmlGraphFile, "r");
|
||||
if (file == NULL) {
|
||||
WARN("Could not open XML SCKL graph file %s : %s", xmlGraphFile, strerror(errno));
|
||||
return ncclSystemError;
|
||||
}
|
||||
struct xmlHandler handlers[] = { { "system", scklTopoXmlSystemLoad } };
|
||||
struct xmlHandler handlers[] = { { "algo", scklAlgoXmlLoad } };
|
||||
xml->maxIndex = 0;
|
||||
NCCLCHECK(xmlLoadSub(file, xml, NULL, handlers, 1));
|
||||
fclose(file);
|
||||
|
|
|
@ -43,7 +43,7 @@ ncclResult_t ncclTopoGetXmlFromFile(const char* xmlTopoFile, struct ncclXml* xml
|
|||
ncclResult_t ncclTopoDumpXmlToFile(const char* xmlTopoFile, struct ncclXml* xml);
|
||||
#define NCCL_GRAPH_XML_VERSION 1
|
||||
ncclResult_t ncclTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml);
|
||||
ncclResult_t scklTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml);
|
||||
ncclResult_t scklGetXmlAlgoFromFile(const char* xmlGraphFile, struct ncclXml* xml);
|
||||
|
||||
/* Auto-detect functions */
|
||||
ncclResult_t ncclTopoFillGpu(struct ncclXml* xml, const char* busId, struct ncclXmlNode** gpuNode);
|
||||
|
|
|
@ -58,6 +58,7 @@ struct ncclRecvMem {
|
|||
|
||||
struct ncclComm {
|
||||
struct ncclChannel channels[MAXCHANNELS];
|
||||
struct scklAlgorithm scklAlgo;
|
||||
|
||||
struct ncclPeerInfo* peerInfo;
|
||||
struct ncclTopoSystem* topo;
|
||||
|
|
|
@ -117,20 +117,25 @@ struct ncclRing {
|
|||
int* devUserRanks;
|
||||
};
|
||||
|
||||
#define SCKL_MAX_NUM_CONN 16
|
||||
#define SCKL_MAX_NUM_STEPS 16
|
||||
#define SCKL_MAX_NUM_THREAD_BLOCKS 16
|
||||
|
||||
// struct scklConn {
|
||||
// int peer;
|
||||
// int nChunks;
|
||||
// };
|
||||
#define SCKL_SEND 0
|
||||
#define SCKL_RECV 1
|
||||
|
||||
struct scklAlgoState {
|
||||
int nRecvPeers;
|
||||
int nSendPeers;
|
||||
int recv[SCKL_MAX_NUM_CONN];
|
||||
int send[SCKL_MAX_NUM_CONN];
|
||||
// struct scklConn recv[SCKL_MAX_NUM_CONN];
|
||||
// struct scklConn send[SCKL_MAX_NUM_CONN];
|
||||
// Describes one SCKL thread block: a single peer, a transfer direction, and a per-step list of chunk ids.
struct scklThreadBlock {
|
||||
// value of the threadblock's "peer" XML attribute, stored by the loader shown in this change
uint8_t peer;
|
||||
uint8_t type; // follow SCKL_SEND and SCKL_RECV macros
|
||||
// the loader keeps this at max(nsteps, s+1) over the "step" entries it reads
uint8_t nsteps;
|
||||
// step is used to index into this array. transfers[step] is the chunkId to transfer.
|
||||
uint16_t transfers[SCKL_MAX_NUM_STEPS];
|
||||
};
|
||||
|
||||
// gpuId is the one that is in comm->rank
|
||||
// SCKL algorithm description held in ncclComm (field scklAlgo): the thread blocks parsed for this rank.
struct scklAlgorithm {
|
||||
// the loader shown in this change sets this to the largest rbid seen plus one
int nBlocks;
|
||||
// rbid is used as an index into this array
|
||||
struct scklThreadBlock scklTB[SCKL_MAX_NUM_THREAD_BLOCKS];
|
||||
};
|
||||
|
||||
#define NCCL_MAX_TREE_ARITY 3
|
||||
|
@ -193,7 +198,6 @@ struct ncclChannel {
|
|||
struct ncclRing ring;
|
||||
struct ncclTree tree;
|
||||
struct ncclTree collTree;
|
||||
struct scklAlgoState sGraph;
|
||||
|
||||
int id;
|
||||
|
||||
|
|
|
@ -35,8 +35,8 @@ ncclResult_t ncclTopoCheckGdr(struct ncclTopoSystem* topo, int64_t busId, int ne
|
|||
// Set CPU affinity
|
||||
ncclResult_t ncclTopoSetAffinity(struct ncclTopoSystem* system, int rank);
|
||||
|
||||
// SCKL setup peers
|
||||
ncclResult_t scklGetTopoFromXMLAndSetChannels(struct ncclComm* comm);
|
||||
// SCKL get algorithm from XML file and set the communicator
|
||||
ncclResult_t scklGetAlgoFromXMLAndSetComm(struct ncclComm* comm);
|
||||
|
||||
#define NCCL_TOPO_CPU_ARCH_X86 1
|
||||
#define NCCL_TOPO_CPU_ARCH_POWER 2
|
||||
|
|
|
@ -55,6 +55,7 @@ struct ncclTransport {
|
|||
};
|
||||
|
||||
ncclResult_t ncclTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel, int nrecv, int* peerRecv, int nsend, int* peerSend);
|
||||
ncclResult_t scklTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel);
|
||||
ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph* graph);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -824,16 +824,16 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, ncclUniqueId* comm
|
|||
INFO(NCCL_INIT, "Connected all trees");
|
||||
|
||||
// NetSharedBuffers needs to be set for this to work across nodes.
|
||||
NCCLCHECK(scklGetTopoFromXMLAndSetChannels(comm));
|
||||
NCCLCHECK(scklGetAlgoFromXMLAndSetComm(comm));
|
||||
// Connect SCKL graph
|
||||
for (int c=0; c<comm->nChannels; c++) {
|
||||
struct ncclChannel* channel = comm->channels+c;
|
||||
if (comm->nRanks == 1) continue;
|
||||
NCCLCHECKGOTO(ncclTransportP2pConnect(comm, channel, channel->sGraph.nRecvPeers, channel->sGraph.recv, channel->sGraph.nSendPeers, channel->sGraph.send), ret, affinity_restore);
|
||||
NCCLCHECKGOTO(scklTransportP2pConnect(comm, channel), ret, affinity_restore);
|
||||
}
|
||||
// It appears that graph is not really needed for P2pSetup. The only place that actually uses it is in ncclTopoGetNetDev which has a bypass for when it is set to NULL.
|
||||
NCCLCHECKGOTO(ncclTransportP2pSetup(comm, NULL), ret, affinity_restore);
|
||||
INFO(NCCL_INIT, "Connected SCKL graph");
|
||||
INFO(NCCL_INIT, "Connected SCKL algorithm");
|
||||
|
||||
// Check if we can setup CollNet
|
||||
if (comm->nNodes > 1 &&
|
||||
|
|
|
@ -51,6 +51,30 @@ ncclResult_t ncclTransportP2pConnect(struct ncclComm* comm, struct ncclChannel*
|
|||
return ncclSuccess;
|
||||
}
|
||||
|
||||
// SCKL needs to traverse the algorithm to find the peers
|
||||
// Marks, for one channel, which peers named by this rank's SCKL thread blocks still need a
// send or receive connection. Only the comm->connectSend / comm->connectRecv bitmasks are
// updated; this function does not open any connection itself.
// comm:    communicator whose scklAlgo thread blocks are scanned and whose connect masks are updated.
// channel: channel whose id selects the mask bit and whose peers[] connection state is checked.
// Returns ncclSuccess unconditionally.
ncclResult_t scklTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel) {
|
||||
// Bit identifying this channel inside the per-peer connect masks.
// NOTE(review): the shift is evaluated in int; assumes channel->id is small enough for that — confirm.
uint32_t mask = 1 << channel->id;
|
||||
struct scklAlgorithm* scklAlgo = &comm->scklAlgo;
|
||||
// nrecv / nsend are only reported through the TRACE call at the end.
int nrecv = 0;
|
||||
int nsend = 0;
|
||||
for (int i=0; i<scklAlgo->nBlocks; i++){
|
||||
int peer = scklAlgo->scklTB[i].peer;
|
||||
int type = scklAlgo->scklTB[i].type; // 0 for send, 1 for recv
|
||||
// Skip thread blocks whose peer is out of range or is this rank itself.
// NOTE(review): scklThreadBlock.peer is declared uint8_t in this change, so the value read
// above is never negative and the `peer == -1` test looks unreachable — confirm how unused
// thread-block slots are meant to be marked.
if (peer == -1 || peer >= comm->nRanks || peer == comm->rank) continue;
|
||||
if (type == SCKL_SEND){
|
||||
// Already connected on this channel: nothing to request.
if (channel->peers[peer].send.connected) continue;
|
||||
comm->connectSend[peer] |= mask;
|
||||
nsend++;
|
||||
} else if (type == SCKL_RECV) {
|
||||
// Already connected on this channel: nothing to request.
if (channel->peers[peer].recv.connected) continue;
|
||||
comm->connectRecv[peer] |= mask;
|
||||
nrecv++;
|
||||
}
|
||||
}
|
||||
TRACE(NCCL_INIT, "sckl nsend %d nrecv %d", nsend, nrecv);
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
void dumpData(struct ncclConnect* data, int ndata) {
|
||||
for (int n=0; n<ndata; n++) {
|
||||
printf("[%d] ", n);
|
||||
|
|
Загрузка…
Ссылка в новой задаче