зеркало из https://github.com/microsoft/msccl.git
not compilable
This commit is contained in:
Родитель
62169be637
Коммит
08d3ea9909
|
@ -607,66 +607,72 @@ ncclResult_t ncclTopoGetLocalNet(struct ncclTopoSystem* system, int rank, int64_
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
ncclResult_t scklGetTopoFromXMLAndSetChannels(struct ncclComm* comm) {
|
ncclResult_t scklGetAlgoFromXMLAndSetComm(struct ncclComm* comm) {
|
||||||
char* str = getenv("SCKL_XML_FILE");
|
char* str = getenv("SCKL_XML_FILE");
|
||||||
if (str){
|
if (str){
|
||||||
INFO(NCCL_ENV, "SCKL_XML_FILE set by environment to %s", str);
|
INFO(NCCL_ENV, "SCKL_XML_FILE set by environment to %s", str);
|
||||||
struct ncclXml* xml;
|
struct ncclXml* xml;
|
||||||
NCCLCHECK(ncclCalloc(&xml, 1));
|
NCCLCHECK(ncclCalloc(&xml, 1));
|
||||||
NCCLCHECK(scklTopoGetXmlGraphFromFile(str, xml));
|
NCCLCHECK(scklGetXmlAlgoFromFile(str, xml));
|
||||||
int rank = comm->rank;
|
int rank = comm->rank;
|
||||||
|
|
||||||
for (int c=0; c<comm->nChannels; c++){
|
struct scklAglorithm* scklAlgo = &comm->scklAlgo;
|
||||||
comm->channels[c].sGraph.nRecvPeers = 0;
|
// zeroing out all entries.
|
||||||
comm->channels[c].sGraph.nSendPeers = 0;
|
memset(scklAlgo, 0, sizeof(struct scklAlgorithm));
|
||||||
}
|
|
||||||
|
|
||||||
struct ncclXmlNode* topNode;
|
struct ncclXmlNode* topNode;
|
||||||
NCCLCHECK(xmlFindTag(xml, "system", &topNode));
|
NCCLCHECK(xmlFindTag(xml, "algo", &topNode));
|
||||||
for (int s=0; s<topNode->nSubs; s++) {
|
for (int s=0; s<topNode->nSubs; s++) {
|
||||||
struct ncclXmlNode* node = topNode->subs[s];
|
struct ncclXmlNode* node = topNode->subs[s];
|
||||||
if (strcmp(node->name, "gpu") == 0){
|
if (strcmp(node->name, "gpu") == 0){
|
||||||
int id;
|
int id;
|
||||||
NCCLCHECK(xmlGetAttrInt(node, "id", &id));
|
NCCLCHECK(xmlGetAttrInt(node, "id", &id));
|
||||||
if (id == rank){
|
if (id == rank){
|
||||||
for (int p=0; p<node->nSubs; p++) {
|
scklAlgo->nBlocks = 0;
|
||||||
struct ncclXmlNode* typeOfComm = node->subs[p];
|
for (int t=0; t<node->nSubs; t++) {
|
||||||
if (strcmp(typeOfComm->name, "conn") == 0){
|
struct ncclXmlNode* threadblockNode = node->subs[t];
|
||||||
|
if (strcmp(threadblockNode->name, "threadblock") == 0){
|
||||||
|
int rbid, peer;
|
||||||
const char* type;
|
const char* type;
|
||||||
NCCLCHECK(xmlGetAttrStr(typeOfComm, "type", &type));
|
NCCLCHECK(xmlGetAttrInt(threadblockNode, "rbid", &rbid));
|
||||||
|
NCCLCHECK(xmlGetAttrInt(threadblockNode, "peer", &peer));
|
||||||
bool isRecv = false;
|
NCCLCHECK(xmlGetAttrStr(threadblockNode, "type", &type));
|
||||||
bool isSend = false;
|
if (rbid >= SCKL_MAX_NUM_THREAD_BLOCKS){
|
||||||
if (strcmp(type, "recv") == 0){
|
WARN("Too many thread blocks are requested. Max thread blocks: %d, requested: %d", SCKL_MAX_NUM_THREAD_BLOCKS, rbid+1);
|
||||||
isRecv = true;
|
return ncclInternalError;
|
||||||
} else if (strcmp(type, "send") == 0){
|
|
||||||
isSend = true;
|
|
||||||
}
|
}
|
||||||
for (int p=0; p<typeOfComm->nSubs; p++) {
|
if (rbid < 0){
|
||||||
struct ncclXmlNode* peer = typeOfComm->subs[p];
|
WARN("rbid must be positive. rbid: %d", rbid);
|
||||||
int peerId;
|
return ncclInternalError;
|
||||||
NCCLCHECK(xmlGetAttrInt(peer, "id", &peerId));
|
}
|
||||||
// SCKL generates the same scklAlgoState for all channels for now. This will change in the future
|
scklAlgo->nBlocks = std::max(comm->scklAlgo.nBlocks, rbid+1);
|
||||||
for (int c=0; c<comm->nChannels; c++){
|
struct scklThreadBlock* sTB = scklAlgo->scklTB[rbid];
|
||||||
if (isRecv) {
|
sTB->nsteps = 0;
|
||||||
if (comm->channels[c].sGraph.nRecvPeers < SCKL_MAX_NUM_CONN){
|
sTB->peer = peer;
|
||||||
int index = comm->channels[c].sGraph.nRecvPeers;
|
if (strcmp(type, "send") == 0){
|
||||||
comm->channels[c].sGraph.recv[index] = peerId;
|
sTB->type = SCKL_SEND;
|
||||||
// comm->channels[c].sGraph.recv[index].nChunks = 1;
|
} else if (strcmp(type, "recv") == 0) {
|
||||||
comm->channels[c].sGraph.nRecvPeers++;
|
sTB->type = SCKL_RECV;
|
||||||
} else {
|
} else {
|
||||||
WARN("Too many recv connections for device %d channel %d -- connection to %d is ignored. This may cause deadlock in initialization.", rank, c, peerId);
|
WARN("type of transfer is not supported: %s", type);
|
||||||
}
|
return ncclInternalError;
|
||||||
} else if (isSend){
|
}
|
||||||
if (comm->channels[c].sGraph.nSendPeers < SCKL_MAX_NUM_CONN){
|
|
||||||
int index = comm->channels[c].sGraph.nSendPeers;
|
for (int st=0; st<threadblockNode->nSubs; st++) {
|
||||||
comm->channels[c].sGraph.send[index] = peerId;
|
struct ncclXmlNode* stepNode = threadblockNode->subs[st];
|
||||||
// comm->channels[c].sGraph.send[index].nChunks = 1;
|
if (strcmp(stepNode->name, "step") == 0){
|
||||||
comm->channels[c].sGraph.nSendPeers++;
|
int s, chunkId;
|
||||||
} else {
|
NCCLCHECK(xmlGetAttrInt(stepNode, "s", &s));
|
||||||
WARN("Too many recv connections for device %d channel %d -- connection to %d is ignored. This may cause deadlock in initialization.", rank, c, peerId);
|
NCCLCHECK(xmlGetAttrInt(stepNode, "chunkId", &chunkId));
|
||||||
}
|
if (s >= SCKL_MAX_NUM_STEPS){
|
||||||
|
WARN("Too many steps are requested. Max number of steps: %d, requested: %d", SCKL_MAX_NUM_STEPS, s+1);
|
||||||
|
return ncclInternalError;
|
||||||
}
|
}
|
||||||
|
if (s < 0){
|
||||||
|
WARN("step must be positive: step %d", s);
|
||||||
|
return ncclInternalError;
|
||||||
|
}
|
||||||
|
sTB->nsteps = std::max(sTB->nsteps, s+1);
|
||||||
|
sTB->transfers[s] = chunkId;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -806,43 +806,47 @@ ncclResult_t ncclTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXm
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
ncclResult_t scklTopoXmlPeerLoad(FILE* file, struct ncclXml* xml, struct ncclXmlNode* head) {
|
ncclResult_t scklAlgoXmlStep(FILE* file, struct ncclXml* xml, struct ncclXmlNode* head) {
|
||||||
int id;
|
int s, chunkId;
|
||||||
NCCLCHECK(xmlGetAttrInt(head, "id", &id));
|
NCCLCHECK(xmlGetAttrInt(head, "s", &s));
|
||||||
|
NCCLCHECK(xmlGetAttrInt(head, "chunkId", &chunkId));
|
||||||
struct xmlHandler handlers[] = { };
|
struct xmlHandler handlers[] = { };
|
||||||
NCCLCHECK(xmlLoadSub(file, xml, head, handlers, 1));
|
NCCLCHECK(xmlLoadSub(file, xml, head, handlers, 1));
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
ncclResult_t scklTopoXmlConnLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
ncclResult_t scklAlgoXmlthreadblock(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
||||||
|
int rbid, peer;
|
||||||
const char* type;
|
const char* type;
|
||||||
|
NCCLCHECK(xmlGetAttrInt(head, "rbid", &id));
|
||||||
|
NCCLCHECK(xmlGetAttrInt(head, "peer", &peer));
|
||||||
NCCLCHECK(xmlGetAttrStr(head, "type", &type));
|
NCCLCHECK(xmlGetAttrStr(head, "type", &type));
|
||||||
struct xmlHandler handlers[] = { { "peer", scklTopoXmlPeerLoad } };
|
struct xmlHandler handlers[] = { { "step", scklAlgoXmlStep } };
|
||||||
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
|
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
ncclResult_t scklTopoXmlGraphLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
ncclResult_t scklAlgoXmlGpu(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
||||||
int id;
|
int id;
|
||||||
NCCLCHECK(xmlGetAttrInt(head, "id", &id));
|
NCCLCHECK(xmlGetAttrInt(head, "id", &id));
|
||||||
struct xmlHandler handlers[] = { { "conn", scklTopoXmlConnLoad } };
|
struct xmlHandler handlers[] = { { "threadblock", scklAlgoXmlthreadblock } };
|
||||||
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
|
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
ncclResult_t scklTopoXmlSystemLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
ncclResult_t scklAlgoXmlLoad(FILE* file, struct ncclXml* xmlGraph, struct ncclXmlNode* head) {
|
||||||
struct xmlHandler handlers[] = { { "gpu", scklTopoXmlGraphLoad } };
|
struct xmlHandler handlers[] = { { "gpu", scklAlgoXmlGpu } };
|
||||||
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
|
NCCLCHECK(xmlLoadSub(file, xmlGraph, head, handlers, 1));
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
ncclResult_t scklTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml) {
|
ncclResult_t scklGetXmlAlgoFromFile(const char* xmlGraphFile, struct ncclXml* xml) {
|
||||||
FILE* file = fopen(xmlGraphFile, "r");
|
FILE* file = fopen(xmlGraphFile, "r");
|
||||||
if (file == NULL) {
|
if (file == NULL) {
|
||||||
WARN("Could not open XML SCKL graph file %s : %s", xmlGraphFile, strerror(errno));
|
WARN("Could not open XML SCKL graph file %s : %s", xmlGraphFile, strerror(errno));
|
||||||
return ncclSystemError;
|
return ncclSystemError;
|
||||||
}
|
}
|
||||||
struct xmlHandler handlers[] = { { "system", scklTopoXmlSystemLoad } };
|
struct xmlHandler handlers[] = { { "algo", scklAlgoXmlLoad } };
|
||||||
xml->maxIndex = 0;
|
xml->maxIndex = 0;
|
||||||
NCCLCHECK(xmlLoadSub(file, xml, NULL, handlers, 1));
|
NCCLCHECK(xmlLoadSub(file, xml, NULL, handlers, 1));
|
||||||
fclose(file);
|
fclose(file);
|
||||||
|
|
|
@ -43,7 +43,7 @@ ncclResult_t ncclTopoGetXmlFromFile(const char* xmlTopoFile, struct ncclXml* xml
|
||||||
ncclResult_t ncclTopoDumpXmlToFile(const char* xmlTopoFile, struct ncclXml* xml);
|
ncclResult_t ncclTopoDumpXmlToFile(const char* xmlTopoFile, struct ncclXml* xml);
|
||||||
#define NCCL_GRAPH_XML_VERSION 1
|
#define NCCL_GRAPH_XML_VERSION 1
|
||||||
ncclResult_t ncclTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml);
|
ncclResult_t ncclTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml);
|
||||||
ncclResult_t scklTopoGetXmlGraphFromFile(const char* xmlGraphFile, struct ncclXml* xml);
|
ncclResult_t scklGetXmlAlgoFromFile(const char* xmlGraphFile, struct ncclXml* xml);
|
||||||
|
|
||||||
/* Auto-detect functions */
|
/* Auto-detect functions */
|
||||||
ncclResult_t ncclTopoFillGpu(struct ncclXml* xml, const char* busId, struct ncclXmlNode** gpuNode);
|
ncclResult_t ncclTopoFillGpu(struct ncclXml* xml, const char* busId, struct ncclXmlNode** gpuNode);
|
||||||
|
|
|
@ -58,6 +58,7 @@ struct ncclRecvMem {
|
||||||
|
|
||||||
struct ncclComm {
|
struct ncclComm {
|
||||||
struct ncclChannel channels[MAXCHANNELS];
|
struct ncclChannel channels[MAXCHANNELS];
|
||||||
|
struct scklAlgorithm scklAlgo;
|
||||||
|
|
||||||
struct ncclPeerInfo* peerInfo;
|
struct ncclPeerInfo* peerInfo;
|
||||||
struct ncclTopoSystem* topo;
|
struct ncclTopoSystem* topo;
|
||||||
|
|
|
@ -117,20 +117,25 @@ struct ncclRing {
|
||||||
int* devUserRanks;
|
int* devUserRanks;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define SCKL_MAX_NUM_CONN 16
|
#define SCKL_MAX_NUM_STEPS 16
|
||||||
|
#define SCKL_MAX_NUM_THREAD_BLOCKS 16
|
||||||
|
|
||||||
// struct scklConn {
|
#define SCKL_SEND 0
|
||||||
// int peer;
|
#define SCKL_RECV 1
|
||||||
// int nChunks;
|
|
||||||
// };
|
|
||||||
|
|
||||||
struct scklAlgoState {
|
struct scklThreadBlock {
|
||||||
int nRecvPeers;
|
uint8_t peer;
|
||||||
int nSendPeers;
|
uint8_t type; // follow SCKL_SEND and SCKL_RECV macros
|
||||||
int recv[SCKL_MAX_NUM_CONN];
|
uint8_t nsteps;
|
||||||
int send[SCKL_MAX_NUM_CONN];
|
// step is used to index into this array. transfers[step] is the chunkId to transfer.
|
||||||
// struct scklConn recv[SCKL_MAX_NUM_CONN];
|
uint16_t transfers[SCKL_MAX_NUM_STEPS];
|
||||||
// struct scklConn send[SCKL_MAX_NUM_CONN];
|
};
|
||||||
|
|
||||||
|
// gpuId is the one that is in comm->rank
|
||||||
|
struct scklAlgorithm {
|
||||||
|
int nBlocks;
|
||||||
|
// rbid is used as an index into this array
|
||||||
|
struct scklThreadBlock scklTB[SCKL_MAX_NUM_THREAD_BLOCKS];
|
||||||
};
|
};
|
||||||
|
|
||||||
#define NCCL_MAX_TREE_ARITY 3
|
#define NCCL_MAX_TREE_ARITY 3
|
||||||
|
@ -193,7 +198,6 @@ struct ncclChannel {
|
||||||
struct ncclRing ring;
|
struct ncclRing ring;
|
||||||
struct ncclTree tree;
|
struct ncclTree tree;
|
||||||
struct ncclTree collTree;
|
struct ncclTree collTree;
|
||||||
struct scklAlgoState sGraph;
|
|
||||||
|
|
||||||
int id;
|
int id;
|
||||||
|
|
||||||
|
|
|
@ -35,8 +35,8 @@ ncclResult_t ncclTopoCheckGdr(struct ncclTopoSystem* topo, int64_t busId, int ne
|
||||||
// Set CPU affinity
|
// Set CPU affinity
|
||||||
ncclResult_t ncclTopoSetAffinity(struct ncclTopoSystem* system, int rank);
|
ncclResult_t ncclTopoSetAffinity(struct ncclTopoSystem* system, int rank);
|
||||||
|
|
||||||
// SCKL setup peers
|
// SCKL get alirthm from XML file and set the communicator
|
||||||
ncclResult_t scklGetTopoFromXMLAndSetChannels(struct ncclComm* comm);
|
ncclResult_t scklGetAlgoFromXMLAndSetComm(struct ncclComm* comm);
|
||||||
|
|
||||||
#define NCCL_TOPO_CPU_ARCH_X86 1
|
#define NCCL_TOPO_CPU_ARCH_X86 1
|
||||||
#define NCCL_TOPO_CPU_ARCH_POWER 2
|
#define NCCL_TOPO_CPU_ARCH_POWER 2
|
||||||
|
|
|
@ -55,6 +55,7 @@ struct ncclTransport {
|
||||||
};
|
};
|
||||||
|
|
||||||
ncclResult_t ncclTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel, int nrecv, int* peerRecv, int nsend, int* peerSend);
|
ncclResult_t ncclTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel, int nrecv, int* peerRecv, int nsend, int* peerSend);
|
||||||
|
ncclResult_t scklTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel);
|
||||||
ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph* graph);
|
ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph* graph);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -824,16 +824,16 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, ncclUniqueId* comm
|
||||||
INFO(NCCL_INIT, "Connected all trees");
|
INFO(NCCL_INIT, "Connected all trees");
|
||||||
|
|
||||||
// NetSharedBuffers needs to be set for this to work across nodes.
|
// NetSharedBuffers needs to be set for this to work across nodes.
|
||||||
NCCLCHECK(scklGetTopoFromXMLAndSetChannels(comm));
|
NCCLCHECK(scklGetAlgoFromXMLAndSetComm(comm));
|
||||||
// Connect SCKL graph
|
// Connect SCKL graph
|
||||||
for (int c=0; c<comm->nChannels; c++) {
|
for (int c=0; c<comm->nChannels; c++) {
|
||||||
struct ncclChannel* channel = comm->channels+c;
|
struct ncclChannel* channel = comm->channels+c;
|
||||||
if (comm->nRanks == 1) continue;
|
if (comm->nRanks == 1) continue;
|
||||||
NCCLCHECKGOTO(ncclTransportP2pConnect(comm, channel, channel->sGraph.nRecvPeers, channel->sGraph.recv, channel->sGraph.nSendPeers, channel->sGraph.send), ret, affinity_restore);
|
NCCLCHECKGOTO(scklTransportP2pConnect(comm, channel), ret, affinity_restore);
|
||||||
}
|
}
|
||||||
// It appears that graph is not really needed for P2pSetup. The only place that actually uses it is in ncclTopoGetNetDev which has a bypass for when it is set to NULL.
|
// It appears that graph is not really needed for P2pSetup. The only place that actually uses it is in ncclTopoGetNetDev which has a bypass for when it is set to NULL.
|
||||||
NCCLCHECKGOTO(ncclTransportP2pSetup(comm, NULL), ret, affinity_restore);
|
NCCLCHECKGOTO(ncclTransportP2pSetup(comm, NULL), ret, affinity_restore);
|
||||||
INFO(NCCL_INIT, "Connected SCKL graph");
|
INFO(NCCL_INIT, "Connected SCKL algorithm");
|
||||||
|
|
||||||
// Check if we can setup CollNet
|
// Check if we can setup CollNet
|
||||||
if (comm->nNodes > 1 &&
|
if (comm->nNodes > 1 &&
|
||||||
|
|
|
@ -51,6 +51,30 @@ ncclResult_t ncclTransportP2pConnect(struct ncclComm* comm, struct ncclChannel*
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SCKL needs to traverse the algorithm to find the peers
|
||||||
|
ncclResult_t scklTransportP2pConnect(struct ncclComm* comm, struct ncclChannel* channel) {
|
||||||
|
uint32_t mask = 1 << channel->id;
|
||||||
|
struct scklAlgorithm* scklAlgo = &comm->scklAlgo;
|
||||||
|
int nrecv = 0;
|
||||||
|
int nsend = 0;
|
||||||
|
for (int i=0; i<scklAlgo->nBlocks; i++){
|
||||||
|
int peer = scklAlgo->scklTB[i].peer;
|
||||||
|
int type = scklAlgo->scklTB[i].type; // 0 for send, 1 for recv
|
||||||
|
if (peer == -1 || peer >= comm->nRanks || peer == comm->rank) continue;
|
||||||
|
if (type == SCKL_SEND){
|
||||||
|
if (channel->peers[peer].send.connected) continue;
|
||||||
|
comm->connectSend[peer] |= mask;
|
||||||
|
nsend++;
|
||||||
|
} else if (type == SCKL_RECV) {
|
||||||
|
if (channel->peers[peer].recv.connected) continue;
|
||||||
|
comm->connectRecv[peer] |= mask;
|
||||||
|
nrecv++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TRACE(NCCL_INIT, "sckl nsend %d nrecv %d", nsend, nrecv);
|
||||||
|
return ncclSuccess;
|
||||||
|
}
|
||||||
|
|
||||||
void dumpData(struct ncclConnect* data, int ndata) {
|
void dumpData(struct ncclConnect* data, int ndata) {
|
||||||
for (int n=0; n<ndata; n++) {
|
for (int n=0; n<ndata; n++) {
|
||||||
printf("[%d] ", n);
|
printf("[%d] ", n);
|
||||||
|
|
Загрузка…
Ссылка в новой задаче