зеркало из https://github.com/microsoft/msccl.git
Added a memory fence (__threadfence()) before the flag write
This commit is contained in:
Родитель
f2cf4f2858
Коммит
295c627e7f
|
@@ -79,6 +79,7 @@ class ncclFunction<ncclFuncAllToAll, ALGO, PROTO, FUNC, T, UNROLL> {
|
|||
} else if (sckltb->type == SCKL_RECV) {
|
||||
prims.directRecv(thisbuffer + offset, offset, nelem);
|
||||
if (tid == 0){
|
||||
__threadfence();
|
||||
uint64_t curFlag = COMPUTE_FLAG(workIndex, iter, i);
|
||||
scklFlags[bid].flag = curFlag;
|
||||
}
|
||||
|
|
|
@@ -549,7 +549,7 @@ ncclResult_t ncclSaveCommKernels(ncclComm_t comm) {
|
|||
for (int c = 0; c < comm->asyncOpCount; c++) {
|
||||
struct ncclInfo* info = comm->asyncOps+c;
|
||||
if (hasScklAlgo && info->algorithm != NCCL_ALGO_SCKL){
|
||||
WARN("SCKL algorithms can be used asynchronously only when all are the same algorithm.");
|
||||
WARN("SCKL algorithms can only be used asynchronously with other SCKL algorithm.");
|
||||
return ncclInvalidUsage;
|
||||
}
|
||||
// SCKL needs to adjust nChannels in the future
|
||||
|
|
Загрузка…
Ссылка в новой задаче