Added a memory fence (__threadfence) before the flag write

This commit is contained in:
Saeed Maleki 2021-04-02 05:48:58 +00:00
Родитель f2cf4f2858
Коммит 295c627e7f
2 изменённых файла: 2 добавления и 1 удаление

Просмотреть файл

@@ -79,6 +79,7 @@ class ncclFunction<ncclFuncAllToAll, ALGO, PROTO, FUNC, T, UNROLL> {
} else if (sckltb->type == SCKL_RECV) {
prims.directRecv(thisbuffer + offset, offset, nelem);
if (tid == 0){
__threadfence();
uint64_t curFlag = COMPUTE_FLAG(workIndex, iter, i);
scklFlags[bid].flag = curFlag;
}

Просмотреть файл

@@ -549,7 +549,7 @@ ncclResult_t ncclSaveCommKernels(ncclComm_t comm) {
for (int c = 0; c < comm->asyncOpCount; c++) {
struct ncclInfo* info = comm->asyncOps+c;
if (hasScklAlgo && info->algorithm != NCCL_ALGO_SCKL){
WARN("SCKL algorithms can be used asynchronously only when all are the same algorithm.");
WARN("SCKL algorithms can only be used asynchronously with other SCKL algorithm.");
return ncclInvalidUsage;
}
// SCKL needs to adjust nChannels in the future