Load remainder buffer elements in the end (#3462)

This commit is contained in:
Vishal Sharma 2021-02-16 12:36:32 -08:00 коммит произвёл GitHub
Родитель 4c998a5e7e
Коммит 93a20f7967
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 23 добавлений и 24 удалений

Просмотреть файл

@@ -6883,16 +6883,7 @@ static Value* TranslateStructBufVecLd(Type* VecEltTy, unsigned ElemCount,
std::vector<Value*> elts(ElemCount);
unsigned rest = (ElemCount % 4);
if (rest) {
Value* ResultElts[4];
Value *bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, rest, alignment);
bufLds.emplace_back(bufLd);
for (unsigned i = 0; i < rest; i++)
elts[i] = ResultElts[i];
offset = Builder.CreateAdd(offset, OP->GetU32Const(EltSize * rest));
}
for (unsigned i = rest; i < ElemCount; i += 4) {
for (unsigned i = 0; i < ElemCount-rest; i += 4) {
Value* ResultElts[4];
Value* bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, 4, alignment);
bufLds.emplace_back(bufLd);
@@ -6905,6 +6896,14 @@ static Value* TranslateStructBufVecLd(Type* VecEltTy, unsigned ElemCount,
offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize));
}
if (rest) {
Value* ResultElts[4];
Value* bufLd = GenerateStructBufLd(handle, bufIdx, offset, status, VecEltTy, ResultElts, OP, Builder, rest, alignment);
bufLds.emplace_back(bufLd);
for (unsigned i = 0; i < rest; i++)
elts[ElemCount - rest + i] = ResultElts[i];
}
// If the expected return type is scalar then skip building a vector
if (isScalarTy) {
return elts[0];

Просмотреть файл

@@ -24,15 +24,15 @@
// CHK_MAT2x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 2)
// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 8, i8 3, i32 2)
// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 2)
// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 8, i8 3, i32 2)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 20, i8 15, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 1, i32 4)
// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)

Просмотреть файл

@@ -24,15 +24,15 @@
// CHK_MAT2x2: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 2)
// CHK_MAT2x3: call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 8, i8 3, i32 2)
// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 3, i32 2)
// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 2)
// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 2)
// CHK_MAT3x2: call %dx.types.ResRet.i16 @dx.op.rawBufferLoad.i16(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 8, i8 3, i32 2)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 1, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 4, i8 15, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 20, i8 15, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)
// CHK_MAT3x3: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 32, i8 1, i32 4)
// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 0, i8 15, i32 4)
// CHK_MAT3x4: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %{{.*}}, i32 16, i8 15, i32 4)