Improve codegen for initializer lists to use memset more aggressively

when an initializer is variable (I handled the constant case in a previous
patch).  This has three pieces:

1. Enhance AggValueSlot to have a 'isZeroed' bit to tell CGExprAgg that
   the memory being stored into has previously been memset to zero.
2. Teach CGExprAgg to not emit stores of zero to isZeroed memory.
3. Teach CodeGenFunction::EmitAggExpr to scan initializers to determine
   whether they are profitable to emit a memset + inividual stores vs
   stores for everything.

The heuristic used is that a global has to be more than 16 bytes and
has to be 3/4 zero to be candidate for this xform.  The two testcases
are illustrative of the scenarios this catches.  We now codegen test9 into:

 call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 400, i32 4, i1 false)
 %.array = getelementptr inbounds [100 x i32]* %Arr, i32 0, i32 0
 %tmp = load i32* %X.addr, align 4
 store i32 %tmp, i32* %.array

and test10 into:

  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 392, i32 8, i1 false)
  %tmp = getelementptr inbounds %struct.b* %S, i32 0, i32 0
  %tmp1 = getelementptr inbounds %struct.a* %tmp, i32 0, i32 0
  %tmp2 = load i32* %X.addr, align 4
  store i32 %tmp2, i32* %tmp1, align 4
  %tmp5 = getelementptr inbounds %struct.b* %S, i32 0, i32 3
  %tmp10 = getelementptr inbounds %struct.a* %tmp5, i32 0, i32 4
  %tmp11 = load i32* %X.addr, align 4
  store i32 %tmp11, i32* %tmp10, align 4

Previously we produced 99 stores of zero for test9 and also tons for test10.
This xforms should substantially speed up -O0 builds when it kicks in as well
as reducing code size and optimizer heartburn on insane cases.  This resolves
PR279.



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@120692 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2010-12-02 07:07:26 +00:00
Родитель c0f31fd085
Коммит 1b726771d0
5 изменённых файлов: 187 добавлений и 20 удалений

Просмотреть файл

@ -696,8 +696,7 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D,
// Get the element type.
const llvm::Type *LElemTy = ConvertTypeForMem(Ty);
const llvm::Type *LElemPtrTy =
llvm::PointerType::get(LElemTy, Ty.getAddressSpace());
const llvm::Type *LElemPtrTy = LElemTy->getPointerTo(Ty.getAddressSpace());
llvm::Value *VLASize = EmitVLASize(Ty);
@ -860,7 +859,7 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D,
} else if (Init->getType()->isAnyComplexType()) {
EmitComplexExprIntoAddr(Init, Loc, isVolatile);
} else {
EmitAggExpr(Init, AggValueSlot::forAddr(Loc, isVolatile, true));
EmitAggExpr(Init, AggValueSlot::forAddr(Loc, isVolatile, true, false));
}
}

Просмотреть файл

@ -497,11 +497,42 @@ void AggExprEmitter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
EmitNullInitializationToLValue(CGF.MakeAddrLValue(Slot.getAddr(), T), T);
}
/// isSimpleZero - If emitting this value will obviously just cause a store of
/// zero to memory, return true. This can return false if uncertain, so it just
/// handles simple cases.
static bool isSimpleZero(const Expr *E, CodeGenFunction &CGF) {
// (0)
if (const ParenExpr *PE = dyn_cast<ParenExpr>(E))
return isSimpleZero(PE->getSubExpr(), CGF);
// 0
if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E))
return IL->getValue() == 0;
// +0.0
if (const FloatingLiteral *FL = dyn_cast<FloatingLiteral>(E))
return FL->getValue().isPosZero();
// int()
if ((isa<ImplicitValueInitExpr>(E) || isa<CXXScalarValueInitExpr>(E)) &&
CGF.getTypes().isZeroInitializable(E->getType()))
return true;
// (int*)0 - Null pointer expressions.
if (const CastExpr *ICE = dyn_cast<CastExpr>(E))
return ICE->getCastKind() == CK_NullToPointer;
// '\0'
if (const CharacterLiteral *CL = dyn_cast<CharacterLiteral>(E))
return CL->getValue() == 0;
// Otherwise, hard case: conservatively return false.
return false;
}
void
AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV, QualType T) {
// FIXME: Ignore result?
// FIXME: Are initializers affected by volatile?
if (isa<ImplicitValueInitExpr>(E)) {
if (Dest.isZeroed() && isSimpleZero(E, CGF)) {
// Storing "i32 0" to a zero'd memory location is a noop.
} else if (isa<ImplicitValueInitExpr>(E)) {
EmitNullInitializationToLValue(LV, T);
} else if (T->isReferenceType()) {
RValue RV = CGF.EmitReferenceBindingToExpr(E, /*InitializedDecl=*/0);
@ -509,13 +540,19 @@ AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV, QualType T) {
} else if (T->isAnyComplexType()) {
CGF.EmitComplexExprIntoAddr(E, LV.getAddress(), false);
} else if (CGF.hasAggregateLLVMType(T)) {
CGF.EmitAggExpr(E, AggValueSlot::forAddr(LV.getAddress(), false, true));
CGF.EmitAggExpr(E, AggValueSlot::forAddr(LV.getAddress(), false, true,
false, Dest.isZeroed()));
} else {
CGF.EmitStoreThroughLValue(CGF.EmitAnyExpr(E), LV, T);
}
}
void AggExprEmitter::EmitNullInitializationToLValue(LValue LV, QualType T) {
// If the destination slot is already zeroed out before the aggregate is
// copied into it, we don't have to emit any zeros here.
if (Dest.isZeroed() && CGF.getTypes().isZeroInitializable(T))
return;
if (!CGF.hasAggregateLLVMType(T)) {
// For non-aggregates, we can store zero
llvm::Value *Null = llvm::Constant::getNullValue(CGF.ConvertType(T));
@ -573,13 +610,27 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// FIXME: were we intentionally ignoring address spaces and GC attributes?
for (uint64_t i = 0; i != NumArrayElements; ++i) {
// If we're done emitting initializers and the destination is known-zeroed
// then we're done.
if (i == NumInitElements &&
Dest.isZeroed() &&
CGF.getTypes().isZeroInitializable(ElementType))
break;
llvm::Value *NextVal = Builder.CreateStructGEP(DestPtr, i, ".array");
LValue LV = CGF.MakeAddrLValue(NextVal, ElementType);
if (i < NumInitElements)
EmitInitializationToLValue(E->getInit(i), LV, ElementType);
else
EmitNullInitializationToLValue(LV, ElementType);
// If the GEP didn't get used because of a dead zero init or something
// else, clean it up for -O0 builds and general tidiness.
if (llvm::GetElementPtrInst *GEP =
dyn_cast<llvm::GetElementPtrInst>(NextVal))
if (GEP->use_empty())
GEP->eraseFromParent();
}
return;
}
@ -612,13 +663,13 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// FIXME: volatility
FieldDecl *Field = E->getInitializedFieldInUnion();
LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, Field, 0);
LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, Field, 0);
if (NumInitElements) {
// Store the initializer into the field
EmitInitializationToLValue(E->getInit(0), FieldLoc, Field->getType());
} else {
// Default-initialize to null
// Default-initialize to null.
EmitNullInitializationToLValue(FieldLoc, Field->getType());
}
@ -638,10 +689,16 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
if (Field->isUnnamedBitfield())
continue;
// Don't emit GEP before a noop store of zero.
if (CurInitVal == NumInitElements && Dest.isZeroed() &&
CGF.getTypes().isZeroInitializable(E->getType()))
break;
// FIXME: volatility
LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, *Field, 0);
// We never generate write-barries for initialized fields.
FieldLoc.setNonGC(true);
if (CurInitVal < NumInitElements) {
// Store the initializer into the field.
EmitInitializationToLValue(E->getInit(CurInitVal++), FieldLoc,
@ -650,6 +707,14 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// We're out of initalizers; default-initialize to null
EmitNullInitializationToLValue(FieldLoc, Field->getType());
}
// If the GEP didn't get used because of a dead zero init or something
// else, clean it up for -O0 builds and general tidiness.
if (FieldLoc.isSimple())
if (llvm::GetElementPtrInst *GEP =
dyn_cast<llvm::GetElementPtrInst>(FieldLoc.getAddress()))
if (GEP->use_empty())
GEP->eraseFromParent();
}
}
@ -657,6 +722,69 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// Entry Points into this File
//===----------------------------------------------------------------------===//
/// GetNumNonZeroBytesInInit - Get an approximate count of the number of
/// non-zero bytes that will be stored when outputting the initializer for the
/// specified initializer expression.
static uint64_t GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
if (const ParenExpr *PE = dyn_cast<ParenExpr>(E))
return GetNumNonZeroBytesInInit(PE->getSubExpr(), CGF);
// 0 and 0.0 won't require any non-zero stores!
if (isSimpleZero(E, CGF)) return 0;
// If this is an initlist expr, sum up the size of sizes of the (present)
// elements. If this is something weird, assume the whole thing is non-zero.
const InitListExpr *ILE = dyn_cast<InitListExpr>(E);
if (ILE == 0 || !CGF.getTypes().isZeroInitializable(ILE->getType()))
return CGF.getContext().getTypeSize(E->getType())/8;
uint64_t NumNonZeroBytes = 0;
for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i)
NumNonZeroBytes += GetNumNonZeroBytesInInit(ILE->getInit(i), CGF);
return NumNonZeroBytes;
}
/// CheckAggExprForMemSetUse - If the initializer is large and has a lot of
/// zeros in it, emit a memset and avoid storing the individual zeros.
///
static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E,
CodeGenFunction &CGF) {
// If the slot is already known to be zeroed, nothing to do. Don't mess with
// volatile stores.
if (Slot.isZeroed() || Slot.isVolatile() || Slot.getAddr() == 0) return;
// If the type is 16-bytes or smaller, prefer individual stores over memset.
std::pair<uint64_t, unsigned> TypeInfo =
CGF.getContext().getTypeInfo(E->getType());
if (TypeInfo.first/8 <= 16)
return;
// Check to see if over 3/4 of the initializer are known to be zero. If so,
// we prefer to emit memset + individual stores for the rest.
uint64_t NumNonZeroBytes = GetNumNonZeroBytesInInit(E, CGF);
if (NumNonZeroBytes*4 > TypeInfo.first/8)
return;
// Okay, it seems like a good idea to use an initial memset, emit the call.
llvm::Constant *SizeVal = CGF.Builder.getInt64(TypeInfo.first/8);
llvm::ConstantInt *AlignVal = CGF.Builder.getInt32(TypeInfo.second/8);
llvm::Value *Loc = Slot.getAddr();
const llvm::Type *BP = llvm::Type::getInt8PtrTy(CGF.getLLVMContext());
Loc = CGF.Builder.CreateBitCast(Loc, BP);
CGF.Builder.CreateCall5(CGF.CGM.getMemSetFn(Loc->getType(),
SizeVal->getType()),
Loc, CGF.Builder.getInt8(0), SizeVal, AlignVal,
CGF.Builder.getFalse());
// Tell the AggExprEmitter that the slot is known zero.
Slot.setZeroed();
}
/// EmitAggExpr - Emit the computation of the specified expression of aggregate
/// type. The result is computed into DestPtr. Note that if DestPtr is null,
/// the value of the aggregate expression is not needed. If VolatileDest is
@ -670,20 +798,20 @@ void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot,
bool IgnoreResult) {
assert(E && hasAggregateLLVMType(E->getType()) &&
"Invalid aggregate expression to emit");
assert((Slot.getAddr() != 0 || Slot.isIgnored())
&& "slot has bits but no address");
assert((Slot.getAddr() != 0 || Slot.isIgnored()) &&
"slot has bits but no address");
AggExprEmitter(*this, Slot, IgnoreResult)
.Visit(const_cast<Expr*>(E));
// Optimize the slot if possible.
CheckAggExprForMemSetUse(Slot, E, *this);
AggExprEmitter(*this, Slot, IgnoreResult).Visit(const_cast<Expr*>(E));
}
LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) {
assert(hasAggregateLLVMType(E->getType()) && "Invalid argument!");
llvm::Value *Temp = CreateMemTemp(E->getType());
LValue LV = MakeAddrLValue(Temp, E->getType());
AggValueSlot Slot
= AggValueSlot::forAddr(Temp, LV.isVolatileQualified(), false);
EmitAggExpr(E, Slot);
EmitAggExpr(E, AggValueSlot::forAddr(Temp, LV.isVolatileQualified(), false));
return LV;
}

Просмотреть файл

@ -336,6 +336,11 @@ class AggValueSlot {
bool VolatileFlag : 1;
bool LifetimeFlag : 1;
bool RequiresGCollection : 1;
/// IsZeroed - This is set to true if the destination is known to be zero
/// before the assignment into it. This means that zero fields don't need to
/// be set.
bool IsZeroed : 1;
public:
/// ignored - Returns an aggregate value slot indicating that the
@ -343,7 +348,7 @@ public:
static AggValueSlot ignored() {
AggValueSlot AV;
AV.Addr = 0;
AV.VolatileFlag = AV.LifetimeFlag = AV.RequiresGCollection = 0;
AV.VolatileFlag = AV.LifetimeFlag = AV.RequiresGCollection = AV.IsZeroed =0;
return AV;
}
@ -357,17 +362,19 @@ public:
/// somewhere that ObjC GC calls should be emitted for
static AggValueSlot forAddr(llvm::Value *Addr, bool Volatile,
bool LifetimeExternallyManaged,
bool RequiresGCollection=false) {
bool RequiresGCollection = false,
bool IsZeroed = false) {
AggValueSlot AV;
AV.Addr = Addr;
AV.VolatileFlag = Volatile;
AV.LifetimeFlag = LifetimeExternallyManaged;
AV.RequiresGCollection = RequiresGCollection;
AV.IsZeroed = IsZeroed;
return AV;
}
static AggValueSlot forLValue(LValue LV, bool LifetimeExternallyManaged,
bool RequiresGCollection=false) {
bool RequiresGCollection = false) {
return forAddr(LV.getAddress(), LV.isVolatileQualified(),
LifetimeExternallyManaged, RequiresGCollection);
}
@ -399,6 +406,10 @@ public:
return RValue::getAggregate(getAddr(), isVolatile());
}
void setZeroed(bool V = true) { IsZeroed = V; }
bool isZeroed() const {
return IsZeroed;
}
};
} // end namespace CodeGen

Просмотреть файл

@ -1662,8 +1662,7 @@ public:
const BlockDeclRefExpr *BDRE);
RValue EmitCXXExprWithTemporaries(const CXXExprWithTemporaries *E,
AggValueSlot Slot
= AggValueSlot::ignored());
AggValueSlot Slot =AggValueSlot::ignored());
void EmitCXXThrowExpr(const CXXThrowExpr *E);

Просмотреть файл

@ -70,3 +70,33 @@ char test8(int X) {
// CHECK: store i8 98
// CHECK: store i8 99
}
void bar(void*);
// PR279
int test9(int X) {
int Arr[100] = { X }; // Should use memset
bar(Arr);
// CHECK: @test9
// CHECK: call void @llvm.memset
// CHECK-NOT: store i32 0
// CHECK: call void @bar
}
struct a {
int a, b, c, d, e, f, g, h, i, j, k, *p;
};
struct b {
struct a a,b,c,d,e,f,g;
};
int test10(int X) {
struct b S = { .a.a = X, .d.e = X, .f.e = 0, .f.f = 0, .f.p = 0 };
bar(&S);
// CHECK: @test10
// CHECK: call void @llvm.memset
// CHECK-NOT: store i32 0
// CHECK: call void @bar
}