diff --git a/src/cmd/compile/internal/gc/escape.go b/src/cmd/compile/internal/gc/escape.go
index 66440674d9..fdf327d715 100644
--- a/src/cmd/compile/internal/gc/escape.go
+++ b/src/cmd/compile/internal/gc/escape.go
@@ -880,7 +880,9 @@ func (e *Escape) augmentParamHole(k EscHole, where *Node) EscHole {
 	// non-transient location to avoid arguments from being
 	// transiently allocated.
 	if where.Op == ODEFER && e.loopDepth == 1 {
-		where.Esc = EscNever // force stack allocation of defer record (see ssa.go)
+		// force stack allocation of defer record, unless open-coded
+		// defers are used (see ssa.go)
+		where.Esc = EscNever
 		return e.later(k)
 	}
 
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 51e9f0071a..ab616d4c9b 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -53,6 +53,7 @@ var (
 	Debug_typecheckinl int
 	Debug_gendwarfinl  int
 	Debug_softfloat    int
+	Debug_defer        int
 )
 
 // Debug arguments.
@@ -83,6 +84,7 @@ var debugtab = []struct {
 	{"typecheckinl", "eager typechecking of inline function bodies", &Debug_typecheckinl},
 	{"dwarfinl", "print information about DWARF inlined function creation", &Debug_gendwarfinl},
 	{"softfloat", "force compiler to emit soft-float code", &Debug_softfloat},
+	{"defer", "print information about defer compilation", &Debug_defer},
 }
 
 const debugHelpHeader = `usage: -d arg[,arg]* and arg is <key>[=<value>]
diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go
index be13b27892..83371fabf5 100644
--- a/src/cmd/compile/internal/gc/obj.go
+++ b/src/cmd/compile/internal/gc/obj.go
@@ -294,6 +294,9 @@ func addGCLocals() {
 			}
 			ggloblsym(x, int32(len(x.P)), attr)
 		}
+		if x := s.Func.OpenCodedDeferInfo; x != nil {
+			ggloblsym(x, int32(len(x.P)), obj.RODATA|obj.DUPOK)
+		}
 	}
 }
diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go
index 1745b92e6b..5f0ece0ad7 100644
--- a/src/cmd/compile/internal/gc/plive.go
+++ b/src/cmd/compile/internal/gc/plive.go
@@ -863,7 +863,16 @@ func (lv *Liveness) solve() {
 					newliveout.vars.Set(pos)
 				}
 			case ssa.BlockExit:
-				// panic exit - nothing to do
+				if lv.fn.Func.HasDefer() && !lv.fn.Func.OpenCodedDeferDisallowed() {
+					// All stack slots storing args for open-coded
+					// defers are live at panic exit (since they
+					// will be used in running defers)
+					for i, n := range lv.vars {
+						if n.Name.OpenDeferSlot() {
+							newliveout.vars.Set(int32(i))
+						}
+					}
+				}
 			default:
 				// A variable is live on output from this block
 				// if it is live on input to some successor.
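A note on the new debug flag: with this change, -d defer makes the compiler report, per defer statement, which of the three strategies it chose (see the Warnl call in the ODEFER case of ssa.go further down). The following small program is illustrative, not from the patch; the comments show what the rules in walk.go/ssa.go should produce when compiling with optimizations on, e.g. with something like go build -gcflags=-d=defer:

	package main

	func qualifies() {
		// No defers in loops, few defers, few returns: expect "open-coded defer".
		defer println("bye")
	}

	func disqualified() {
		for i := 0; i < 3; i++ {
			// A defer inside a loop can run many times, so it cannot get a
			// fixed stack slot and bit: expect "heap-allocated defer".
			defer println(i)
		}
		// The loop defer above disallows open-coding for the whole function,
		// but this defer still executes at most once at the top level:
		// expect "stack-allocated defer".
		defer println("done")
	}

	func main() {
		qualifies()
		disqualified()
	}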
diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index e34ed7311c..f614b60685 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -317,6 +317,7 @@ func deferstruct(stksize int64) *types.Type {
 		makefield("siz", types.Types[TUINT32]),
 		makefield("started", types.Types[TBOOL]),
 		makefield("heap", types.Types[TBOOL]),
+		makefield("openDefer", types.Types[TBOOL]),
 		makefield("sp", types.Types[TUINTPTR]),
 		makefield("pc", types.Types[TUINTPTR]),
 		// Note: the types here don't really matter. Defer structures
@@ -325,6 +326,9 @@ func deferstruct(stksize int64) *types.Type {
 		makefield("fn", types.Types[TUINTPTR]),
 		makefield("_panic", types.Types[TUINTPTR]),
 		makefield("link", types.Types[TUINTPTR]),
+		makefield("framepc", types.Types[TUINTPTR]),
+		makefield("varp", types.Types[TUINTPTR]),
+		makefield("fd", types.Types[TUINTPTR]),
 		makefield("args", argtype),
 	}
diff --git a/src/cmd/compile/internal/gc/sizeof_test.go b/src/cmd/compile/internal/gc/sizeof_test.go
index f4725c0eb2..ce4a216c2e 100644
--- a/src/cmd/compile/internal/gc/sizeof_test.go
+++ b/src/cmd/compile/internal/gc/sizeof_test.go
@@ -20,7 +20,7 @@ func TestSizeof(t *testing.T) {
 		_32bit uintptr // size on 32bit platforms
 		_64bit uintptr // size on 64bit platforms
 	}{
-		{Func{}, 116, 208},
+		{Func{}, 124, 224},
 		{Name{}, 32, 56},
 		{Param{}, 24, 48},
 		{Node{}, 76, 128},
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 72727cab9c..dff559a7ba 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -29,6 +29,10 @@ var ssaDumpStdout bool // whether to dump to stdout
 var ssaDumpCFG string   // generate CFGs for these phases
 const ssaDumpFile = "ssa.html"
 
+// The max number of defers in a function using open-coded defers. We enforce this
+// limit because the deferBits bitmask is currently a single byte (to minimize code size).
+const maxOpenDefers = 8
+
 // ssaDumpInlined holds all inlined functions when ssaDump contains a function name.
 var ssaDumpInlined []*Node
 
@@ -167,6 +171,111 @@ func initssaconfig() {
 	SigPanic = sysfunc("sigpanic")
 }
 
+// getParam returns the Field of ith param of node n (which is a
+// function/method/interface call), where the receiver of a method call is
+// considered as the 0th parameter. This does not include the receiver of an
+// interface call.
+func getParam(n *Node, i int) *types.Field {
+	t := n.Left.Type
+	if n.Op == OCALLMETH {
+		if i == 0 {
+			return t.Recv()
+		}
+		return t.Params().Field(i - 1)
+	}
+	return t.Params().Field(i)
+}
+
+// dvarint writes a varint v to the funcdata in symbol x and returns the new offset.
+func dvarint(x *obj.LSym, off int, v int64) int {
+	if v < 0 || v > 1e9 {
+		panic(fmt.Sprintf("dvarint: bad offset for funcdata - %v", v))
+	}
+	if v < 1<<7 {
+		return duint8(x, off, uint8(v))
+	}
+	off = duint8(x, off, uint8((v&127)|128))
+	if v < 1<<14 {
+		return duint8(x, off, uint8(v>>7))
+	}
+	off = duint8(x, off, uint8(((v>>7)&127)|128))
+	if v < 1<<21 {
+		return duint8(x, off, uint8(v>>14))
+	}
+	off = duint8(x, off, uint8(((v>>14)&127)|128))
+	if v < 1<<28 {
+		return duint8(x, off, uint8(v>>21))
+	}
+	off = duint8(x, off, uint8(((v>>21)&127)|128))
+	return duint8(x, off, uint8(v>>28))
+}
+
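dvarint above is an unrolled writer for the usual little-endian base-128 varint. For intuition, here is a self-contained sketch of the same encoding plus the matching decoder (the slice-based analogue of runtime.readvarintUnsafe later in this patch; putVarint/getVarint are names invented for the example):

	package main

	import "fmt"

	// putVarint appends v in little-endian base-128 form: seven value bits
	// per byte, high bit set on every byte except the last.
	func putVarint(buf []byte, v int64) []byte {
		for v >= 128 {
			buf = append(buf, byte(v&127|128))
			v >>= 7
		}
		return append(buf, byte(v))
	}

	// getVarint reverses putVarint and returns the remaining bytes.
	func getVarint(buf []byte) (uint32, []byte) {
		var r uint32
		var shift uint
		for {
			b := buf[0]
			buf = buf[1:]
			if b < 128 {
				return r + uint32(b)<<shift, buf
			}
			r += (uint32(b) &^ 128) << shift
			shift += 7
		}
	}

	func main() {
		buf := putVarint(nil, 300)
		fmt.Printf("% x\n", buf) // ac 02
		v, _ := getVarint(buf)
		fmt.Println(v) // 300
	}

Most fields in the defer funcdata fit in a single byte (values under 128); only large stack offsets in big frames need more.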
+// emitOpenDeferInfo emits FUNCDATA information about the defers in a function
+// that is using open-coded defers. This funcdata is used to determine the active
+// defers in a function and execute those defers during panic processing.
+//
+// The funcdata is all encoded in varints (since values will almost always be less than
+// 128, but stack offsets could potentially be up to 2Gbyte). All "locations" (offsets)
+// for stack variables are specified as the number of bytes below varp (pointer to the
+// top of the local variables) for their starting address. The format is:
+//
+//  - Max total argument size among all the defers
+//  - Offset of the deferBits variable
+//  - Number of defers in the function
+//  - Information about each defer call, in reverse order of appearance in the function:
+//    - Total argument size of the call
+//    - Offset of the closure value to call
+//    - Number of arguments (including interface receiver or method receiver as first arg)
+//    - Information about each argument
+//      - Offset of the stored defer argument in this function's frame
+//      - Size of the argument
+//      - Offset of where argument should be placed in the args frame when making call
+func (s *state) emitOpenDeferInfo() {
+	x := Ctxt.Lookup(s.curfn.Func.lsym.Name + ".opendefer")
+	s.curfn.Func.lsym.Func.OpenCodedDeferInfo = x
+	off := 0
+
+	// Compute maxargsize (max size of arguments for all defers)
+	// first, so we can output it first to the funcdata
+	var maxargsize int64
+	for i := len(s.openDefers) - 1; i >= 0; i-- {
+		r := s.openDefers[i]
+		argsize := r.n.Left.Type.ArgWidth()
+		if argsize > maxargsize {
+			maxargsize = argsize
+		}
+	}
+	off = dvarint(x, off, maxargsize)
+	off = dvarint(x, off, -s.deferBitsTemp.Xoffset)
+	off = dvarint(x, off, int64(len(s.openDefers)))
+
+	// Write in reverse-order, for ease of running in that order at runtime
+	for i := len(s.openDefers) - 1; i >= 0; i-- {
+		r := s.openDefers[i]
+		off = dvarint(x, off, r.n.Left.Type.ArgWidth())
+		off = dvarint(x, off, -r.closureNode.Xoffset)
+		numArgs := len(r.argNodes)
+		if r.rcvrNode != nil {
+			// If there's an interface receiver, treat/place it as the first
+			// arg. (If there is a method receiver, it's already included as
+			// first arg in r.argNodes.)
+			numArgs++
+		}
+		off = dvarint(x, off, int64(numArgs))
+		if r.rcvrNode != nil {
+			off = dvarint(x, off, -r.rcvrNode.Xoffset)
+			off = dvarint(x, off, s.config.PtrSize)
+			off = dvarint(x, off, 0)
+		}
+		for j, arg := range r.argNodes {
+			f := getParam(r.n, j)
+			off = dvarint(x, off, -arg.Xoffset)
+			off = dvarint(x, off, f.Type.Size())
+			off = dvarint(x, off, f.Offset)
+		}
+	}
+}
+
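To make the format concrete, consider a hypothetical function with two open-coded defers, defer g() followed by defer f(x), where x is an 8-byte argument. Assuming (offsets invented purely for illustration) that deferBits lands at varp-17, the saved closure values at varp-16 and varp-8, and the argtmp for x at varp-24, the emitted funcdata would be this byte sequence, one byte per field since every value is under 128:

	8     max total argument size among the defers (f takes 8 bytes)
	17    deferBits is stored at varp-17
	2     number of defers in the function
	      ... defer #2, emitted first because it runs first at exit: f(x) ...
	8     total argument size of the call
	8     closure value saved at varp-8
	1     one argument
	24    x saved at varp-24
	8     x is 8 bytes
	0     x goes at offset 0 in the outgoing argument frame
	      ... defer #1: g() ...
	0     total argument size of the call
	16    closure value saved at varp-16
	0     no arguments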
 // buildssa builds an SSA function for fn.
 // worker indicates which of the backend workers is doing the processing.
 func buildssa(fn *Node, worker int) *ssa.Func {
@@ -229,11 +338,55 @@ func buildssa(fn *Node, worker int) *ssa.Func {
 	s.labeledNodes = map[*Node]*ssaLabel{}
 	s.fwdVars = map[*Node]*ssa.Value{}
 	s.startmem = s.entryNewValue0(ssa.OpInitMem, types.TypeMem)
+
+	s.hasOpenDefers = Debug['N'] == 0 && s.hasdefer && !s.curfn.Func.OpenCodedDeferDisallowed()
+	if s.hasOpenDefers && (Ctxt.Flag_shared || Ctxt.Flag_dynlink) && thearch.LinkArch.Name == "386" {
+		// Don't support open-coded defers for 386 ONLY when using shared
+		// libraries, because there is extra code (added by rewriteToUseGot())
+		// preceding the deferreturn/ret code that is generated by gencallret()
+		// that we don't track correctly.
+		s.hasOpenDefers = false
+	}
+	if s.hasOpenDefers && s.curfn.Func.Exit.Len() > 0 {
+		// Skip doing open defers if there is any extra exit code (likely
+		// copying heap-allocated return values or race detection), since
+		// we will not generate that code in the case of the extra
+		// deferreturn/ret segment.
+		s.hasOpenDefers = false
+	}
+	if s.hasOpenDefers &&
+		s.curfn.Func.numReturns*s.curfn.Func.numDefers > 15 {
+		// Since we are generating defer calls at every exit for
+		// open-coded defers, skip doing open-coded defers if there are
+		// too many returns (especially if there are multiple defers).
+		// Open-coded defers are most important for improving performance
+		// for smaller functions (which don't have many returns).
+		s.hasOpenDefers = false
+	}
+
 	s.sp = s.entryNewValue0(ssa.OpSP, types.Types[TUINTPTR]) // TODO: use generic pointer type (unsafe.Pointer?) instead
 	s.sb = s.entryNewValue0(ssa.OpSB, types.Types[TUINTPTR])
 
 	s.startBlock(s.f.Entry)
 	s.vars[&memVar] = s.startmem
+	if s.hasOpenDefers {
+		// Create the deferBits variable and stack slot. deferBits is a
+		// bitmask showing which of the open-coded defers in this function
+		// have been activated.
+		deferBitsTemp := tempAt(src.NoXPos, s.curfn, types.Types[TUINT8])
+		s.deferBitsTemp = deferBitsTemp
+		// For this value, AuxInt is initialized to zero by default
+		startDeferBits := s.entryNewValue0(ssa.OpConst8, types.Types[TUINT8])
+		s.vars[&deferBitsVar] = startDeferBits
+		s.deferBitsAddr = s.addr(deferBitsTemp, false)
+		s.store(types.Types[TUINT8], s.deferBitsAddr, startDeferBits)
+		// Make sure that the deferBits stack slot is kept alive (for use
+		// by panics) and stores to deferBits are not eliminated, even if
+		// all checking code on deferBits in the function exit can be
+		// eliminated, because the defer statements were all
+		// unconditional.
+		s.vars[&memVar] = s.newValue1Apos(ssa.OpVarLive, types.TypeMem, deferBitsTemp, s.mem(), false)
+	}
 
 	// Generate addresses of local declarations
 	s.decladdrs = map[*Node]*ssa.Value{}
@@ -289,6 +442,11 @@ func buildssa(fn *Node, worker int) *ssa.Func {
 
 	// Main call to ssa package to compile function
 	ssa.Compile(s.f)
+
+	if s.hasOpenDefers {
+		s.emitOpenDeferInfo()
+	}
+
 	return s.f
 }
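Putting the pieces so far together: the deferBits autotmp plus one set of argtmp slots per defer is all the exit path needs. The function below is a by-hand emulation of what the compiler effectively generates; it is a sketch only (the real lowering stores the closure and args to fixed stack slots, and the exit sequence is emitted by openDeferExit, shown further down):

	package main

	import "fmt"

	func f1(a int) { fmt.Println("f1", a) }
	func f2(b int) { fmt.Println("f2", b) }

	// Emulation of:
	//	func orig(cond bool) {
	//		defer f1(1)
	//		if cond {
	//			defer f2(2)
	//		}
	//	}
	func orig(cond bool) {
		var deferBits uint8 // the autotmp created above
		var a, b int        // stand-ins for the defer argtmp slots

		deferBits |= 1 << 0 // defer f1(1): set bit, save fn/args
		a = 1
		if cond {
			deferBits |= 1 << 1 // defer f2(2)
			b = 2
		}

		// Inline exit sequence, in reverse order of the defer statements:
		if deferBits&(1<<1) != 0 {
			deferBits &^= 1 << 1 // clear first, so a panicking defer is not rerun
			f2(b)
		}
		if deferBits&(1<<0) != 0 {
			deferBits &^= 1 << 0
			f1(a)
		}
	}

	func main() {
		orig(true)  // prints f2 2, then f1 1
		orig(false) // prints f1 1 only
	}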
@@ -377,6 +535,29 @@ func (s *state) updateUnsetPredPos(b *ssa.Block) {
 	}
 }
 
+// Information about each open-coded defer.
+type openDeferInfo struct {
+	// The ODEFER node representing the function call of the defer
+	n *Node
+	// If defer call is closure call, the address of the argtmp where the
+	// closure is stored.
+	closure *ssa.Value
+	// The node representing the argtmp where the closure is stored - used for
+	// function, method, or interface call, to store a closure that panic
+	// processing can use for this defer.
+	closureNode *Node
+	// If defer call is interface call, the address of the argtmp where the
+	// receiver is stored
+	rcvr *ssa.Value
+	// The node representing the argtmp where the receiver is stored
+	rcvrNode *Node
+	// The addresses of the argtmps where the evaluated arguments of the defer
+	// function call are stored.
+	argVals []*ssa.Value
+	// The nodes representing the argtmps where the args of the defer are stored
+	argNodes []*Node
+}
+
 type state struct {
 	// configuration (arch) information
 	config *ssa.Config
@@ -418,6 +599,9 @@ type state struct {
 	startmem *ssa.Value
 	sp       *ssa.Value
 	sb       *ssa.Value
+	// value representing address of where deferBits autotmp is stored
+	deferBitsAddr *ssa.Value
+	deferBitsTemp *Node
 
 	// line number stack. The current line number is top of stack
 	line []src.XPos
@@ -434,6 +618,19 @@ type state struct {
 	cgoUnsafeArgs bool
 	hasdefer      bool // whether the function contains a defer statement
 	softFloat     bool
+	hasOpenDefers bool // whether we are doing open-coded defers
+
+	// If doing open-coded defers, list of info about the defer calls in
+	// scanning order. Hence, at exit we should run these defers in reverse
+	// order of this list
+	openDefers []*openDeferInfo
+	// For open-coded defers, this is the beginning and end blocks of the last
+	// defer exit code that we have generated so far. We use these to share
+	// code between exits if the shareDeferExits option (disabled by default)
+	// is on.
+	lastDeferExit       *ssa.Block // Entry block of last defer exit code we generated
+	lastDeferFinalBlock *ssa.Block // Final block of last defer exit code we generated
+	lastDeferCount      int        // Number of defers encountered at that point
 }
 
 type funcLine struct {
@@ -471,12 +668,13 @@ var (
 	memVar = Node{Op: ONAME, Sym: &types.Sym{Name: "mem"}}
 
 	// dummy nodes for temporary variables
-	ptrVar    = Node{Op: ONAME, Sym: &types.Sym{Name: "ptr"}}
-	lenVar    = Node{Op: ONAME, Sym: &types.Sym{Name: "len"}}
-	newlenVar = Node{Op: ONAME, Sym: &types.Sym{Name: "newlen"}}
-	capVar    = Node{Op: ONAME, Sym: &types.Sym{Name: "cap"}}
-	typVar    = Node{Op: ONAME, Sym: &types.Sym{Name: "typ"}}
-	okVar     = Node{Op: ONAME, Sym: &types.Sym{Name: "ok"}}
+	ptrVar       = Node{Op: ONAME, Sym: &types.Sym{Name: "ptr"}}
+	lenVar       = Node{Op: ONAME, Sym: &types.Sym{Name: "len"}}
+	newlenVar    = Node{Op: ONAME, Sym: &types.Sym{Name: "newlen"}}
+	capVar       = Node{Op: ONAME, Sym: &types.Sym{Name: "cap"}}
+	typVar       = Node{Op: ONAME, Sym: &types.Sym{Name: "typ"}}
+	okVar        = Node{Op: ONAME, Sym: &types.Sym{Name: "ok"}}
+	deferBitsVar = Node{Op: ONAME, Sym: &types.Sym{Name: "deferBits"}}
 )
 
 // startBlock sets the current block we're generating code in to b.
@@ -867,11 +1065,26 @@ func (s *state) stmt(n *Node) {
 		}
 	}
 	case ODEFER:
-		d := callDefer
-		if n.Esc == EscNever {
-			d = callDeferStack
+		if Debug_defer > 0 {
+			var defertype string
+			if s.hasOpenDefers {
+				defertype = "open-coded"
+			} else if n.Esc == EscNever {
+				defertype = "stack-allocated"
+			} else {
+				defertype = "heap-allocated"
+			}
+			Warnl(n.Pos, "%s defer", defertype)
+		}
+		if s.hasOpenDefers {
+			s.openDeferRecord(n.Left)
+		} else {
+			d := callDefer
+			if n.Esc == EscNever {
+				d = callDeferStack
+			}
+			s.call(n.Left, d)
 		}
-		s.call(n.Left, d)
 	case OGO:
 		s.call(n.Left, callGo)
 
@@ -1288,12 +1501,28 @@ func (s *state) stmt(n *Node) {
 	}
 }
 
+// If true, share as many open-coded defer exits as possible (with the downside of
+// worse line-number information)
+const shareDeferExits = false
+
 // exit processes any code that needs to be generated just before returning.
 // It returns a BlockRet block that ends the control flow. Its control value
 // will be set to the final memory state.
 func (s *state) exit() *ssa.Block {
 	if s.hasdefer {
-		s.rtcall(Deferreturn, true, nil)
+		if s.hasOpenDefers {
+			if shareDeferExits && s.lastDeferExit != nil && len(s.openDefers) == s.lastDeferCount {
+				if s.curBlock.Kind != ssa.BlockPlain {
+					panic("Block for an exit should be BlockPlain")
+				}
+				s.curBlock.AddEdgeTo(s.lastDeferExit)
+				s.endBlock()
+				return s.lastDeferFinalBlock
+			}
+			s.openDeferExit()
+		} else {
+			s.rtcall(Deferreturn, true, nil)
+		}
 	}
 
 	// Run exit code. Typically, this code copies heap-allocated PPARAMOUT
@@ -1316,6 +1545,9 @@ func (s *state) exit() *ssa.Block {
 	b := s.endBlock()
 	b.Kind = ssa.BlockRet
 	b.SetControl(m)
+	if s.hasdefer && s.hasOpenDefers {
+		s.lastDeferFinalBlock = b
+	}
 	return b
 }
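Since exit() runs once per return statement and, unless shareDeferExits is flipped on, generates a fresh copy of the defer-exit sequence each time, the numReturns*numDefers > 15 cutoff in buildssa bounds the code growth. For instance, this function (an illustrative sketch, not from the patch) still qualifies, because 4 returns times 2 defers is 8, which is at most 15; the same shape with eight return statements would fall back to the deferreturn path:

	package main

	func classify(n int) string {
		defer println("first defer")
		defer println("second defer")
		// Each return below gets its own inlined copy of the exit-time
		// checks and calls for both defers.
		switch {
		case n < 0:
			return "negative"
		case n == 0:
			return "zero"
		case n < 100:
			return "small"
		default:
			return "big"
		}
	}

	func main() {
		println(classify(42))
	}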
@@ -3841,6 +4073,230 @@ func (s *state) intrinsicArgs(n *Node) []*ssa.Value {
 	return args
 }
 
+// openDeferRecord adds code to evaluate and store the args for an open-coded defer
+// call, and records info about the defer, so we can generate proper code on the
+// exit paths. n is the sub-node of the defer node that is the actual function
+// call. We will also record funcdata information on where the args are stored
+// (as well as the deferBits variable), and this will enable us to run the proper
+// defer calls during panics.
+func (s *state) openDeferRecord(n *Node) {
+	// Do any needed expression evaluation for the args (including the
+	// receiver, if any). This may be evaluating something like 'autotmp_3 =
+	// once.mutex'. Such a statement will create a mapping in s.vars[] from
+	// the autotmp name to the evaluated SSA arg value, but won't do any
+	// stores to the stack.
+	s.stmtList(n.List)
+
+	var args []*ssa.Value
+	var argNodes []*Node
+
+	opendefer := &openDeferInfo{
+		n: n,
+	}
+	fn := n.Left
+	if n.Op == OCALLFUNC {
+		// We must always store the function value in a stack slot for the
+		// runtime panic code to use. But in the defer exit code, we will
+		// call the function directly if it is a static function.
+		closureVal := s.expr(fn)
+		closure := s.openDeferSave(fn, fn.Type, closureVal)
+		opendefer.closureNode = closure.Aux.(*Node)
+		if !(fn.Op == ONAME && fn.Class() == PFUNC) {
+			opendefer.closure = closure
+		}
+	} else if n.Op == OCALLMETH {
+		if fn.Op != ODOTMETH {
+			Fatalf("OCALLMETH: n.Left not an ODOTMETH: %v", fn)
+		}
+		closureVal := s.getMethodClosure(fn)
+		// We must always store the function value in a stack slot for the
+		// runtime panic code to use. But in the defer exit code, we will
+		// call the method directly.
+		closure := s.openDeferSave(fn, fn.Type, closureVal)
+		opendefer.closureNode = closure.Aux.(*Node)
+	} else {
+		if fn.Op != ODOTINTER {
+			Fatalf("OCALLINTER: n.Left not an ODOTINTER: %v", fn.Op)
+		}
+		closure, rcvr := s.getClosureAndRcvr(fn)
+		opendefer.closure = s.openDeferSave(fn, closure.Type, closure)
+		// Important to get the receiver type correct, so it is recognized
+		// as a pointer for GC purposes.
+		opendefer.rcvr = s.openDeferSave(nil, fn.Type.Recv().Type, rcvr)
+		opendefer.closureNode = opendefer.closure.Aux.(*Node)
+		opendefer.rcvrNode = opendefer.rcvr.Aux.(*Node)
+	}
+	for _, argn := range n.Rlist.Slice() {
+		v := s.openDeferSave(argn, argn.Type, s.expr(argn))
+		args = append(args, v)
+		argNodes = append(argNodes, v.Aux.(*Node))
+	}
+	opendefer.argVals = args
+	opendefer.argNodes = argNodes
+	index := len(s.openDefers)
+	s.openDefers = append(s.openDefers, opendefer)
+
+	// Update deferBits only after evaluation and storage to stack of
+	// args/receiver/interface is successful.
+	bitvalue := s.constInt8(types.Types[TUINT8], 1<<uint(index))
+	newDeferBits := s.newValue2(ssa.OpOr8, types.Types[TUINT8], s.variable(&deferBitsVar, types.Types[TUINT8]), bitvalue)
+	s.vars[&deferBitsVar] = newDeferBits
+	s.store(types.Types[TUINT8], s.deferBitsAddr, newDeferBits)
+}
+
+// openDeferExit generates SSA for processing all the open coded defers at exit.
+// The code involves loading deferBits, and checking each of the bits to see if
+// the corresponding defer statement was executed. For each bit that is turned
+// on, the associated defer call is made.
+func (s *state) openDeferExit() {
+	deferExit := s.f.NewBlock(ssa.BlockPlain)
+	s.endBlock().AddEdgeTo(deferExit)
+	s.startBlock(deferExit)
+	s.lastDeferExit = deferExit
+	s.lastDeferCount = len(s.openDefers)
+	zeroval := s.constInt8(types.Types[TUINT8], 0)
+	// Test for and run defers in reverse order
+	for i := len(s.openDefers) - 1; i >= 0; i-- {
+		r := s.openDefers[i]
+		bCond := s.f.NewBlock(ssa.BlockPlain)
+		bEnd := s.f.NewBlock(ssa.BlockPlain)
+
+		deferBits := s.variable(&deferBitsVar, types.Types[TUINT8])
+		// Generate code to check if the bit associated with the current
+		// defer is set.
+		bitval := s.constInt8(types.Types[TUINT8], 1<<uint(i))
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -215,6 +215,18 @@ func walkstmt(n *Node) *Node {
 	case ODEFER:
+		Curfn.Func.SetHasDefer(true)
+		Curfn.Func.numDefers++
+		if Curfn.Func.numDefers > maxOpenDefers {
+			// Don't allow open-coded defers if there are more than
+			// 8 defers in the function, since we use a single
+			// byte to record active defers.
+			Curfn.Func.SetOpenCodedDeferDisallowed(true)
+		}
+		if n.Esc != EscNever {
+			// If n.Esc is not EscNever, then this defer occurs in a loop,
+			// so open-coded defers cannot be used in this function.
+			Curfn.Func.SetOpenCodedDeferDisallowed(true)
+		}
 		fallthrough
 	case OGO:
 		switch n.Left.Op {
@@ -255,6 +267,7 @@ func walkstmt(n *Node) *Node {
 		walkstmtlist(n.Rlist.Slice())
 
 	case ORETURN:
+		Curfn.Func.numReturns++
 		if n.List.Len() == 0 {
 			break
 		}
diff --git a/src/cmd/compile/internal/ssa/deadstore.go b/src/cmd/compile/internal/ssa/deadstore.go
index 6b9bcedadb..88af7a6f4a 100644
--- a/src/cmd/compile/internal/ssa/deadstore.go
+++ b/src/cmd/compile/internal/ssa/deadstore.go
@@ -170,6 +170,11 @@ func elimDeadAutosGeneric(f *Func) {
 			return
 		case OpVarLive:
 			// Don't delete the auto if it needs to be kept alive.
+
+			// We depend on this check to keep the autotmp stack slots
+			// for open-coded defers from being removed (since they
+			// may not be used by the inline code, but will be used by
+			// panic processing).
 			n, ok := v.Aux.(GCNode)
 			if !ok || n.StorageClass() != ClassAuto {
 				return
diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go
index cdd5161913..332e201899 100644
--- a/src/cmd/compile/internal/ssa/func.go
+++ b/src/cmd/compile/internal/ssa/func.go
@@ -32,8 +32,16 @@ type Func struct {
 	Type   *types.Type // type signature of the function.
 	Blocks []*Block    // unordered set of all basic blocks (note: not indexable by ID)
 	Entry  *Block      // the entry basic block
-	bid    idAlloc     // block ID allocator
-	vid    idAlloc     // value ID allocator
+
+	// If we are using open-coded defers, this is the first call to a deferred
+	// function in the final defer exit sequence that we generated. This call
+	// should be after all defer statements, and will have all args, etc. of
+	// all defer calls as live. The liveness info of this call will be used
+	// for the deferreturn/ret segment generated for functions with open-coded
+	// defers.
+	LastDeferExit *Value
+	bid           idAlloc // block ID allocator
+	vid           idAlloc // value ID allocator
 
 	// Given an environment variable used for debug hash match,
 	// what file (if any) receives the yes/no logging?
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index 1c101bfc27..468e9402ee 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -405,10 +405,11 @@ type FuncInfo struct {
 	dwarfAbsFnSym      *LSym
 	dwarfDebugLinesSym *LSym
 
-	GCArgs       *LSym
-	GCLocals     *LSym
-	GCRegs       *LSym
-	StackObjects *LSym
+	GCArgs             *LSym
+	GCLocals           *LSym
+	GCRegs             *LSym
+	StackObjects       *LSym
+	OpenCodedDeferInfo *LSym
 }
 
 type InlMark struct {
diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go
index f28fa65e95..b80f86799a 100644
--- a/src/cmd/internal/obj/x86/obj6.go
+++ b/src/cmd/internal/obj/x86/obj6.go
@@ -419,6 +419,9 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
 		// to a PLT, so make sure the GOT pointer is loaded into BX.
 		// RegTo2 is set on the replacement call insn to stop it being
 		// processed when it is in turn passed to progedit.
+		//
+		// We disable open-coded defers in buildssa() on 386 ONLY with shared
+		// libraries because of this extra code added before deferreturn calls.
 		if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
 			return
 		}
diff --git a/src/cmd/internal/objabi/funcdata.go b/src/cmd/internal/objabi/funcdata.go
index addbd2ac88..08b75eb9fe 100644
--- a/src/cmd/internal/objabi/funcdata.go
+++ b/src/cmd/internal/objabi/funcdata.go
@@ -15,11 +15,12 @@ const (
 	PCDATA_StackMapIndex = 1
 	PCDATA_InlTreeIndex  = 2
 
-	FUNCDATA_ArgsPointerMaps   = 0
-	FUNCDATA_LocalsPointerMaps = 1
-	FUNCDATA_RegPointerMaps    = 2
-	FUNCDATA_StackObjects      = 3
-	FUNCDATA_InlTree           = 4
+	FUNCDATA_ArgsPointerMaps    = 0
+	FUNCDATA_LocalsPointerMaps  = 1
+	FUNCDATA_RegPointerMaps     = 2
+	FUNCDATA_StackObjects       = 3
+	FUNCDATA_InlTree            = 4
+	FUNCDATA_OpenCodedDeferInfo = 5
 
 	// ArgsSizeUnknown is set in Func.argsize to mark all functions
 	// whose argument size is unknown (C vararg functions, and
diff --git a/src/cmd/internal/objabi/funcid.go b/src/cmd/internal/objabi/funcid.go
index c13c3cb458..487f009830 100644
--- a/src/cmd/internal/objabi/funcid.go
+++ b/src/cmd/internal/objabi/funcid.go
@@ -85,6 +85,12 @@ func GetFuncID(name, file string) FuncID {
 		return FuncID_panicwrap
 	case "runtime.handleAsyncEvent":
 		return FuncID_handleAsyncEvent
+	case "runtime.deferreturn":
+		// Don't show in the call stack (used when invoking defer functions)
+		return FuncID_wrapper
+	case "runtime.runOpenDeferFrame":
+		// Don't show in the call stack (used when invoking defer functions)
+		return FuncID_wrapper
 	}
 	if file == "" {
 		return FuncID_wrapper
diff --git a/src/cmd/internal/objabi/stack.go b/src/cmd/internal/objabi/stack.go
index 62ab0398a6..7320dbf365 100644
--- a/src/cmd/internal/objabi/stack.go
+++ b/src/cmd/internal/objabi/stack.go
@@ -18,7 +18,7 @@ const (
 )
 
 // Initialize StackGuard and StackLimit according to target system.
-var StackGuard = 880*stackGuardMultiplier() + StackSystem
+var StackGuard = 896*stackGuardMultiplier() + StackSystem
 var StackLimit = StackGuard - StackSystem - StackSmall
 
 // stackGuardMultiplier returns a multiplier to apply to the default
diff --git a/src/cmd/link/internal/ld/pcln.go b/src/cmd/link/internal/ld/pcln.go
index d9904f9093..9cccc7a6e9 100644
--- a/src/cmd/link/internal/ld/pcln.go
+++ b/src/cmd/link/internal/ld/pcln.go
@@ -11,6 +11,7 @@ import (
 	"cmd/internal/sys"
 	"cmd/link/internal/sym"
 	"encoding/binary"
+	"fmt"
 	"log"
 	"os"
 	"path/filepath"
@@ -255,13 +256,23 @@ func (ctxt *Link) pclntab() {
 			}
 			if r.Type.IsDirectJump() && r.Sym != nil && r.Sym.Name == "runtime.deferreturn" {
 				if ctxt.Arch.Family == sys.Wasm {
-					deferreturn = lastWasmAddr
+					deferreturn = lastWasmAddr - 1
 				} else {
 					// Note: the relocation target is in the call instruction, but
 					// is not necessarily the whole instruction (for instance, on
 					// x86 the relocation applies to bytes [1:5] of the 5 byte call
 					// instruction).
 					deferreturn = uint32(r.Off)
+					switch ctxt.Arch.Family {
+					case sys.AMD64, sys.I386:
+						deferreturn--
+					case sys.PPC64, sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64, sys.RISCV64:
+						// no change
+					case sys.S390X:
+						deferreturn -= 2
+					default:
+						panic(fmt.Sprint("Unhandled architecture:", ctxt.Arch.Family))
+					}
 				}
 				break // only need one
 			}
diff --git a/src/cmd/link/internal/ld/symtab.go b/src/cmd/link/internal/ld/symtab.go
index d686a8a476..b4236a5239 100644
--- a/src/cmd/link/internal/ld/symtab.go
+++ b/src/cmd/link/internal/ld/symtab.go
@@ -498,7 +498,8 @@ func (ctxt *Link) symtab() {
 		case strings.HasPrefix(s.Name, "gcargs."),
 			strings.HasPrefix(s.Name, "gclocals."),
 			strings.HasPrefix(s.Name, "gclocals·"),
-			strings.HasPrefix(s.Name, "inltree."):
+			strings.HasPrefix(s.Name, "inltree."),
+			strings.HasSuffix(s.Name, ".opendefer"):
 			s.Type = sym.SGOFUNC
 			s.Attr |= sym.AttrNotInSymbolTable
 			s.Outer = symgofunc
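The per-arch fixups come down to where the relocation sits inside the call instruction versus where the runtime wants the deferreturn PC to point. On amd64, for example, a direct call is five bytes and the relocation covers its last four:

	e8 xx xx xx xx          CALL runtime.deferreturn
	^  ^
	|  r.Off: the relocation covers bytes [1:5], the rel32 operand
	+--- the PC that must be recorded is the instruction start, hence deferreturn--

On fixed-width ISAs the relocation is applied to the whole instruction word, so no adjustment is needed; the s390x case presumably reflects a 2-byte opcode preceding its relocated operand.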
diff --git a/src/runtime/callers_test.go b/src/runtime/callers_test.go
index fcfd10deff..eee1d5c867 100644
--- a/src/runtime/callers_test.go
+++ b/src/runtime/callers_test.go
@@ -188,3 +188,32 @@ func TestCallersDivZeroPanic(t *testing.T) {
 		t.Fatal("did not see divide-by-zero panic")
 	}
 }
+
+func TestCallersDeferNilFuncPanic(t *testing.T) {
+	// Make sure we don't have any extra frames on the stack. We cut off the check
+	// at runtime.sigpanic, because non-open-coded defers (which may be used in
+	// non-opt or race checker mode) include an extra 'jmpdefer' frame (which is
+	// where the nil pointer deref happens). We could consider hiding jmpdefer in
+	// tracebacks.
+	state := 1
+	want := []string{"runtime.Callers", "runtime_test.TestCallersDeferNilFuncPanic.func1",
+		"runtime.gopanic", "runtime.panicmem", "runtime.sigpanic"}
+
+	defer func() {
+		if r := recover(); r == nil {
+			t.Fatal("did not panic")
+		}
+		pcs := make([]uintptr, 20)
+		pcs = pcs[:runtime.Callers(0, pcs)]
+		testCallersEqual(t, pcs, want)
+		if state == 1 {
+			t.Fatal("nil defer func panicked at defer time rather than function exit time")
+		}
+	}()
+
+	var f func()
+	defer f()
+	// Use the value of 'state' to make sure nil defer func f causes panic at
+	// function exit, rather than at the defer statement.
+	state = 2
+}
diff --git a/src/runtime/defer_test.go b/src/runtime/defer_test.go
index 0d3e8e9d63..d830fc591f 100644
--- a/src/runtime/defer_test.go
+++ b/src/runtime/defer_test.go
@@ -15,11 +15,11 @@ import (
 // unconditional panic (hence no return from the function)
 func TestUnconditionalPanic(t *testing.T) {
 	defer func() {
-		if recover() == nil {
+		if recover() != "testUnconditional" {
 			t.Fatal("expected unconditional panic")
 		}
 	}()
-	panic("panic should be recovered")
+	panic("testUnconditional")
 }
 
 var glob int = 3
@@ -30,7 +30,7 @@ func TestOpenAndNonOpenDefers(t *testing.T) {
 	for {
 		// Non-open defer because in a loop
 		defer func(n int) {
-			if recover() == nil {
+			if recover() != "testNonOpenDefer" {
 				t.Fatal("expected testNonOpen panic")
 			}
 		}(3)
@@ -45,7 +45,7 @@ func TestOpenAndNonOpenDefers(t *testing.T) {
 //go:noinline
 func testOpen(t *testing.T, arg int) {
 	defer func(n int) {
-		if recover() == nil {
+		if recover() != "testOpenDefer" {
 			t.Fatal("expected testOpen panic")
 		}
 	}(4)
@@ -61,7 +61,7 @@ func TestNonOpenAndOpenDefers(t *testing.T) {
 	for {
 		// Non-open defer because in a loop
 		defer func(n int) {
-			if recover() == nil {
+			if recover() != "testNonOpenDefer" {
 				t.Fatal("expected testNonOpen panic")
 			}
 		}(3)
@@ -80,7 +80,7 @@ func TestConditionalDefers(t *testing.T) {
 	list = make([]int, 0, 10)
 
 	defer func() {
-		if recover() == nil {
+		if recover() != "testConditional" {
 			t.Fatal("expected panic")
 		}
 		want := []int{4, 2, 1}
@@ -106,7 +106,7 @@ func testConditionalDefers(n int) {
 			defer doappend(4)
 		}
 	}
-	panic("test")
+	panic("testConditional")
 }
 
 // Test that there is no compile-time or run-time error if an open-coded defer
@@ -174,3 +174,52 @@ func TestRecoverMatching(t *testing.T) {
 	}()
 	panic("panic1")
 }
+
+type nonSSAable [128]byte
+
+type bigStruct struct {
+	x, y, z, w, p, q int64
+}
+
+func mknonSSAable() nonSSAable {
+	globint1++
+	return nonSSAable{0, 0, 0, 0, 5}
+}
+
+var globint1, globint2 int
+
+//go:noinline
+func sideeffect(n int64) int64 {
+	globint2++
+	return n
+}
+
+// Test that nonSSAable arguments to defer are handled correctly and only evaluated once.
+func TestNonSSAableArgs(t *testing.T) {
+	globint1 = 0
+	globint2 = 0
+	var save1 byte
+	var save2 int64
+
+	defer func() {
+		if globint1 != 1 {
+			t.Fatal(fmt.Sprintf("globint1: wanted: 1, got %v", globint1))
+		}
+		if save1 != 5 {
+			t.Fatal(fmt.Sprintf("save1: wanted: 5, got %v", save1))
+		}
+		if globint2 != 1 {
+			t.Fatal(fmt.Sprintf("globint2: wanted: 1, got %v", globint2))
+		}
+		if save2 != 2 {
+			t.Fatal(fmt.Sprintf("save2: wanted: 2, got %v", save2))
+		}
+	}()
+
+	defer func(n nonSSAable) {
+		save1 = n[4]
+	}(mknonSSAable())
+	defer func(b bigStruct) {
+		save2 = b.y
+	}(bigStruct{1, 2, 3, 4, 5, sideeffect(6)})
+}
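TestNonSSAableArgs above pins down an ordering guarantee that open-coded defers must preserve: arguments, even ones too large or complex to SSA, are evaluated exactly once, at the defer statement, and saved to argtmp slots; only the call itself is delayed to exit. In miniature (plain Go semantics, shown here for reference):

	package main

	import "fmt"

	var evals int

	func arg() int {
		evals++
		return evals
	}

	func main() {
		defer fmt.Println("deferred saw", arg()) // arg() evaluated here, once
		fmt.Println("evals at end of body:", evals)
		// Output:
		// evals at end of body: 1
		// deferred saw 1
	}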
diff --git a/src/runtime/funcdata.h b/src/runtime/funcdata.h
index d9a35c51a0..0fb50ddfba 100644
--- a/src/runtime/funcdata.h
+++ b/src/runtime/funcdata.h
@@ -17,6 +17,7 @@
 #define FUNCDATA_RegPointerMaps 2
 #define FUNCDATA_StackObjects 3
 #define FUNCDATA_InlTree 4
+#define FUNCDATA_OpenCodedDeferInfo 5 /* info for func with open-coded defers */
 
 // Pseudo-assembly statements.
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 5f33cd7c0c..bdfe117e45 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -10,6 +10,19 @@ import (
 	"unsafe"
 )
 
+// We have two different ways of doing defers. The older way involves creating a
+// defer record at the time that a defer statement is executing and adding it to a
+// defer chain. This chain is inspected by the deferreturn call at all function
+// exits in order to run the appropriate defer calls. A cheaper way (which we call
+// open-coded defers) is used for functions in which no defer statements occur in
+// loops. In that case, we simply store the defer function/arg information into
+// specific stack slots at the point of each defer statement, as well as setting a
+// bit in a bitmask. At each function exit, we add inline code to directly make
+// the appropriate defer calls based on the bitmask and fn/arg information stored
+// on the stack. During panic/Goexit processing, the appropriate defer calls are
+// made using extra funcdata info that indicates the exact stack slots that
+// contain the bitmask and defer fn/args.
+
 // Check to make sure we can really generate a panic. If the panic
 // was generated from the runtime, or from inside malloc, then convert
 // to a throw of msg.
@@ -263,19 +276,24 @@ func deferprocStack(d *_defer) {
 	// are initialized here.
 	d.started = false
 	d.heap = false
+	d.openDefer = false
 	d.sp = getcallersp()
 	d.pc = getcallerpc()
+	d.framepc = 0
+	d.varp = 0
 	// The lines below implement:
 	//   d.panic = nil
+	//   d.fd = nil
 	//   d.link = gp._defer
 	//   gp._defer = d
-	// But without write barriers. The first two are writes to
+	// But without write barriers. The first three are writes to
 	// the stack so they don't need a write barrier, and furthermore
 	// are to uninitialized memory, so they must not use a write barrier.
-	// The third write does not require a write barrier because we
+	// The fourth write does not require a write barrier because we
 	// explicitly mark all the defer structures, so we don't need to
 	// keep track of pointers to them with a write barrier.
 	*(*uintptr)(unsafe.Pointer(&d._panic)) = 0
+	*(*uintptr)(unsafe.Pointer(&d.fd)) = 0
 	*(*uintptr)(unsafe.Pointer(&d.link)) = uintptr(unsafe.Pointer(gp._defer))
 	*(*uintptr)(unsafe.Pointer(&gp._defer)) = uintptr(unsafe.Pointer(d))
 
@@ -463,8 +481,12 @@ func freedefer(d *_defer) {
 	// started causing a nosplit stack overflow via typedmemmove.
 	d.siz = 0
 	d.started = false
+	d.openDefer = false
 	d.sp = 0
 	d.pc = 0
+	d.framepc = 0
+	d.varp = 0
+	d.fd = nil
 	// d._panic and d.fn must be nil already.
 	// If not, we would have called freedeferpanic or freedeferfn above,
 	// both of which throw.
@@ -493,9 +515,11 @@ func freedeferfn() {
 // to have been called by the caller of deferreturn at the point
 // just before deferreturn was called. The effect is that deferreturn
 // is called again and again until there are no more deferred functions.
-// Cannot split the stack because we reuse the caller's frame to
-// call the deferred function.
-
+//
+// Declared as nosplit, because the function should not be preempted once we start
+// modifying the caller's frame in order to reuse the frame to call the deferred
+// function.
+//
 // The single argument isn't actually used - it just has its address
 // taken so it can be matched against pending defers.
 //go:nosplit
@@ -509,6 +533,15 @@ func deferreturn(arg0 uintptr) {
 	if d.sp != sp {
 		return
 	}
+	if d.openDefer {
+		done := runOpenDeferFrame(gp, d)
+		if !done {
+			throw("unfinished open-coded defers in deferreturn")
+		}
+		gp._defer = d.link
+		freedefer(d)
+		return
+	}
 
 	// Moving arguments around.
 	//
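For orientation, here is the shape of the _defer record after this change, paraphrased as a standalone type. Field order and the new fields mirror gc.deferstruct from earlier in this patch; this is not the real declaration (runtime2.go uses typed pointers for fn, _panic and link, and the args region follows the struct):

	package main

	import "unsafe"

	type deferRecord struct {
		siz       uint32  // size of the args region
		started   bool
		heap      bool
		openDefer bool    // new: record describes a frame with open-coded defers
		sp        uintptr // sp at time of defer
		pc        uintptr // for openDefer, the deferreturn PC to resume at after a recover
		fn        uintptr // *funcval in the real struct
		_panic    uintptr // *_panic in the real struct
		link      uintptr // *_defer in the real struct
		framepc   uintptr // new: current pc of the frame, to continue tracebacks
		varp      uintptr // new: varp of the frame; funcdata offsets are relative to this
		fd        uintptr // new: unsafe.Pointer to the FUNCDATA_OpenCodedDeferInfo blob
		// followed by the args region, sized by siz
	}

	func main() {
		println(unsafe.Sizeof(deferRecord{})) // 72 on 64-bit, given this layout
	}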
@@ -544,6 +577,8 @@ func Goexit() {
 	// This code is similar to gopanic, see that implementation
 	// for detailed comments.
 	gp := getg()
+	addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
+
 	for {
 		d := gp._defer
 		if d == nil {
@@ -554,13 +589,26 @@ func Goexit() {
 				d._panic.aborted = true
 				d._panic = nil
 			}
-			d.fn = nil
-			gp._defer = d.link
-			freedefer(d)
-			continue
+			if !d.openDefer {
+				d.fn = nil
+				gp._defer = d.link
+				freedefer(d)
+				continue
+			}
 		}
 		d.started = true
-		reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
+		if d.openDefer {
+			done := runOpenDeferFrame(gp, d)
+			if !done {
+				// We should always run all defers in the frame,
+				// since there is no panic associated with this
+				// defer that can be recovered.
+				throw("unfinished open-coded defers in Goexit")
+			}
+			addOneOpenDeferFrame(gp, 0, nil)
+		} else {
+			reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
+		}
 		if gp._defer != d {
 			throw("bad defer entry in Goexit")
 		}
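The Goexit changes preserve an observable guarantee: runtime.Goexit still runs every pending defer, now including open-coded ones, which Goexit discovers frame by frame via addOneOpenDeferFrame since no panic is unwinding the stack. Standard behavior, for reference:

	package main

	import (
		"fmt"
		"runtime"
	)

	func main() {
		done := make(chan struct{})
		go func() {
			defer close(done)                    // runs second
			defer fmt.Println("open-coded, ran") // runs first, despite Goexit
			runtime.Goexit()                     // ends the goroutine, running its defers
		}()
		<-done
	}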
@@ -607,6 +655,177 @@ func printpanics(p *_panic) {
 	print("\n")
 }
 
+// addOneOpenDeferFrame scans the stack for the first frame (if any) with
+// open-coded defers and if it finds one, adds a single record to the defer chain
+// for that frame. If sp is non-nil, it starts the stack scan from the frame
+// specified by sp. If sp is nil, it uses the sp from the current defer record
+// (which has just been finished). Hence, it continues the stack scan from the
+// frame of the defer that just finished. It skips any frame that already has an
+// open-coded _defer record, which would have been created from a previous
+// (unrecovered) panic.
+//
+// Note: All entries of the defer chain (including this new open-coded entry) have
+// their pointers (including sp) adjusted properly if the stack moves while
+// running deferred functions. Also, it is safe to pass in the sp arg (which is
+// the direct result of calling getcallersp()), because all pointer variables
+// (including arguments) are adjusted as needed during stack copies.
+func addOneOpenDeferFrame(gp *g, pc uintptr, sp unsafe.Pointer) {
+	var prevDefer *_defer
+	if sp == nil {
+		prevDefer = gp._defer
+		pc = prevDefer.framepc
+		sp = unsafe.Pointer(prevDefer.sp)
+	}
+	systemstack(func() {
+		gentraceback(pc, uintptr(sp), 0, gp, 0, nil, 0x7fffffff,
+			func(frame *stkframe, unused unsafe.Pointer) bool {
+				if prevDefer != nil && prevDefer.sp == frame.sp {
+					// Skip the frame for the previous defer that
+					// we just finished (and was used to set
+					// where we restarted the stack scan)
+					return true
+				}
+				f := frame.fn
+				fd := funcdata(f, _FUNCDATA_OpenCodedDeferInfo)
+				if fd == nil {
+					return true
+				}
+				// Insert the open defer record in the
+				// chain, in order sorted by sp.
+				d := gp._defer
+				var prev *_defer
+				for d != nil {
+					dsp := d.sp
+					if frame.sp < dsp {
+						break
+					}
+					if frame.sp == dsp {
+						if !d.openDefer {
+							throw("duplicated defer entry")
+						}
+						return true
+					}
+					prev = d
+					d = d.link
+				}
+				if frame.fn.deferreturn == 0 {
+					throw("missing deferreturn")
+				}
+
+				maxargsize, _ := readvarintUnsafe(fd)
+				d1 := newdefer(int32(maxargsize))
+				d1.openDefer = true
+				d1._panic = nil
+				// These are the pc/sp to set after we've
+				// run a defer in this frame that did a
+				// recover. We return to a special
+				// deferreturn that runs any remaining
+				// defers and then returns from the
+				// function.
+				d1.pc = frame.fn.entry + uintptr(frame.fn.deferreturn)
+				d1.varp = frame.varp
+				d1.fd = fd
+				// Save the SP/PC associated with current frame,
+				// so we can continue stack trace later if needed.
+				d1.framepc = frame.pc
+				d1.sp = frame.sp
+				d1.link = d
+				if prev == nil {
+					gp._defer = d1
+				} else {
+					prev.link = d1
+				}
+				// Stop stack scanning after adding one open defer record
+				return false
+			},
+			nil, 0)
+	})
+}
+
+// readvarintUnsafe reads the uint32 in varint format starting at fd, and returns the
+// uint32 and a pointer to the byte following the varint.
+//
+// There is a similar function runtime.readvarint, which takes a slice of bytes,
+// rather than an unsafe pointer. These functions are duplicated, because one of
+// the two use cases for the functions would get slower if the functions were
+// combined.
+func readvarintUnsafe(fd unsafe.Pointer) (uint32, unsafe.Pointer) {
+	var r uint32
+	var shift int
+	for {
+		b := *(*uint8)((unsafe.Pointer(fd)))
+		fd = add(fd, unsafe.Sizeof(b))
+		if b < 128 {
+			return r + uint32(b)<<shift, fd
+		}
+		r += ((uint32(b) &^ 128) << shift)
+		shift += 7
+		if shift > 28 {
+			panic("Bad varint")
+		}
+	}
+}
+
+// runOpenDeferFrame runs the active open-coded defers in the frame specified by
+// d. It normally processes all active defers in the frame, but stops immediately
+// if a defer does a successful recover. It returns true if there are no
+// remaining defers to run in the frame.
+func runOpenDeferFrame(gp *g, d *_defer) bool {
+	done := true
+	fd := d.fd
+
+	// Skip the maxargsize
+	_, fd = readvarintUnsafe(fd)
+	deferBitsOffset, fd := readvarintUnsafe(fd)
+	nDefers, fd := readvarintUnsafe(fd)
+	deferBits := *(*uint8)(unsafe.Pointer(d.varp - uintptr(deferBitsOffset)))
+
+	for i := int(nDefers) - 1; i >= 0; i-- {
+		// read the funcdata info for this defer
+		var argWidth, closureOffset, nArgs uint32
+		argWidth, fd = readvarintUnsafe(fd)
+		closureOffset, fd = readvarintUnsafe(fd)
+		nArgs, fd = readvarintUnsafe(fd)
+		if deferBits&(1<<uint(i)) == 0 {
+			for j := uint32(0); j < nArgs; j++ {
+				_, fd = readvarintUnsafe(fd)
+				_, fd = readvarintUnsafe(fd)
+				_, fd = readvarintUnsafe(fd)
+			}
+			continue
+		}
diff --git a/test/defererrcheck.go b/test/defererrcheck.go
new file mode 100644
--- /dev/null
+++ b/test/defererrcheck.go
+// errorcheck -0 -l -d defer
+
+package main
+
+import "fmt"
+
+var glob = 3
+
+func f2() {
+	for {
+		defer func() { // ERROR "heap-allocated defer"
+			fmt.Println("defer1")
+		}()
+		if glob > 2 {
+			break
+		}
+	}
+	defer func() { // ERROR "stack-allocated defer"
+		fmt.Println("defer2")
+	}()
+}
+
+func f3() {
+	defer func() { // ERROR "stack-allocated defer"
+		fmt.Println("defer2")
+	}()
+	for {
+		defer func() { // ERROR "heap-allocated defer"
+			fmt.Println("defer1")
+		}()
+		if glob > 2 {
+			break
+		}
+	}
+}
+
+func f4() {
+	defer func() { // ERROR "open-coded defer"
+		fmt.Println("defer")
+	}()
+label:
+	fmt.Println("goto loop")
+	if glob > 2 {
+		goto label
+	}
+}
+
+func f5() {
+label:
+	fmt.Println("goto loop")
+	defer func() { // ERROR "heap-allocated defer"
+		fmt.Println("defer")
+	}()
+	if glob > 2 {
+		goto label
+	}
+}
+
+func f6() {
+label:
+	fmt.Println("goto loop")
+	if glob > 2 {
+		goto label
+	}
+	// The current analysis doesn't end a backward goto loop, so this defer is
+	// considered to be inside a loop
+	defer func() { // ERROR "heap-allocated defer"
+		fmt.Println("defer")
+	}()
+}
diff --git a/test/live.go b/test/live.go
index b6e6d93f5f..32c397f4a9 100644
--- a/test/live.go
+++ b/test/live.go
@@ -367,16 +367,19 @@ func f24() {
 	m2[[2]string{"x", "y"}] = nil
 }
 
-// defer should not cause spurious ambiguously live variables
-
+// Non-open-coded defers should not cause autotmps. (Open-coded defers do create extra autotmps).
 func f25(b bool) {
-	defer g25()
+	for i := 0; i < 2; i++ {
+		// Put in loop to make sure defer is not open-coded
+		defer g25()
+	}
 	if b {
 		return
 	}
 	var x string
 	x = g14()
 	printstring(x)
+	return
 }
 
 func g25()
@@ -417,7 +420,8 @@ func f27defer(b bool) {
 		defer call27(func() { x++ }) // ERROR "stack object .autotmp_[0-9]+ struct \{"
 	}
 	defer call27(func() { x++ }) // ERROR "stack object .autotmp_[0-9]+ struct \{"
-	printnl()
+	printnl() // ERROR "live at call to printnl: .autotmp_[0-9]+ .autotmp_[0-9]+"
+	return   // ERROR "live at call to call27: .autotmp_[0-9]+"
 }
 
 // and newproc (go) escapes to the heap
@@ -687,12 +691,12 @@ type R struct{ *T } // ERRORAUTO "live at entry to \(\*R\)\.Foo: \.this ptr" "li
 // In particular, at printint r must be live.
 func f41(p, q *int) (r *int) { // ERROR "live at entry to f41: p q$"
 	r = p
-	defer func() { // ERROR "live at call to deferprocStack: q r$" "live at call to deferreturn: r$"
+	defer func() {
 		recover()
 	}()
-	printint(0) // ERROR "live at call to printint: q r$"
+	printint(0) // ERROR "live at call to printint: q r .autotmp_[0-9]+$"
 	r = q
-	return // ERROR "live at call to deferreturn: r$"
+	return // ERROR "live at call to f41.func1: r .autotmp_[0-9]+$"
 }
 
 func f42() {
diff --git a/test/nosplit.go b/test/nosplit.go
index 266e6077b1..3b7e605999 100644
--- a/test/nosplit.go
+++ b/test/nosplit.go
@@ -309,17 +309,17 @@ TestCases:
 		name := m[1]
 		size, _ := strconv.Atoi(m[2])
 
-		// The limit was originally 128 but is now 752 (880-128).
+		// The limit was originally 128 but is now 768 (896-128).
 		// Instead of rewriting the test cases above, adjust
 		// the first stack frame to use up the extra bytes.
 		if i == 0 {
-			size += (880 - 128) - 128
+			size += (896 - 128) - 128
 
 			// Noopt builds have a larger stackguard.
 			// See ../src/cmd/dist/buildruntime.go:stackGuardMultiplier
 			// This increase is included in objabi.StackGuard
 			for _, s := range strings.Split(os.Getenv("GO_GCFLAGS"), " ") {
 				if s == "-N" {
-					size += 880
+					size += 896
 				}
 			}
 		}
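The arithmetic behind those nosplit constants, spelled out: the budget a chain of nosplit frames may use is essentially StackGuard minus a 128-byte slop, the test cases above were written against a 128-byte budget, and noopt builds double the guard via stackGuardMultiplier. With StackGuard raised from 880 to 896 (presumably to give the now-larger nosplit paths some headroom):

	chain budget     = 896 - 128 = 768      (was 880 - 128 = 752)
	first-frame pad  = (896 - 128) - 128 = 640   // the expression in the test
	noopt extra      = 896                  // one extra multiple of the guard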