فهرست منبع

Dump goroutine diagnostics on Controller shutdown hang

Rod Hynes 10 ماه پیش
والد
کامیت
0b98a5db6d
3فایلهای تغییر یافته به همراه71 افزوده شده و 1 حذف شده
  1. 6 0
      psiphon/config.go
  2. 28 1
      psiphon/controller.go
  3. 37 0
      psiphon/notice.go

+ 6 - 0
psiphon/config.go

@@ -681,6 +681,12 @@ type Config struct {
 	// showing proxy connection information and bytes transferred.
 	EmitInproxyProxyActivity bool
 
+	// ShutdownGoroutineProfileDeadlineSeconds is the number of seconds to
+	// await shutdown in Controller.Run before dumping a goroutine profile to
+	// diagnostics. Specify 0 to disable. When not specified, the default is
+	// 60 seconds.
+	ShutdownGoroutineProfileDeadlineSeconds *int
+
 	//
 	// The following parameters are deprecated.
 	//

+ 28 - 1
psiphon/controller.go

@@ -30,6 +30,7 @@ import (
 	"math/rand"
 	"net"
 	"runtime"
+	"runtime/pprof"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -426,6 +427,31 @@ func (controller *Controller) Run(ctx context.Context) {
 	<-controller.runCtx.Done()
 	NoticeInfo("controller stopped")
 
+	// To assist with diagnosing unexpected shutdown hangs, log a goroutine
+	// profile if the wait operation runs over a deadline. This diagnostic
+	// goroutine is intentially not awaited on.
+	signalDoneShutdown := make(chan struct{})
+	go func() {
+		deadlineSeconds := 60
+		if controller.config.ShutdownGoroutineProfileDeadlineSeconds != nil {
+			deadlineSeconds = *controller.config.ShutdownGoroutineProfileDeadlineSeconds
+		}
+		if deadlineSeconds == 0 {
+			return
+		}
+		timer := time.NewTimer(time.Duration(deadlineSeconds) * time.Second)
+		defer timer.Stop()
+		select {
+		case <-signalDoneShutdown:
+			return
+		case <-timer.C:
+		}
+		pprof.Lookup("goroutine").WriteTo(
+			NewNoticeLineWriter("Goroutine"), 1)
+	}()
+
+	// Shutdown
+
 	if controller.packetTunnelClient != nil {
 		controller.packetTunnelClient.Stop()
 	}
@@ -437,9 +463,10 @@ func (controller *Controller) Run(ctx context.Context) {
 	// workers such as fetch remote server list and untunneled uprade
 	// download -- operate with the controller run context and will all
 	// be interrupted when the run context is done.
-
 	controller.runWaitGroup.Wait()
 
+	close(signalDoneShutdown)
+
 	NoticeInfo("exiting controller")
 
 	NoticeExiting()

+ 37 - 0
psiphon/notice.go

@@ -1321,6 +1321,43 @@ func (writer *NoticeWriter) Write(p []byte) (n int, err error) {
 	return len(p), nil
 }
 
+// NoticeLineWriter implements io.Writer and emits the contents of Write calls
+// as Notices. NoticeLineWriter buffers writes and emits a notice for each
+// complete, newline delimited line. Tab characters are replaced with spaces.
+type NoticeLineWriter struct {
+	noticeType string
+	lineBuffer strings.Builder
+}
+
+// NoticeLineWriter initializes a new NoticeLineWriter
+func NewNoticeLineWriter(noticeType string) *NoticeLineWriter {
+	return &NoticeLineWriter{noticeType: noticeType}
+}
+
+// Write implements io.Writer.
+func (writer *NoticeLineWriter) Write(p []byte) (n int, err error) {
+
+	str := strings.ReplaceAll(string(p), "\t", "    ")
+
+	for {
+		before, after, found := strings.Cut(str, "\n")
+		writer.lineBuffer.WriteString(before)
+		if !found {
+			return len(p), nil
+		}
+		singletonNoticeLogger.outputNotice(
+			writer.noticeType, noticeIsDiagnostic,
+			"message", writer.lineBuffer.String())
+		writer.lineBuffer.Reset()
+		if len(after) == 0 {
+			break
+		}
+		str = after
+	}
+
+	return len(p), nil
+}
+
 // NoticeCommonLogger maps the common.Logger interface to the notice facility.
 // This is used to make the notice facility available to other packages that
 // don't import the "psiphon" package.