Эх сурвалжийг харах

Merge pull request #262 from rod-hynes/master

psiphond: logging and profiling changes
Rod Hynes 9 жил өмнө
parent
commit
d00098c20c

+ 1 - 1
psiphon/controller_test.go

@@ -764,7 +764,7 @@ func (TestHostNameTransformer) TransformHostName(string) (string, bool) {
 func fetchAndVerifyWebsite(t *testing.T, httpProxyPort int) {
 func fetchAndVerifyWebsite(t *testing.T, httpProxyPort int) {
 
 
 	testUrl := "https://raw.githubusercontent.com/Psiphon-Labs/psiphon-tunnel-core/master/LICENSE"
 	testUrl := "https://raw.githubusercontent.com/Psiphon-Labs/psiphon-tunnel-core/master/LICENSE"
-	roundTripTimeout := 10 * time.Second
+	roundTripTimeout := 30 * time.Second
 	expectedResponsePrefix := "                    GNU GENERAL PUBLIC LICENSE"
 	expectedResponsePrefix := "                    GNU GENERAL PUBLIC LICENSE"
 	expectedResponseSize := 35148
 	expectedResponseSize := 35148
 	checkResponse := func(responseBody string) bool {
 	checkResponse := func(responseBody string) bool {

+ 8 - 0
psiphon/server/config.go

@@ -213,6 +213,14 @@ type Config struct {
 	// the default profiles here: https://golang.org/pkg/runtime/pprof/#Profile.
 	// the default profiles here: https://golang.org/pkg/runtime/pprof/#Profile.
 	ProcessProfileOutputDirectory string
 	ProcessProfileOutputDirectory string
 
 
+	// ProcessBlockProfileDurationSeconds specifies the sample duration for
+	// "block" profiling. For the default, 0, no "block" profile is taken.
+	ProcessBlockProfileDurationSeconds int
+
+	// ProcessCPUProfileDurationSeconds specifies the sample duration for
+	// CPU profiling. For the default, 0, no CPU profile is taken.
+	ProcessCPUProfileDurationSeconds int
+
 	// TrafficRulesFilename is the path of a file containing a
 	// TrafficRulesFilename is the path of a file containing a
 	// JSON-encoded TrafficRulesSet, the traffic rules to apply to
 	// JSON-encoded TrafficRulesSet, the traffic rules to apply to
 	// Psiphon client tunnels.
 	// Psiphon client tunnels.

+ 8 - 0
psiphon/server/log.go

@@ -23,6 +23,8 @@ import (
 	"encoding/json"
 	"encoding/json"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
+	"io/ioutil"
+	go_log "log"
 	"os"
 	"os"
 
 
 	"github.com/Psiphon-Inc/logrus"
 	"github.com/Psiphon-Inc/logrus"
@@ -170,6 +172,12 @@ func InitLogging(config *Config) error {
 }
 }
 
 
 func init() {
 func init() {
+
+	// Suppress standard "log" package logging performed by other packages.
+	// For example, "net/http" logs messages such as:
+	// "http: TLS handshake error from <client-ip-addr>:<port>: [...]: i/o timeout"
+	go_log.SetOutput(ioutil.Discard)
+
 	log = &ContextLogger{
 	log = &ContextLogger{
 		&logrus.Logger{
 		&logrus.Logger{
 			Out:       os.Stderr,
 			Out:       os.Stderr,

+ 1 - 1
psiphon/server/server_test.go

@@ -434,7 +434,7 @@ func makeTunneledWebRequest(t *testing.T, localHTTPProxyPort int) error {
 func makeTunneledNTPRequest(t *testing.T, localSOCKSProxyPort int, udpgwServerAddress string) error {
 func makeTunneledNTPRequest(t *testing.T, localSOCKSProxyPort int, udpgwServerAddress string) error {
 
 
 	testHostname := "pool.ntp.org"
 	testHostname := "pool.ntp.org"
-	timeout := 5 * time.Second
+	timeout := 10 * time.Second
 
 
 	localUDPProxyAddress, err := net.ResolveUDPAddr("udp", "127.0.0.1:7301")
 	localUDPProxyAddress, err := net.ResolveUDPAddr("udp", "127.0.0.1:7301")
 	if err != nil {
 	if err != nil {

+ 86 - 15
psiphon/server/services.go

@@ -115,6 +115,21 @@ func RunServices(configJSON []byte) error {
 		}
 		}
 	}()
 	}()
 
 
+	// Shutdown doesn't wait for the outputProcessProfiles goroutine
+	// to complete, as it may be sleeping while running a "block" or
+	// CPU profile.
+	signalProcessProfiles := make(chan struct{}, 1)
+	go func() {
+		for {
+			select {
+			case <-signalProcessProfiles:
+				outputProcessProfiles(supportServices.Config)
+			case <-shutdownBroadcast:
+				return
+			}
+		}
+	}()
+
 	// An OS signal triggers an orderly shutdown
 	// An OS signal triggers an orderly shutdown
 	systemStopSignal := make(chan os.Signal, 1)
 	systemStopSignal := make(chan os.Signal, 1)
 	signal.Notify(systemStopSignal, os.Interrupt, os.Kill, syscall.SIGTERM)
 	signal.Notify(systemStopSignal, os.Interrupt, os.Kill, syscall.SIGTERM)
@@ -123,7 +138,7 @@ func RunServices(configJSON []byte) error {
 	reloadSupportServicesSignal := make(chan os.Signal, 1)
 	reloadSupportServicesSignal := make(chan os.Signal, 1)
 	signal.Notify(reloadSupportServicesSignal, syscall.SIGUSR1)
 	signal.Notify(reloadSupportServicesSignal, syscall.SIGUSR1)
 
 
-	// SIGUSR2 triggers an immediate load log and optional profile dump
+	// SIGUSR2 triggers an immediate load log and optional process profile output
 	logServerLoadSignal := make(chan os.Signal, 1)
 	logServerLoadSignal := make(chan os.Signal, 1)
 	signal.Notify(logServerLoadSignal, syscall.SIGUSR2)
 	signal.Notify(logServerLoadSignal, syscall.SIGUSR2)
 
 
@@ -139,9 +154,13 @@ loop:
 			tunnelServer.ResetAllClientTrafficRules()
 			tunnelServer.ResetAllClientTrafficRules()
 
 
 		case <-logServerLoadSignal:
 		case <-logServerLoadSignal:
-			// Profiles are dumped first to ensure some diagnostics are
-			// available in case logServerLoad deadlocks.
-			dumpProcessProfiles(supportServices.Config)
+			// Signal profiles writes first to ensure some diagnostics are
+			// available in case logServerLoad hangs (which has happened
+			// in the past due to a deadlock bug).
+			select {
+			case signalProcessProfiles <- *new(struct{}):
+			default:
+			}
 			logServerLoad(tunnelServer)
 			logServerLoad(tunnelServer)
 
 
 		case <-systemStopSignal:
 		case <-systemStopSignal:
@@ -160,24 +179,33 @@ loop:
 	return err
 	return err
 }
 }
 
 
-func dumpProcessProfiles(config *Config) {
+func outputProcessProfiles(config *Config) {
 
 
 	if config.ProcessProfileOutputDirectory != "" {
 	if config.ProcessProfileOutputDirectory != "" {
 
 
-		for _, profileName := range []string{
-			"goroutine", "heap", "threadcreate", "block"} {
-
+		openProfileFile := func(profileName string) *os.File {
 			fileName := filepath.Join(
 			fileName := filepath.Join(
 				config.ProcessProfileOutputDirectory, profileName+".profile")
 				config.ProcessProfileOutputDirectory, profileName+".profile")
-
-			writer, err := os.OpenFile(
+			file, err := os.OpenFile(
 				fileName, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
 				fileName, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
-
-			if err == nil {
-				err = pprof.Lookup(profileName).WriteTo(writer, 1)
-				writer.Close()
+			if err != nil {
+				log.WithContextFields(
+					LogFields{
+						"error":    err,
+						"fileName": fileName}).Error("open profile file failed")
+				return nil
 			}
 			}
+			return file
+		}
 
 
+		writeProfile := func(profileName string) {
+
+			file := openProfileFile(profileName)
+			if file == nil {
+				return
+			}
+			err := pprof.Lookup(profileName).WriteTo(file, 1)
+			file.Close()
 			if err != nil {
 			if err != nil {
 				log.WithContextFields(
 				log.WithContextFields(
 					LogFields{
 					LogFields{
@@ -185,8 +213,51 @@ func dumpProcessProfiles(config *Config) {
 						"profileName": profileName}).Error("write profile failed")
 						"profileName": profileName}).Error("write profile failed")
 			}
 			}
 		}
 		}
-	}
 
 
+		// TODO: capture https://golang.org/pkg/runtime/debug/#WriteHeapDump?
+		// May not be useful in its current state, as per:
+		// https://groups.google.com/forum/#!topic/golang-dev/cYAkuU45Qyw
+
+		// Write goroutine, heap, and threadcreate profiles
+		// https://golang.org/pkg/runtime/pprof/#Profile
+		writeProfile("goroutine")
+		writeProfile("heap")
+		writeProfile("threadcreate")
+
+		// Write block profile (after sampling)
+		// https://golang.org/pkg/runtime/pprof/#Profile
+
+		if config.ProcessBlockProfileDurationSeconds > 0 {
+			log.WithContext().Info("start block profiling")
+			runtime.SetBlockProfileRate(1)
+			time.Sleep(
+				time.Duration(config.ProcessBlockProfileDurationSeconds) * time.Second)
+			runtime.SetBlockProfileRate(0)
+			log.WithContext().Info("end block profiling")
+			writeProfile("block")
+		}
+
+		// Write CPU profile (after sampling)
+		// https://golang.org/pkg/runtime/pprof/#StartCPUProfile
+
+		if config.ProcessCPUProfileDurationSeconds > 0 {
+			file := openProfileFile("cpu")
+			if file != nil {
+				log.WithContext().Info("start cpu profiling")
+				err := pprof.StartCPUProfile(file)
+				if err != nil {
+					log.WithContextFields(
+						LogFields{"error": err}).Error("StartCPUProfile failed")
+				} else {
+					time.Sleep(time.Duration(
+						config.ProcessCPUProfileDurationSeconds) * time.Second)
+					pprof.StopCPUProfile()
+					log.WithContext().Info("end cpu profiling")
+				}
+				file.Close()
+			}
+		}
+	}
 }
 }
 
 
 func logServerLoad(server *TunnelServer) {
 func logServerLoad(server *TunnelServer) {

+ 4 - 1
psiphon/server/tunnelServer.go

@@ -847,12 +847,15 @@ func (sshClient *sshClient) runClient(
 }
 }
 
 
 func (sshClient *sshClient) rejectNewChannel(newChannel ssh.NewChannel, reason ssh.RejectionReason, logMessage string) {
 func (sshClient *sshClient) rejectNewChannel(newChannel ssh.NewChannel, reason ssh.RejectionReason, logMessage string) {
+
+	// Note: Debug level, as logMessage may contain user traffic destination address information
 	log.WithContextFields(
 	log.WithContextFields(
 		LogFields{
 		LogFields{
 			"channelType":  newChannel.ChannelType(),
 			"channelType":  newChannel.ChannelType(),
 			"logMessage":   logMessage,
 			"logMessage":   logMessage,
 			"rejectReason": reason.String(),
 			"rejectReason": reason.String(),
-		}).Warning("reject new channel")
+		}).Debug("reject new channel")
+
 	// Note: logMessage is internal, for logging only; just the RejectionReason is sent to the client
 	// Note: logMessage is internal, for logging only; just the RejectionReason is sent to the client
 	newChannel.Reject(reason, reason.String())
 	newChannel.Reject(reason, reason.String())
 }
 }