Procházet zdrojové kódy

Add option to dump profiles when load limiting with a low established client count

Rod Hynes před 6 roky
rodič
revize
55e413d49a
3 změnil soubory, kde provedl 74 přidání a 18 odebrání
  1. 48 16
      psiphon/server/config.go
  2. 13 2
      psiphon/server/services.go
  3. 13 0
      psiphon/server/tunnelServer.go

+ 48 - 16
psiphon/server/config.go

@@ -31,6 +31,7 @@ import (
 	"net"
 	"net"
 	"strconv"
 	"strconv"
 	"strings"
 	"strings"
+	"sync/atomic"
 	"time"
 	"time"
 
 
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
@@ -45,19 +46,20 @@ import (
 )
 )
 
 
 const (
 const (
-	SERVER_CONFIG_FILENAME               = "psiphond.config"
-	SERVER_TRAFFIC_RULES_CONFIG_FILENAME = "psiphond-traffic-rules.config"
-	SERVER_OSL_CONFIG_FILENAME           = "psiphond-osl.config"
-	SERVER_TACTICS_CONFIG_FILENAME       = "psiphond-tactics.config"
-	SERVER_ENTRY_FILENAME                = "server-entry.dat"
-	DEFAULT_SERVER_IP_ADDRESS            = "127.0.0.1"
-	WEB_SERVER_SECRET_BYTE_LENGTH        = 32
-	DISCOVERY_VALUE_KEY_BYTE_LENGTH      = 32
-	SSH_USERNAME_SUFFIX_BYTE_LENGTH      = 8
-	SSH_PASSWORD_BYTE_LENGTH             = 32
-	SSH_RSA_HOST_KEY_BITS                = 2048
-	SSH_OBFUSCATED_KEY_BYTE_LENGTH       = 32
-	PERIODIC_GARBAGE_COLLECTION          = 120 * time.Second
+	SERVER_CONFIG_FILENAME                              = "psiphond.config"
+	SERVER_TRAFFIC_RULES_CONFIG_FILENAME                = "psiphond-traffic-rules.config"
+	SERVER_OSL_CONFIG_FILENAME                          = "psiphond-osl.config"
+	SERVER_TACTICS_CONFIG_FILENAME                      = "psiphond-tactics.config"
+	SERVER_ENTRY_FILENAME                               = "server-entry.dat"
+	DEFAULT_SERVER_IP_ADDRESS                           = "127.0.0.1"
+	WEB_SERVER_SECRET_BYTE_LENGTH                       = 32
+	DISCOVERY_VALUE_KEY_BYTE_LENGTH                     = 32
+	SSH_USERNAME_SUFFIX_BYTE_LENGTH                     = 8
+	SSH_PASSWORD_BYTE_LENGTH                            = 32
+	SSH_RSA_HOST_KEY_BITS                               = 2048
+	SSH_OBFUSCATED_KEY_BYTE_LENGTH                      = 32
+	PERIODIC_GARBAGE_COLLECTION                         = 120 * time.Second
+	STOP_ESTABLISH_TUNNELS_ESTABLISHED_CLIENT_THRESHOLD = 20
 )
 )
 
 
 // Config specifies the configuration and behavior of a Psiphon
 // Config specifies the configuration and behavior of a Psiphon
@@ -315,6 +317,15 @@ type Config struct {
 	// PERIODIC_GARBAGE_COLLECTION.
 	// PERIODIC_GARBAGE_COLLECTION.
 	PeriodicGarbageCollectionSeconds *int
 	PeriodicGarbageCollectionSeconds *int
 
 
+	// StopEstablishTunnelsEstablishedClientThreshold sets the established client
+	// threshold for dumping profiles when SIGTSTP is signaled. When there are
+	// less than or equal to the threshold number of established clients,
+	// profiles are dumped to aid investigating unusual load limited states that
+	// occur when few clients are connected and load should be relatively low. A
+	// profile dump is attempted at most once per process lifetime, the first
+	// time the threshold is met. Disabled when < 0.
+	StopEstablishTunnelsEstablishedClientThreshold *int
+
 	// AccessControlVerificationKeyRing is the access control authorization
 	// AccessControlVerificationKeyRing is the access control authorization
 	// verification key ring used to verify signed authorizations presented
 	// verification key ring used to verify signed authorizations presented
 	// by clients. Verified, active (unexpired) access control types will be
 	// by clients. Verified, active (unexpired) access control types will be
@@ -351,9 +362,11 @@ type Config struct {
 	// entries are stored on a Psiphon server.
 	// entries are stored on a Psiphon server.
 	OwnEncodedServerEntries map[string]string
 	OwnEncodedServerEntries map[string]string
 
 
-	sshBeginHandshakeTimeout  time.Duration
-	sshHandshakeTimeout       time.Duration
-	periodicGarbageCollection time.Duration
+	sshBeginHandshakeTimeout                       time.Duration
+	sshHandshakeTimeout                            time.Duration
+	periodicGarbageCollection                      time.Duration
+	stopEstablishTunnelsEstablishedClientThreshold int
+	dumpProfilesOnStopEstablishTunnelsDone         int32
 }
 }
 
 
 // RunWebServer indicates whether to run a web server component.
 // RunWebServer indicates whether to run a web server component.
@@ -371,6 +384,20 @@ func (config *Config) RunPeriodicGarbageCollection() bool {
 	return config.periodicGarbageCollection > 0
 	return config.periodicGarbageCollection > 0
 }
 }
 
 
+// DumpProfilesOnStopEstablishTunnels indicates whether to dump profiles due
+// to an unexpectedly low number of established clients during high load. It
+// returns true at most once: on the first call where the threshold is enabled
+// (>= 0) and establishedClientsCount is less than or equal to it.
+func (config *Config) DumpProfilesOnStopEstablishTunnels(establishedClientsCount int) bool {
+	threshold := config.stopEstablishTunnelsEstablishedClientThreshold
+	if threshold < 0 || establishedClientsCount > threshold {
+		return false
+	}
+	// Consume the one-time flag only once the threshold is met, so an earlier
+	// call made while many clients were connected cannot use up the dump.
+	return atomic.CompareAndSwapInt32(&config.dumpProfilesOnStopEstablishTunnelsDone, 0, 1)
+}
+
 // GetOwnEncodedServerEntry returns one of the server's own server entries, as
 // GetOwnEncodedServerEntry returns one of the server's own server entries, as
 // identified by the server entry tag.
 // identified by the server entry tag.
 func (config *Config) GetOwnEncodedServerEntry(serverEntryTag string) (string, bool) {
 func (config *Config) GetOwnEncodedServerEntry(serverEntryTag string) (string, bool) {
@@ -493,6 +520,11 @@ func LoadConfig(configJSON []byte) (*Config, error) {
 		config.periodicGarbageCollection = time.Duration(*config.PeriodicGarbageCollectionSeconds) * time.Second
 		config.periodicGarbageCollection = time.Duration(*config.PeriodicGarbageCollectionSeconds) * time.Second
 	}
 	}
 
 
+	config.stopEstablishTunnelsEstablishedClientThreshold = STOP_ESTABLISH_TUNNELS_ESTABLISHED_CLIENT_THRESHOLD
+	if config.StopEstablishTunnelsEstablishedClientThreshold != nil {
+		config.stopEstablishTunnelsEstablishedClientThreshold = *config.StopEstablishTunnelsEstablishedClientThreshold
+	}
+
 	err = accesscontrol.ValidateVerificationKeyRing(&config.AccessControlVerificationKeyRing)
 	err = accesscontrol.ValidateVerificationKeyRing(&config.AccessControlVerificationKeyRing)
 	if err != nil {
 	if err != nil {
 		return nil, errors.Tracef(
 		return nil, errors.Tracef(

+ 13 - 2
psiphon/server/services.go

@@ -224,6 +224,16 @@ loop:
 		case <-stopEstablishingTunnelsSignal:
 		case <-stopEstablishingTunnelsSignal:
 			tunnelServer.SetEstablishTunnels(false)
 			tunnelServer.SetEstablishTunnels(false)
 
 
+			if config.DumpProfilesOnStopEstablishTunnels(
+				tunnelServer.GetEstablishedClientCount()) {
+
+				// Run the profile dump in a goroutine and don't block this loop. Shutdown
+				// doesn't wait for any running outputProcessProfiles to complete.
+				go func() {
+					outputProcessProfiles(supportServices.Config, "stop_establish_tunnels")
+				}()
+			}
+
 		case <-resumeEstablishingTunnelsSignal:
 		case <-resumeEstablishingTunnelsSignal:
 			tunnelServer.SetEstablishTunnels(true)
 			tunnelServer.SetEstablishTunnels(true)
 
 
@@ -251,8 +261,9 @@ loop:
 	}
 	}
 
 
 	// During any delayed or hung shutdown, periodically dump profiles to help
 	// During any delayed or hung shutdown, periodically dump profiles to help
-	// diagnose the cause.
-	signalProfileDumperStop := make(chan struct{}, 1)
+	// diagnose the cause. Shutdown doesn't wait for any running
+	// outputProcessProfiles to complete.
+	signalProfileDumperStop := make(chan struct{})
 	go func() {
 	go func() {
 		tickSeconds := 10
 		tickSeconds := 10
 		ticker := time.NewTicker(time.Duration(tickSeconds) * time.Second)
 		ticker := time.NewTicker(time.Duration(tickSeconds) * time.Second)

+ 13 - 0
psiphon/server/tunnelServer.go

@@ -258,6 +258,12 @@ func (server *TunnelServer) GetLoadStats() (ProtocolStats, RegionStats) {
 	return server.sshServer.getLoadStats()
 	return server.sshServer.getLoadStats()
 }
 }
 
 
+// GetEstablishedClientCount returns the number of currently established
+// clients, as counted by the underlying sshServer.
+func (server *TunnelServer) GetEstablishedClientCount() int {
+	return server.sshServer.getEstablishedClientCount()
+}
+
 // ResetAllClientTrafficRules resets all established client traffic rules
 // ResetAllClientTrafficRules resets all established client traffic rules
 // to use the latest config and client properties. Any existing traffic
 // to use the latest config and client properties. Any existing traffic
 // rule state is lost, including throttling state.
 // rule state is lost, including throttling state.
@@ -775,6 +781,13 @@ func (sshServer *sshServer) getLoadStats() (ProtocolStats, RegionStats) {
 	return protocolStats, regionStats
 	return protocolStats, regionStats
 }
 }
 
 
+// getEstablishedClientCount returns len(clients), read under clientsMutex.
+func (sshServer *sshServer) getEstablishedClientCount() int {
+	sshServer.clientsMutex.Lock()
+	defer sshServer.clientsMutex.Unlock()
+	return len(sshServer.clients)
+}
+
 func (sshServer *sshServer) resetAllClientTrafficRules() {
 func (sshServer *sshServer) resetAllClientTrafficRules() {
 
 
 	sshServer.clientsMutex.Lock()
 	sshServer.clientsMutex.Lock()