Browse Source

Merge pull request #544 from rod-hynes/master

New metrics, rule filtering, and QUIC timer fixes
Rod Hynes 6 years ago
parent
commit
f069459d82

+ 11 - 9
psiphon/common/parameters/clientParameters.go

@@ -140,6 +140,7 @@ const (
 	SSHKeepAlivePeriodicInactivePeriod               = "SSHKeepAlivePeriodicInactivePeriod"
 	SSHKeepAliveProbeTimeout                         = "SSHKeepAliveProbeTimeout"
 	SSHKeepAliveProbeInactivePeriod                  = "SSHKeepAliveProbeInactivePeriod"
+	SSHKeepAliveNetworkConnectivityPollingPeriod     = "SSHKeepAliveNetworkConnectivityPollingPeriod"
 	HTTPProxyOriginServerTimeout                     = "HTTPProxyOriginServerTimeout"
 	HTTPProxyMaxIdleConnectionsPerHost               = "HTTPProxyMaxIdleConnectionsPerHost"
 	FetchRemoteServerListTimeout                     = "FetchRemoteServerListTimeout"
@@ -351,15 +352,16 @@ var defaultClientParameters = map[string]struct {
 	// The Psiphon server times out inactive tunnels after 5 minutes, so this
 	// is a soft max for SSHKeepAlivePeriodMax.
 
-	SSHKeepAliveSpeedTestSampleProbability: {value: 0.5, minimum: 0.0},
-	SSHKeepAlivePaddingMinBytes:            {value: 0, minimum: 0},
-	SSHKeepAlivePaddingMaxBytes:            {value: 256, minimum: 0},
-	SSHKeepAlivePeriodMin:                  {value: 1 * time.Minute, minimum: 1 * time.Second},
-	SSHKeepAlivePeriodMax:                  {value: 2 * time.Minute, minimum: 1 * time.Second},
-	SSHKeepAlivePeriodicTimeout:            {value: 30 * time.Second, minimum: 1 * time.Second, flags: useNetworkLatencyMultiplier},
-	SSHKeepAlivePeriodicInactivePeriod:     {value: 10 * time.Second, minimum: 1 * time.Second},
-	SSHKeepAliveProbeTimeout:               {value: 5 * time.Second, minimum: 1 * time.Second, flags: useNetworkLatencyMultiplier},
-	SSHKeepAliveProbeInactivePeriod:        {value: 10 * time.Second, minimum: 1 * time.Second},
+	SSHKeepAliveSpeedTestSampleProbability:       {value: 0.5, minimum: 0.0},
+	SSHKeepAlivePaddingMinBytes:                  {value: 0, minimum: 0},
+	SSHKeepAlivePaddingMaxBytes:                  {value: 256, minimum: 0},
+	SSHKeepAlivePeriodMin:                        {value: 1 * time.Minute, minimum: 1 * time.Second},
+	SSHKeepAlivePeriodMax:                        {value: 2 * time.Minute, minimum: 1 * time.Second},
+	SSHKeepAlivePeriodicTimeout:                  {value: 30 * time.Second, minimum: 1 * time.Second, flags: useNetworkLatencyMultiplier},
+	SSHKeepAlivePeriodicInactivePeriod:           {value: 10 * time.Second, minimum: 1 * time.Second},
+	SSHKeepAliveProbeTimeout:                     {value: 5 * time.Second, minimum: 1 * time.Second, flags: useNetworkLatencyMultiplier},
+	SSHKeepAliveProbeInactivePeriod:              {value: 10 * time.Second, minimum: 1 * time.Second},
+	SSHKeepAliveNetworkConnectivityPollingPeriod: {value: 500 * time.Millisecond, minimum: 1 * time.Millisecond},
 
 	HTTPProxyOriginServerTimeout:       {value: 15 * time.Second, minimum: time.Duration(0), flags: useNetworkLatencyMultiplier},
 	HTTPProxyMaxIdleConnectionsPerHost: {value: 50, minimum: 0},

+ 23 - 0
psiphon/common/tactics/tactics.go

@@ -269,6 +269,9 @@ type Filter struct {
 	// ISPs specifies a list of GeoIP ISPs the client must match.
 	ISPs []string
 
+	// Cities specifies a list of GeoIP Cities the client must match.
+	Cities []string
+
 	// APIParameters specifies API, e.g. handshake, parameter names and
 	// a list of values, one of which must be specified to match this
 	// filter. Only scalar string API parameters may be filtered.
@@ -281,6 +284,7 @@ type Filter struct {
 
 	regionLookup map[string]bool
 	ispLookup    map[string]bool
+	cityLookup   map[string]bool
 }
 
 // Range is a filter field which specifies that the aggregation of
@@ -594,6 +598,13 @@ func (server *Server) initLookups() {
 			}
 		}
 
+		if len(filteredTactics.Filter.Cities) >= stringLookupThreshold {
+			filteredTactics.Filter.cityLookup = make(map[string]bool)
+			for _, city := range filteredTactics.Filter.Cities {
+				filteredTactics.Filter.cityLookup[city] = true
+			}
+		}
+
 		// TODO: add lookups for APIParameters?
 		// Not expected to be long lists of values.
 	}
@@ -715,6 +726,18 @@ func (server *Server) GetTactics(
 			}
 		}
 
+		if len(filteredTactics.Filter.Cities) > 0 {
+			if filteredTactics.Filter.cityLookup != nil {
+				if !filteredTactics.Filter.cityLookup[geoIPData.City] {
+					continue
+				}
+			} else {
+				if !common.Contains(filteredTactics.Filter.Cities, geoIPData.City) {
+					continue
+				}
+			}
+		}
+
 		if filteredTactics.Filter.APIParameters != nil {
 			mismatch := false
 			for name, values := range filteredTactics.Filter.APIParameters {

+ 27 - 5
psiphon/common/tactics/tactics_test.go

@@ -101,7 +101,9 @@ func TestTactics(t *testing.T) {
         },
         {
           "Filter" : {
-            "Regions": ["R7"]
+            "Regions": ["R7"],
+            "ISPs": ["I1"],
+            "Cities": ["C1"]
           },
           "Tactics" : {
             "Parameters" : {
@@ -135,8 +137,18 @@ func TestTactics(t *testing.T) {
 	expectedApplyCount := 3
 
 	listenerProtocol := "OSSH"
-	listenerFragmentedGeoIP := func(string) common.GeoIPData { return common.GeoIPData{Country: "R7"} }
-	listenerUnfragmentedGeoIP := func(string) common.GeoIPData { return common.GeoIPData{Country: "R8"} }
+	listenerFragmentedGeoIP := func(string) common.GeoIPData {
+		return common.GeoIPData{Country: "R7", ISP: "I1", City: "C1"}
+	}
+	listenerUnfragmentedGeoIPWrongRegion := func(string) common.GeoIPData {
+		return common.GeoIPData{Country: "R8", ISP: "I1", City: "C1"}
+	}
+	listenerUnfragmentedGeoIPWrongISP := func(string) common.GeoIPData {
+		return common.GeoIPData{Country: "R7", ISP: "I2", City: "C1"}
+	}
+	listenerUnfragmentedGeoIPWrongCity := func(string) common.GeoIPData {
+		return common.GeoIPData{Country: "R7", ISP: "I1", City: "C2"}
+	}
 
 	tacticsConfig := fmt.Sprintf(
 		tacticsConfigTemplate,
@@ -754,8 +766,18 @@ func TestTactics(t *testing.T) {
 			true,
 		},
 		{
-			"unfragmented",
-			listenerUnfragmentedGeoIP,
+			"unfragmented-region",
+			listenerUnfragmentedGeoIPWrongRegion,
+			false,
+		},
+		{
+			"unfragmented-ISP",
+			listenerUnfragmentedGeoIPWrongISP,
+			false,
+		},
+		{
+			"unfragmented-city",
+			listenerUnfragmentedGeoIPWrongCity,
 			false,
 		},
 	}

+ 9 - 5
psiphon/server/geoip.go

@@ -72,15 +72,19 @@ func NewGeoIPData() GeoIPData {
 // SetLogFields adds the GeoIPData fields to LogFields, following Psiphon
 // metric field name and format conventions.
 func (g GeoIPData) SetLogFields(logFields LogFields) {
+	g.SetLogFieldsWithPrefix("", logFields)
+}
+
+func (g GeoIPData) SetLogFieldsWithPrefix(prefix string, logFields LogFields) {
 
 	// In psi_web, the space replacement was done to accommodate space
 	// delimited logging, which is no longer required; we retain the
 	// transformation so that stats aggregation isn't impacted.
-	logFields["client_region"] = strings.Replace(g.Country, " ", "_", -1)
-	logFields["client_city"] = strings.Replace(g.City, " ", "_", -1)
-	logFields["client_isp"] = strings.Replace(g.ISP, " ", "_", -1)
-	logFields["client_asn"] = strings.Replace(g.ASN, " ", "_", -1)
-	logFields["client_aso"] = strings.Replace(g.ASO, " ", "_", -1)
+	logFields[prefix+"client_region"] = strings.Replace(g.Country, " ", "_", -1)
+	logFields[prefix+"client_city"] = strings.Replace(g.City, " ", "_", -1)
+	logFields[prefix+"client_isp"] = strings.Replace(g.ISP, " ", "_", -1)
+	logFields[prefix+"client_asn"] = strings.Replace(g.ASN, " ", "_", -1)
+	logFields[prefix+"client_aso"] = strings.Replace(g.ASO, " ", "_", -1)
 }
 
 // GeoIPService implements GeoIP lookup and session/GeoIP caching.

+ 10 - 4
psiphon/server/meek.go

@@ -664,14 +664,14 @@ func (server *MeekServer) getSessionOrEndpoint(
 
 func (server *MeekServer) rateLimit(clientIP string) bool {
 
-	historySize, thresholdSeconds, regions, ISPs, GCTriggerCount, _ :=
+	historySize, thresholdSeconds, regions, ISPs, cities, GCTriggerCount, _ :=
 		server.support.TrafficRulesSet.GetMeekRateLimiterConfig()
 
 	if historySize == 0 {
 		return false
 	}
 
-	if len(regions) > 0 || len(ISPs) > 0 {
+	if len(regions) > 0 || len(ISPs) > 0 || len(cities) > 0 {
 
 		// TODO: avoid redundant GeoIP lookups?
 		geoIPData := server.support.GeoIPService.Lookup(clientIP)
@@ -687,6 +687,12 @@ func (server *MeekServer) rateLimit(clientIP string) bool {
 				return false
 			}
 		}
+
+		if len(cities) > 0 {
+			if !common.Contains(cities, geoIPData.City) {
+				return false
+			}
+		}
 	}
 
 	limit := true
@@ -738,7 +744,7 @@ func (server *MeekServer) rateLimit(clientIP string) bool {
 
 func (server *MeekServer) rateLimitWorker() {
 
-	_, _, _, _, _, reapFrequencySeconds :=
+	_, _, _, _, _, _, reapFrequencySeconds :=
 		server.support.TrafficRulesSet.GetMeekRateLimiterConfig()
 
 	timer := time.NewTimer(time.Duration(reapFrequencySeconds) * time.Second)
@@ -748,7 +754,7 @@ func (server *MeekServer) rateLimitWorker() {
 		select {
 		case <-timer.C:
 
-			_, thresholdSeconds, _, _, _, reapFrequencySeconds :=
+			_, thresholdSeconds, _, _, _, _, reapFrequencySeconds :=
 				server.support.TrafficRulesSet.GetMeekRateLimiterConfig()
 
 			server.rateLimitLock.Lock()

+ 34 - 2
psiphon/server/trafficRules.go

@@ -71,7 +71,7 @@ type TrafficRulesSet struct {
 	// not any meek request for an existing session, if the
 	// MeekRateLimiterHistorySize requests occur in
 	// MeekRateLimiterThresholdSeconds. The scope of rate limiting may be
-	// limited using LimitMeekRateLimiterRegions and LimitMeekRateLimiterISPs.
+	// limited using LimitMeekRateLimiterRegions/ISPs/Cities.
 	//
 	// Hot reloading a new history size will result in existing history being
 	// truncated.
@@ -93,6 +93,12 @@ type TrafficRulesSet struct {
 	// is applied to all client ISPs.
 	MeekRateLimiterISPs []string
 
+	// MeekRateLimiterCities, if set, limits application of the meek
+	// late-stage rate limiter to clients in the specified list of GeoIP
+	// cities. When omitted or empty, meek rate limiting, if configured,
+	// is applied to all client cities.
+	MeekRateLimiterCities []string
+
 	// MeekRateLimiterGarbageCollectionTriggerCount specifies the number of
 	// rate limit events after which garbage collection is manually triggered
 	// in order to reclaim memory used by rate limited and other rejected
@@ -125,6 +131,10 @@ type TrafficRulesFilter struct {
 	// match this filter. When omitted or empty, any client ISP matches.
 	ISPs []string
 
+	// Cities is a list of cities that the client must geolocate to in order to
+	// match this filter. When omitted or empty, any client city matches.
+	Cities []string
+
 	// APIProtocol specifies whether the client must use the SSH
 	// API protocol (when "ssh") or the web API protocol (when "web").
 	// When omitted or blank, any API protocol matches.
@@ -149,6 +159,7 @@ type TrafficRulesFilter struct {
 
 	regionLookup map[string]bool
 	ispLookup    map[string]bool
+	cityLookup   map[string]bool
 }
 
 // TrafficRules specify the limits placed on client traffic.
@@ -290,6 +301,7 @@ func NewTrafficRulesSet(filename string) (*TrafficRulesSet, error) {
 			set.MeekRateLimiterThresholdSeconds = newSet.MeekRateLimiterThresholdSeconds
 			set.MeekRateLimiterRegions = newSet.MeekRateLimiterRegions
 			set.MeekRateLimiterISPs = newSet.MeekRateLimiterISPs
+			set.MeekRateLimiterCities = newSet.MeekRateLimiterCities
 			set.MeekRateLimiterGarbageCollectionTriggerCount = newSet.MeekRateLimiterGarbageCollectionTriggerCount
 			set.MeekRateLimiterReapHistoryFrequencySeconds = newSet.MeekRateLimiterReapHistoryFrequencySeconds
 			set.DefaultRules = newSet.DefaultRules
@@ -432,6 +444,13 @@ func (set *TrafficRulesSet) initLookups() {
 				filter.ispLookup[ISP] = true
 			}
 		}
+
+		if len(filter.Cities) >= stringLookupThreshold {
+			filter.cityLookup = make(map[string]bool)
+			for _, city := range filter.Cities {
+				filter.cityLookup[city] = true
+			}
+		}
 	}
 
 	initTrafficRulesLookups(&set.DefaultRules)
@@ -579,6 +598,18 @@ func (set *TrafficRulesSet) GetTrafficRules(
 			}
 		}
 
+		if len(filteredRules.Filter.Cities) > 0 {
+			if filteredRules.Filter.cityLookup != nil {
+				if !filteredRules.Filter.cityLookup[geoIPData.City] {
+					continue
+				}
+			} else {
+				if !common.Contains(filteredRules.Filter.Cities, geoIPData.City) {
+					continue
+				}
+			}
+		}
+
 		if filteredRules.Filter.APIProtocol != "" {
 			if !state.completed {
 				continue
@@ -803,7 +834,7 @@ func (rules *TrafficRules) allowSubnet(remoteIP net.IP) bool {
 
 // GetMeekRateLimiterConfig gets a snapshot of the meek rate limiter
 // configuration values.
-func (set *TrafficRulesSet) GetMeekRateLimiterConfig() (int, int, []string, []string, int, int) {
+func (set *TrafficRulesSet) GetMeekRateLimiterConfig() (int, int, []string, []string, []string, int, int) {
 
 	set.ReloadableFile.RLock()
 	defer set.ReloadableFile.RUnlock()
@@ -823,6 +854,7 @@ func (set *TrafficRulesSet) GetMeekRateLimiterConfig() (int, int, []string, []st
 		set.MeekRateLimiterThresholdSeconds,
 		set.MeekRateLimiterRegions,
 		set.MeekRateLimiterISPs,
+		set.MeekRateLimiterCities,
 		GCTriggerCount,
 		reapFrequencySeconds
 }

+ 42 - 2
psiphon/server/tunnelServer.go

@@ -817,6 +817,10 @@ func (sshServer *sshServer) getLoadStats() (ProtocolStats, RegionStats) {
 				int64(client.qualityMetrics.TCPPortForwardFailedDuration / time.Millisecond)
 			stat["tcp_port_forward_rejected_dialing_limit_count"] +=
 				client.qualityMetrics.TCPPortForwardRejectedDialingLimitCount
+			stat["tcp_port_forward_rejected_disallowed_count"] +=
+				client.qualityMetrics.TCPPortForwardRejectedDisallowedCount
+			stat["udp_port_forward_rejected_disallowed_count"] +=
+				client.qualityMetrics.UDPPortForwardRejectedDisallowedCount
 
 			stat["tcp_ipv4_port_forward_dialed_count"] += client.qualityMetrics.TCPIPv4PortForwardDialedCount
 			stat["tcp_ipv4_port_forward_dialed_duration"] +=
@@ -838,6 +842,8 @@ func (sshServer *sshServer) getLoadStats() (ProtocolStats, RegionStats) {
 		client.qualityMetrics.TCPPortForwardFailedCount = 0
 		client.qualityMetrics.TCPPortForwardFailedDuration = 0
 		client.qualityMetrics.TCPPortForwardRejectedDialingLimitCount = 0
+		client.qualityMetrics.TCPPortForwardRejectedDisallowedCount = 0
+		client.qualityMetrics.UDPPortForwardRejectedDisallowedCount = 0
 
 		client.qualityMetrics.TCPIPv4PortForwardDialedCount = 0
 		client.qualityMetrics.TCPIPv4PortForwardDialedDuration = 0
@@ -1204,6 +1210,8 @@ type qualityMetrics struct {
 	TCPPortForwardFailedCount               int64
 	TCPPortForwardFailedDuration            time.Duration
 	TCPPortForwardRejectedDialingLimitCount int64
+	TCPPortForwardRejectedDisallowedCount   int64
+	UDPPortForwardRejectedDisallowedCount   int64
 	TCPIPv4PortForwardDialedCount           int64
 	TCPIPv4PortForwardDialedDuration        time.Duration
 	TCPIPv4PortForwardFailedCount           int64
@@ -2588,8 +2596,17 @@ func (sshClient *sshClient) setHandshakeState(
 		sessionID, ok := sshClient.sshServer.authorizationSessionIDs[authorizationID]
 		if ok && sessionID != sshClient.sessionID {
 
-			log.WithTraceFields(
-				LogFields{"authorizationID": authorizationID}).Warning("duplicate active authorization")
+			logFields := LogFields{
+				"event_name":                 "irregular_tunnel",
+				"tunnel_error":               "duplicate active authorization",
+				"duplicate_authorization_id": authorizationID,
+			}
+			sshClient.geoIPData.SetLogFields(logFields)
+			duplicateGeoIPData := sshClient.sshServer.support.GeoIPService.GetSessionCache(sessionID)
+			if duplicateGeoIPData != sshClient.geoIPData {
+				duplicateGeoIPData.SetLogFieldsWithPrefix("duplicate_authentication_", logFields)
+			}
+			log.LogRawFieldsWithTimestamp(logFields)
 
 			// Invoke asynchronously to avoid deadlocks.
 			// TODO: invoke only once for each distinct sessionID?
@@ -2894,6 +2911,13 @@ func (sshClient *sshClient) isPortForwardPermitted(
 		return true
 	}
 
+	switch portForwardType {
+	case portForwardTypeTCP:
+		sshClient.updateQualityMetricsWithTCPRejectedDisallowed()
+	case portForwardTypeUDP:
+		sshClient.updateQualityMetricsWithUDPRejectedDisallowed()
+	}
+
 	sshClient.enqueueDisallowedTrafficAlertRequest()
 
 	log.WithTraceFields(
@@ -3131,6 +3155,22 @@ func (sshClient *sshClient) updateQualityMetricsWithRejectedDialingLimit() {
 	sshClient.qualityMetrics.TCPPortForwardRejectedDialingLimitCount += 1
 }
 
+func (sshClient *sshClient) updateQualityMetricsWithTCPRejectedDisallowed() {
+
+	sshClient.Lock()
+	defer sshClient.Unlock()
+
+	sshClient.qualityMetrics.TCPPortForwardRejectedDisallowedCount += 1
+}
+
+func (sshClient *sshClient) updateQualityMetricsWithUDPRejectedDisallowed() {
+
+	sshClient.Lock()
+	defer sshClient.Unlock()
+
+	sshClient.qualityMetrics.UDPPortForwardRejectedDisallowedCount += 1
+}
+
 func (sshClient *sshClient) handleTCPChannel(
 	remainingDialTimeout time.Duration,
 	hostToConnect string,

+ 66 - 17
psiphon/tunnel.go

@@ -1262,13 +1262,30 @@ func (tunnel *Tunnel) operateTunnel(tunnelOwner TunnelOwner) {
 // closed, which will terminate the associated tunnel.
 func (tunnel *Tunnel) sendSshKeepAlive(isFirstPeriodicKeepAlive bool, timeout time.Duration) error {
 
-	// Note: there is no request context since SSH requests cannot be
-	// interrupted directly. Closing the tunnel will interrupt the request.
-	// A timeout is set to unblock this function, but the goroutine may
-	// not exit until the tunnel is closed.
+	p := tunnel.getCustomClientParameters()
 
-	// Use a buffer of 1 as there are two senders and only one guaranteed receive.
+	// Random padding to frustrate fingerprinting.
+	request := prng.Padding(
+		p.Int(parameters.SSHKeepAlivePaddingMinBytes),
+		p.Int(parameters.SSHKeepAlivePaddingMaxBytes))
+
+	speedTestSample := isFirstPeriodicKeepAlive
+	if !speedTestSample {
+		speedTestSample = p.WeightedCoinFlip(
+			parameters.SSHKeepAliveSpeedTestSampleProbability)
+	}
+
+	networkConnectivityPollPeriod := p.Duration(
+		parameters.SSHKeepAliveNetworkConnectivityPollingPeriod)
 
+	p.Close()
+
+	// Note: there is no request context since SSH requests cannot be interrupted
+	// directly. Closing the tunnel will interrupt the request. A timeout is set
+	// to unblock this function, but the goroutine may not exit until the tunnel
+	// is closed.
+
+	// Use a buffer of 1 as there are two senders and only one guaranteed receive.
 	errChannel := make(chan error, 1)
 
 	afterFunc := time.AfterFunc(timeout, func() {
@@ -1277,12 +1294,6 @@ func (tunnel *Tunnel) sendSshKeepAlive(isFirstPeriodicKeepAlive bool, timeout ti
 	defer afterFunc.Stop()
 
 	go func() {
-		// Random padding to frustrate fingerprinting.
-		p := tunnel.getCustomClientParameters()
-		request := prng.Padding(
-			p.Int(parameters.SSHKeepAlivePaddingMinBytes),
-			p.Int(parameters.SSHKeepAlivePaddingMaxBytes))
-		p.Close()
 
 		startTime := time.Now()
 
@@ -1302,10 +1313,7 @@ func (tunnel *Tunnel) sendSshKeepAlive(isFirstPeriodicKeepAlive bool, timeout ti
 		// only the last SpeedTestMaxSampleCount samples are retained, enables
 		// tuning the sampling frequency.
 
-		if err == nil && requestOk &&
-			(isFirstPeriodicKeepAlive ||
-				tunnel.getCustomClientParameters().WeightedCoinFlip(
-					parameters.SSHKeepAliveSpeedTestSampleProbability)) {
+		if err == nil && requestOk && speedTestSample {
 
 			err = tactics.AddSpeedTestSample(
 				tunnel.config.GetClientParameters(),
@@ -1322,13 +1330,54 @@ func (tunnel *Tunnel) sendSshKeepAlive(isFirstPeriodicKeepAlive bool, timeout ti
 		}
 	}()
 
-	err := <-errChannel
+	// While awaiting the response, poll the network connectivity state. If there
+	// is network connectivity, on the same network, for the entire duration of
+	// the keep alive request and the request fails, record a failed tunnel
+	// event.
+	//
+	// The network connectivity heuristic is intended to reduce the number of
+	// failed tunnels reported due to routine situations such as varying mobile
+	// network conditions. The polling may produce false positives if the network
+	// goes down and up between polling periods, or changes to a new network and
+	// back to the previous network between polling periods.
+	//
+	// For platforms that don't provide a NetworkConnectivityChecker, it is
+	// assumed that there is network connectivity.
+
+	ticker := time.NewTicker(networkConnectivityPollPeriod)
+	defer ticker.Stop()
+	continuousNetworkConnectivity := true
+	networkID := tunnel.config.GetNetworkID()
+
+	var err error
+loop:
+	for {
+		select {
+		case err = <-errChannel:
+			break loop
+		case <-ticker.C:
+			connectivityChecker := tunnel.config.NetworkConnectivityChecker
+			if (connectivityChecker != nil &&
+				connectivityChecker.HasNetworkConnectivity() != 1) ||
+				(networkID != tunnel.config.GetNetworkID()) {
+
+				continuousNetworkConnectivity = false
+			}
+		}
+	}
+
+	err = errors.Trace(err)
+
 	if err != nil {
 		tunnel.sshClient.Close()
 		tunnel.conn.Close()
+
+		if continuousNetworkConnectivity {
+			_ = RecordFailedTunnelStat(tunnel.config, tunnel.dialParams, err)
+		}
 	}
 
-	return errors.Trace(err)
+	return err
 }
 
 // sendStats is a helper for sending session stats to the server.

+ 7 - 0
vendor/github.com/Psiphon-Labs/quic-go/server.go

@@ -455,6 +455,13 @@ func (s *baseServer) createNewSession(
 	// We're already keeping track of this connection ID.
 	// This might happen if we receive two copies of the Initial at the same time.
 	if !added {
+
+		// [Psiphon]
+		// Stop timer to release resources
+		if s, ok := sess.(*session); ok {
+			s.timer.Reset(time.Time{})
+		}
+
 		return nil
 	}
 	s.sessionHandler.Add(srcConnID, sess)

+ 37 - 9
vendor/github.com/Psiphon-Labs/quic-go/session.go

@@ -446,6 +446,19 @@ func (s *session) preSetup() {
 	}
 }
 
+// [Psiphon]
+//
+// Backport https://github.com/lucas-clemente/quic-go/commit/079279b9cf4cb5dafc8b7f673a2e7e47a4b6a06e:
+//   > session.maybeResetTimer() and session.run() were using slightly
+//   > different definitions of when a keep-alive PING should be sent. Under
+//   > certain conditions, this would make us repeatedly set a timer for the
+//   > keep-alive, but on timer expiration no keep-alive would be sent.
+//
+// This changes session.run and session.maybeResetTimer. As we don't yet have
+// https://github.com/lucas-clemente/quic-go/commit/27549c56656665859354255d3912f6428bfcb9f0,
+// "use the minimum of the two peers' max_idle_timeouts", s.config.IdleTimeout is used
+// in place of s.idleTimeout.
+
 // run the session main loop
 func (s *session) run() error {
 	defer s.ctxCancel()
@@ -516,11 +529,17 @@ runLoop:
 		if s.pacingDeadline.IsZero() { // the timer didn't have a pacing deadline set
 			pacingDeadline = s.sentPacketHandler.TimeUntilSend()
 		}
-		if s.config.KeepAlive && !s.keepAlivePingSent && s.handshakeComplete && s.firstAckElicitingPacketAfterIdleSentTime.IsZero() && time.Since(s.lastPacketReceivedTime) >= s.keepAliveInterval/2 {
+		if keepAliveTime := s.nextKeepAliveTime(); !keepAliveTime.IsZero() && !now.Before(keepAliveTime) {
 			// send a PING frame since there is no activity in the session
-			s.logger.Debugf("Sending a keep-alive ping to keep the connection alive.")
+			s.logger.Debugf("Sending a keep-alive PING to keep the connection alive.")
 			s.framer.QueueControlFrame(&wire.PingFrame{})
 			s.keepAlivePingSent = true
+		} else if !s.handshakeComplete && now.Sub(s.sessionCreationTime) >= s.config.HandshakeTimeout {
+			s.destroyImpl(qerr.TimeoutError("Handshake did not complete in time"))
+			continue
+		} else if s.handshakeComplete && now.Sub(s.idleTimeoutStartTime()) >= s.config.IdleTimeout {
+			s.destroyImpl(qerr.TimeoutError("No recent network activity"))
+			continue
 		} else if !pacingDeadline.IsZero() && now.Before(pacingDeadline) {
 			// If we get to this point before the pacing deadline, we should wait until that deadline.
 			// This can happen when scheduleSending is called, or a packet is received.
@@ -571,12 +590,25 @@ func (s *session) ConnectionState() tls.ConnectionState {
 	return s.cryptoStreamHandler.ConnectionState()
 }
 
+// Time when the next keep-alive packet should be sent.
+// It returns a zero time if no keep-alive should be sent.
+func (s *session) nextKeepAliveTime() time.Time {
+	if !s.config.KeepAlive || s.keepAlivePingSent || s.firstAckElicitingPacketAfterIdleSentTime.IsZero() {
+		return time.Time{}
+	}
+	return s.lastPacketReceivedTime.Add(s.keepAliveInterval / 2)
+}
+
 func (s *session) maybeResetTimer() {
 	var deadline time.Time
-	if s.config.KeepAlive && s.handshakeComplete && !s.keepAlivePingSent {
-		deadline = s.idleTimeoutStartTime().Add(s.keepAliveInterval / 2)
+	if !s.handshakeComplete {
+		deadline = s.sessionCreationTime.Add(s.config.HandshakeTimeout)
 	} else {
-		deadline = s.idleTimeoutStartTime().Add(s.config.IdleTimeout)
+		if keepAliveTime := s.nextKeepAliveTime(); !keepAliveTime.IsZero() {
+			deadline = keepAliveTime
+		} else {
+			deadline = s.idleTimeoutStartTime().Add(s.config.IdleTimeout)
+		}
 	}
 
 	if ackAlarm := s.receivedPacketHandler.GetAlarmTimeout(); !ackAlarm.IsZero() {
@@ -585,10 +617,6 @@ func (s *session) maybeResetTimer() {
 	if lossTime := s.sentPacketHandler.GetLossDetectionTimeout(); !lossTime.IsZero() {
 		deadline = utils.MinTime(deadline, lossTime)
 	}
-	if !s.handshakeComplete {
-		handshakeDeadline := s.sessionCreationTime.Add(s.config.HandshakeTimeout)
-		deadline = utils.MinTime(deadline, handshakeDeadline)
-	}
 	if !s.pacingDeadline.IsZero() {
 		deadline = utils.MinTime(deadline, s.pacingDeadline)
 	}

+ 3 - 3
vendor/vendor.json

@@ -75,10 +75,10 @@
 			"revisionTime": "2019-12-04T18:36:04Z"
 		},
 		{
-			"checksumSHA1": "8MdwAjQlha5clFXwY1ayF4vNGAQ=",
+			"checksumSHA1": "atufb9edlfrNCDiyupCgjjRRQ10=",
 			"path": "github.com/Psiphon-Labs/quic-go",
-			"revision": "738e15bfe6c3d7a0ccc91e2f237e5554ab6a35a6",
-			"revisionTime": "2020-01-28T19:39:28Z"
+			"revision": "474e74c89fab6c1356b6653b9e1de707b4e4ef8b",
+			"revisionTime": "2020-03-06T19:33:10Z"
 		},
 		{
 			"checksumSHA1": "VMJLFpeoJ56PTQxR0wEkkiQTr1s=",