Просмотр исходного кода

Revisions to operateTunnel

* Flip periodic ssh keep alive default to "on"
* Wrap port forward conns in transferstats.Conn even when DisableApi is on, as bytes transferred is now used for tunnel quality monitoring
* Completely remove port forward threshold logic
* Skip SSH keep alives if any payload bytes were received in the last few seconds: keep alives should not be necessary in these cases, and skipping them should reduce the chance of false positive failure timeouts
* Emit selected protocol in NoticeActiveTunnel for diagnostic readability
* Emit server address in NoticeBytesTransferred in case multi-tunnel is used
* Emit NoticeTotalBytesTransferred every few minutes and at end of tunnel for diagnostics
Rod Hynes 10 лет назад
Родитель
Сommit
9406e278b9

+ 1 - 10
SampleApps/Psibot/app/src/main/java/ca/psiphon/PsiphonTunnel.java

@@ -329,16 +329,6 @@ public class PsiphonTunnel extends Psi.PsiphonProvider.Stub {
         // This parameter is for stats reporting
         // This parameter is for stats reporting
         json.put("TunnelWholeDevice", isVpnMode ? 1 : 0);
         json.put("TunnelWholeDevice", isVpnMode ? 1 : 0);
 
 
-        // Enable tunnel auto-reconnect after a threshold number of port
-        // forward failures. By default, this mechanism is disabled in
-        // tunnel-core due to the chance of false positives due to
-        // bad user input. Since VpnService mode resolves domain names
-        // differently (udpgw), invalid domain name user input won't result
-        // in SSH port forward failures.
-        if (isVpnMode) {
-            json.put("PortForwardFailureThreshold", 10);
-        }
-
         json.put("EmitBytesTransferred", true);
         json.put("EmitBytesTransferred", true);
 
 
         if (mLocalSocksProxyPort != 0) {
         if (mLocalSocksProxyPort != 0) {
@@ -424,6 +414,7 @@ public class PsiphonTunnel extends Psi.PsiphonProvider.Stub {
                 mHostService.onUntunneledAddress(notice.getJSONObject("data").getString("address"));
                 mHostService.onUntunneledAddress(notice.getJSONObject("data").getString("address"));
 
 
             } else if (noticeType.equals("BytesTransferred")) {
             } else if (noticeType.equals("BytesTransferred")) {
+                diagnostic = false;
                 JSONObject data = notice.getJSONObject("data");
                 JSONObject data = notice.getJSONObject("data");
                 mHostService.onBytesTransferred(data.getLong("sent"), data.getLong("received"));
                 mHostService.onBytesTransferred(data.getLong("sent"), data.getLong("received"));
             }
             }

+ 38 - 48
psiphon/config.go

@@ -29,38 +29,40 @@ import (
 // TODO: allow all params to be configured
 // TODO: allow all params to be configured
 
 
 const (
 const (
-	DATA_STORE_FILENAME                          = "psiphon.db"
-	CONNECTION_WORKER_POOL_SIZE                  = 10
-	TUNNEL_POOL_SIZE                             = 1
-	TUNNEL_CONNECT_TIMEOUT                       = 20 * time.Second
-	TUNNEL_OPERATE_SHUTDOWN_TIMEOUT              = 500 * time.Millisecond
-	TUNNEL_PORT_FORWARD_DIAL_TIMEOUT             = 10 * time.Second
-	TUNNEL_SSH_KEEP_ALIVE_PAYLOAD_MAX_BYTES      = 256
-	TUNNEL_SSH_KEEP_ALIVE_PERIOD_MIN             = 60 * time.Second
-	TUNNEL_SSH_KEEP_ALIVE_PERIOD_MAX             = 120 * time.Second
-	TUNNEL_SSH_KEEP_ALIVE_PERIODIC_TIMEOUT       = 30 * time.Second
-	TUNNEL_SSH_KEEP_ALIVE_PROBE_TIMEOUT          = 5 * time.Second
-	ESTABLISH_TUNNEL_TIMEOUT_SECONDS             = 300
-	ESTABLISH_TUNNEL_WORK_TIME                   = 60 * time.Second
-	ESTABLISH_TUNNEL_PAUSE_PERIOD                = 5 * time.Second
-	PORT_FORWARD_FAILURE_THRESHOLD               = 0
-	HTTP_PROXY_ORIGIN_SERVER_TIMEOUT             = 15 * time.Second
-	HTTP_PROXY_MAX_IDLE_CONNECTIONS_PER_HOST     = 50
-	FETCH_REMOTE_SERVER_LIST_TIMEOUT             = 30 * time.Second
-	FETCH_REMOTE_SERVER_LIST_RETRY_PERIOD        = 5 * time.Second
-	FETCH_REMOTE_SERVER_LIST_STALE_PERIOD        = 6 * time.Hour
-	PSIPHON_API_CLIENT_SESSION_ID_LENGTH         = 16
-	PSIPHON_API_SERVER_TIMEOUT                   = 20 * time.Second
-	PSIPHON_API_STATUS_REQUEST_PERIOD_MIN        = 5 * time.Minute
-	PSIPHON_API_STATUS_REQUEST_PERIOD_MAX        = 10 * time.Minute
-	PSIPHON_API_STATUS_REQUEST_PADDING_MAX_BYTES = 256
-	PSIPHON_API_CONNECTED_REQUEST_PERIOD         = 24 * time.Hour
-	PSIPHON_API_CONNECTED_REQUEST_RETRY_PERIOD   = 5 * time.Second
-	FETCH_ROUTES_TIMEOUT                         = 1 * time.Minute
-	DOWNLOAD_UPGRADE_TIMEOUT                     = 15 * time.Minute
-	DOWNLOAD_UPGRADE_RETRY_PAUSE_PERIOD          = 5 * time.Second
-	IMPAIRED_PROTOCOL_CLASSIFICATION_DURATION    = 2 * time.Minute
-	IMPAIRED_PROTOCOL_CLASSIFICATION_THRESHOLD   = 3
+	DATA_STORE_FILENAME                            = "psiphon.db"
+	CONNECTION_WORKER_POOL_SIZE                    = 10
+	TUNNEL_POOL_SIZE                               = 1
+	TUNNEL_CONNECT_TIMEOUT                         = 20 * time.Second
+	TUNNEL_OPERATE_SHUTDOWN_TIMEOUT                = 500 * time.Millisecond
+	TUNNEL_PORT_FORWARD_DIAL_TIMEOUT               = 10 * time.Second
+	TUNNEL_SSH_KEEP_ALIVE_PAYLOAD_MAX_BYTES        = 256
+	TUNNEL_SSH_KEEP_ALIVE_PERIOD_MIN               = 60 * time.Second
+	TUNNEL_SSH_KEEP_ALIVE_PERIOD_MAX               = 120 * time.Second
+	TUNNEL_SSH_KEEP_ALIVE_PERIODIC_TIMEOUT         = 30 * time.Second
+	TUNNEL_SSH_KEEP_ALIVE_PERIODIC_INACTIVE_PERIOD = 10 * time.Second
+	TUNNEL_SSH_KEEP_ALIVE_PROBE_TIMEOUT            = 5 * time.Second
+	TUNNEL_SSH_KEEP_ALIVE_PROBE_INACTIVE_PERIOD    = 5 * time.Second
+	ESTABLISH_TUNNEL_TIMEOUT_SECONDS               = 300
+	ESTABLISH_TUNNEL_WORK_TIME                     = 60 * time.Second
+	ESTABLISH_TUNNEL_PAUSE_PERIOD                  = 5 * time.Second
+	HTTP_PROXY_ORIGIN_SERVER_TIMEOUT               = 15 * time.Second
+	HTTP_PROXY_MAX_IDLE_CONNECTIONS_PER_HOST       = 50
+	FETCH_REMOTE_SERVER_LIST_TIMEOUT               = 30 * time.Second
+	FETCH_REMOTE_SERVER_LIST_RETRY_PERIOD          = 5 * time.Second
+	FETCH_REMOTE_SERVER_LIST_STALE_PERIOD          = 6 * time.Hour
+	PSIPHON_API_CLIENT_SESSION_ID_LENGTH           = 16
+	PSIPHON_API_SERVER_TIMEOUT                     = 20 * time.Second
+	PSIPHON_API_STATUS_REQUEST_PERIOD_MIN          = 5 * time.Minute
+	PSIPHON_API_STATUS_REQUEST_PERIOD_MAX          = 10 * time.Minute
+	PSIPHON_API_STATUS_REQUEST_PADDING_MAX_BYTES   = 256
+	PSIPHON_API_CONNECTED_REQUEST_PERIOD           = 24 * time.Hour
+	PSIPHON_API_CONNECTED_REQUEST_RETRY_PERIOD     = 5 * time.Second
+	FETCH_ROUTES_TIMEOUT                           = 1 * time.Minute
+	DOWNLOAD_UPGRADE_TIMEOUT                       = 15 * time.Minute
+	DOWNLOAD_UPGRADE_RETRY_PAUSE_PERIOD            = 5 * time.Second
+	IMPAIRED_PROTOCOL_CLASSIFICATION_DURATION      = 2 * time.Minute
+	IMPAIRED_PROTOCOL_CLASSIFICATION_THRESHOLD     = 3
+	TOTAL_BYTES_TRANSFERRED_NOTICE_PERIOD          = 5 * time.Minute
 )
 )
 
 
 // To distinguish omitted timeout params from explicit 0 value timeout
 // To distinguish omitted timeout params from explicit 0 value timeout
@@ -162,14 +164,6 @@ type Config struct {
 	// which is recommended.
 	// which is recommended.
 	TunnelPoolSize int
 	TunnelPoolSize int
 
 
-	// PortForwardFailureThreshold specifies a threshold number of port forward
-	// failures (failure to connect, or I/O failure) after which the tunnel is
-	// considered to be degraded and a re-establish is launched. This facility
-	// can suffer from false positives, especially when the host client is running
-	// in configuration where domain name resolution is done as part of the port
-	// forward (as opposed to tunneling UDP, for example). The default is 0, off.
-	PortForwardFailureThreshold int
-
 	// UpstreamProxyUrl is a URL specifying an upstream proxy to use for all
 	// UpstreamProxyUrl is a URL specifying an upstream proxy to use for all
 	// outbound connections. The URL should include proxy type and authentication
 	// outbound connections. The URL should include proxy type and authentication
 	// information, as required. See example URLs here:
 	// information, as required. See example URLs here:
@@ -251,10 +245,10 @@ type Config struct {
 	// that require typical (system CA) server authentication.
 	// that require typical (system CA) server authentication.
 	TrustedCACertificatesFilename string
 	TrustedCACertificatesFilename string
 
 
-	// EnablePeriodicSshKeepAlive indicates whether to send an SSH keepalive every
-	// 1-2 minutes. If the SSH keepalive timesout, the tunnel is considered to have
-	// failed.
-	EnablePeriodicSshKeepAlive bool
+	// DisablePeriodicSshKeepAlive indicates whether to disable the SSH keepalive that
+	// is otherwise sent every 1-2 minutes, when the tunnel is idle. If the SSH keepalive
+	// times out, the tunnel is considered to have failed.
+	DisablePeriodicSshKeepAlive bool
 }
 }
 
 
 // LoadConfig parses and validates a JSON format Psiphon config JSON
 // LoadConfig parses and validates a JSON format Psiphon config JSON
@@ -307,10 +301,6 @@ func LoadConfig(configJson []byte) (*Config, error) {
 		config.TunnelPoolSize = TUNNEL_POOL_SIZE
 		config.TunnelPoolSize = TUNNEL_POOL_SIZE
 	}
 	}
 
 
-	if config.PortForwardFailureThreshold == 0 {
-		config.PortForwardFailureThreshold = PORT_FORWARD_FAILURE_THRESHOLD
-	}
-
 	if config.NetworkConnectivityChecker != nil {
 	if config.NetworkConnectivityChecker != nil {
 		return nil, ContextError(errors.New("NetworkConnectivityChecker interface must be set at runtime"))
 		return nil, ContextError(errors.New("NetworkConnectivityChecker interface must be set at runtime"))
 	}
 	}

+ 1 - 1
psiphon/controller.go

@@ -440,7 +440,7 @@ loop:
 		// solution(?) target MIN(CountServerEntries(region, protocol), TunnelPoolSize)
 		// solution(?) target MIN(CountServerEntries(region, protocol), TunnelPoolSize)
 		case establishedTunnel := <-controller.establishedTunnels:
 		case establishedTunnel := <-controller.establishedTunnels:
 			if controller.registerTunnel(establishedTunnel) {
 			if controller.registerTunnel(establishedTunnel) {
-				NoticeActiveTunnel(establishedTunnel.serverEntry.IpAddress)
+				NoticeActiveTunnel(establishedTunnel.serverEntry.IpAddress, establishedTunnel.protocol)
 			} else {
 			} else {
 				controller.discardTunnel(establishedTunnel)
 				controller.discardTunnel(establishedTunnel)
 			}
 			}

+ 13 - 5
psiphon/notice.go

@@ -124,8 +124,8 @@ func NoticeConnectingServer(ipAddress, region, protocol, frontingAddress string)
 }
 }
 
 
 // NoticeActiveTunnel is a successful connection that is used as an active tunnel for port forwarding
 // NoticeActiveTunnel is a successful connection that is used as an active tunnel for port forwarding
-func NoticeActiveTunnel(ipAddress string) {
-	outputNotice("ActiveTunnel", false, "ipAddress", ipAddress)
+func NoticeActiveTunnel(ipAddress, protocol string) {
+	outputNotice("ActiveTunnel", false, "ipAddress", ipAddress, "protocol", protocol)
 }
 }
 
 
 // NoticeSocksProxyPortInUse is a failure to use the configured LocalSocksProxyPort
 // NoticeSocksProxyPortInUse is a failure to use the configured LocalSocksProxyPort
@@ -201,9 +201,17 @@ func NoticeClientUpgradeDownloaded(filename string) {
 }
 }
 
 
 // NoticeBytesTransferred reports how many tunneled bytes have been
 // NoticeBytesTransferred reports how many tunneled bytes have been
-// transferred since the last NoticeBytesTransferred.
-func NoticeBytesTransferred(sent, received int64) {
-	outputNotice("BytesTransferred", false, "sent", sent, "received", received)
+// transferred since the last NoticeBytesTransferred, for the tunnel
+// to the server at ipAddress.
+func NoticeBytesTransferred(ipAddress string, sent, received int64) {
+	outputNotice("BytesTransferred", false, "ipAddress", ipAddress, "sent", sent, "received", received)
+}
+
+// NoticeTotalBytesTransferred reports how many tunneled bytes have been
+// transferred in total up to this point, for the tunnel to the server
+// at ipAddress.
+func NoticeTotalBytesTransferred(ipAddress string, sent, received int64) {
+	outputNotice("TotalBytesTransferred", false, "ipAddress", ipAddress, "sent", sent, "received", received)
 }
 }
 
 
 // NoticeLocalProxyError reports a local proxy error message. Repetitive
 // NoticeLocalProxyError reports a local proxy error message. Repetitive

+ 6 - 4
psiphon/transferstats/regexp.go

@@ -71,10 +71,12 @@ func MakeRegexps(pageViewRegexes, httpsRequestRegexes []map[string]string) (rege
 // string that should be used for stats.
 // string that should be used for stats.
 func regexHostname(hostname string, regexps *Regexps) (statsHostname string) {
 func regexHostname(hostname string, regexps *Regexps) (statsHostname string) {
 	statsHostname = "(OTHER)"
 	statsHostname = "(OTHER)"
-	for _, rr := range *regexps {
-		if rr.regexp.MatchString(hostname) {
-			statsHostname = rr.regexp.ReplaceAllString(hostname, rr.replace)
-			break
+	if regexps != nil {
+		for _, rr := range *regexps {
+			if rr.regexp.MatchString(hostname) {
+				statsHostname = rr.regexp.ReplaceAllString(hostname, rr.replace)
+				break
+			}
 		}
 		}
 	}
 	}
 	return
 	return

+ 99 - 76
psiphon/tunnel.go

@@ -71,8 +71,8 @@ type Tunnel struct {
 	sshClient                *ssh.Client
 	sshClient                *ssh.Client
 	operateWaitGroup         *sync.WaitGroup
 	operateWaitGroup         *sync.WaitGroup
 	shutdownOperateBroadcast chan struct{}
 	shutdownOperateBroadcast chan struct{}
-	portForwardFailures      chan int
-	portForwardFailureTotal  int
+	signalPortForwardFailure chan struct{}
+	totalPortForwardFailures int
 	sessionStartTime         time.Time
 	sessionStartTime         time.Time
 }
 }
 
 
@@ -122,9 +122,10 @@ func EstablishTunnel(
 		sshClient:                sshClient,
 		sshClient:                sshClient,
 		operateWaitGroup:         new(sync.WaitGroup),
 		operateWaitGroup:         new(sync.WaitGroup),
 		shutdownOperateBroadcast: make(chan struct{}),
 		shutdownOperateBroadcast: make(chan struct{}),
-		// portForwardFailures buffer size is large enough to receive the thresold number
-		// of failure reports without blocking. Senders can drop failures without blocking.
-		portForwardFailures: make(chan int, config.PortForwardFailureThreshold)}
+		// A buffer allows at least one signal to be sent even when the receiver is
+		// not listening. Senders should not block.
+		signalPortForwardFailure: make(chan struct{}, 1),
+	}
 
 
 	// Create a new Psiphon API session for this tunnel. This includes performing
 	// Create a new Psiphon API session for this tunnel. This includes performing
 	// a handshake request. If the handshake fails, this establishment fails.
 	// a handshake request. If the handshake fails, this establishment fails.
@@ -214,7 +215,7 @@ func (tunnel *Tunnel) Dial(
 	if result.err != nil {
 	if result.err != nil {
 		// TODO: conditional on type of error or error message?
 		// TODO: conditional on type of error or error message?
 		select {
 		select {
-		case tunnel.portForwardFailures <- 1:
+		case tunnel.signalPortForwardFailure <- *new(struct{}):
 		default:
 		default:
 		}
 		}
 		return nil, ContextError(result.err)
 		return nil, ContextError(result.err)
@@ -225,11 +226,14 @@ func (tunnel *Tunnel) Dial(
 		tunnel:         tunnel,
 		tunnel:         tunnel,
 		downstreamConn: downstreamConn}
 		downstreamConn: downstreamConn}
 
 
-	// Tunnel does not have a session when DisableApi is set
+	// Tunnel does not have a session when DisableApi is set. We still use
+	// transferstats.Conn to count bytes transferred for monitoring tunnel
+	// quality.
+	var regexps *transferstats.Regexps
 	if tunnel.session != nil {
 	if tunnel.session != nil {
-		conn = transferstats.NewConn(
-			conn, tunnel.session.StatsServerID(), tunnel.session.StatsRegexps())
+		regexps = tunnel.session.StatsRegexps()
 	}
 	}
+	conn = transferstats.NewConn(conn, tunnel.serverEntry.IpAddress, regexps)
 
 
 	return conn, nil
 	return conn, nil
 }
 }
@@ -255,11 +259,11 @@ type TunneledConn struct {
 func (conn *TunneledConn) Read(buffer []byte) (n int, err error) {
 func (conn *TunneledConn) Read(buffer []byte) (n int, err error) {
 	n, err = conn.Conn.Read(buffer)
 	n, err = conn.Conn.Read(buffer)
 	if err != nil && err != io.EOF {
 	if err != nil && err != io.EOF {
-		// Report 1 new failure. Won't block; assumes the receiver
+		// Report new failure. Won't block; if the receiver's buffer is
 		// already full, the signal is dropped.
 		// already full, the signal is dropped.
 		// TODO: conditional on type of error or error message?
 		// TODO: conditional on type of error or error message?
 		select {
 		select {
-		case conn.tunnel.portForwardFailures <- 1:
+		case conn.tunnel.signalPortForwardFailure <- *new(struct{}):
 		default:
 		default:
 		}
 		}
 	}
 	}
@@ -271,7 +275,7 @@ func (conn *TunneledConn) Write(buffer []byte) (n int, err error) {
 	if err != nil && err != io.EOF {
 	if err != nil && err != io.EOF {
 		// Same as TunneledConn.Read()
 		// Same as TunneledConn.Read()
 		select {
 		select {
-		case conn.tunnel.portForwardFailures <- 1:
+		case conn.tunnel.signalPortForwardFailure <- *new(struct{}):
 		default:
 		default:
 		}
 		}
 	}
 	}
@@ -485,74 +489,82 @@ func dialSsh(
 	return conn, result.sshClient, nil
 	return conn, result.sshClient, nil
 }
 }
 
 
-// operateTunnel periodically sends status requests (traffic stats updates updates)
-// to the Psiphon API; and monitors the tunnel for failures:
+// operateTunnel monitors the health of the tunnel and performs
+// periodic work.
+//
+// BytesTransferred and TotalBytesTransferred notices are emitted
+// for live reporting and diagnostics reporting, respectively.
+//
+// Status requests are sent to the Psiphon API to report bytes
+// transferred.
 //
 //
-// 1. Overall tunnel failure: the tunnel sends a signal to the ClosedSignal
-// channel on keep-alive failure and other transport I/O errors. In case
-// of such a failure, the tunnel is marked as failed.
+// Periodic SSH keep alive packets are sent to ensure the underlying
+// TCP connection isn't terminated by NAT, or other network
+// interference -- or test if it has been terminated while the device
+// has been asleep. When a keep alive times out, the tunnel is
+// considered failed.
 //
 //
-// 2. Tunnel port forward failures: the tunnel connection may stay up but
-// the client may still fail to establish port forwards due to server load
-// and other conditions. After a threshold number of such failures, the
-// overall tunnel is marked as failed.
+// An immediate SSH keep alive "probe" is sent to test the tunnel and
+// server responsiveness when a port forward failure is detected: a
+// failed dial or failed read/write. This keep alive has a shorter
+// timeout.
 //
 //
-// TODO: currently, any connect (dial), read, or write error associated with
-// a port forward is counted as a failure. It may be important to differentiate
-// between failures due to Psiphon server conditions and failures due to the
-// origin/target server (in the latter case, the tunnel is healthy). Here are
-// some typical error messages to consider matching against (or ignoring):
+// Note that port forward failures may be due to non-failure conditions.
+// For example, when the user inputs an invalid domain name and
+// resolution is done by the ssh server; or trying to connect to a
+// non-white-listed port; and the error message in these cases is not
+// distinguishable from a true server error (a common error message,
+// "ssh: rejected: administratively prohibited (open failed)", may be
+// returned for these cases but also if the server has run out of
+// ephemeral ports, for example).
 //
 //
-// - "ssh: rejected: administratively prohibited (open failed)"
-//   (this error message is reported in both actual and false cases: when a server
-//    is overloaded and has no free ephemeral ports; and when the user mistypes
-//    a domain in a browser address bar and name resolution fails)
-// - "ssh: rejected: connect failed (Connection timed out)"
-// - "write tcp ... broken pipe"
-// - "read tcp ... connection reset by peer"
-// - "ssh: unexpected packet in response to channel open: <nil>"
+// SSH keep alives are not sent when the tunnel has been recently
+// active (not only does tunnel activity obviate the necessity of a keep
+// alive, testing has shown that keep alives may time out for "busy"
+// tunnels, especially over meek protocol and other high latency
+// conditions).
 //
 //
-// Update: the above is superceded by SSH keep alives with timeouts. When a keep
-// alive times out, the tunnel is marked as failed. Keep alives are triggered
-// periodically, and also immediately in the case of a port forward failure (so
-// as to immediately detect a situation such as a device waking up and trying
-// to use a dead tunnel). By default, port forward theshold counting does not
-// cause a tunnel to be marked as failed, with the conservative assumption that
-// a server which responds to an SSH keep alive is fully functional.
+// "Recently active" is defined as having received payload bytes. Sent
+// bytes are not considered as testing has shown bytes may appear to
+// send when certain NAT devices have interfered with the tunnel, while
+// no bytes are received. In a pathological case, with DNS implemented
+// as tunneled UDP, a browser may wait excessively for a domain name to
+// resolve, while no new port forward is attempted which would otherwise
+// result in a tunnel failure detection.
 //
 //
 func (tunnel *Tunnel) operateTunnel(config *Config, tunnelOwner TunnelOwner) {
 func (tunnel *Tunnel) operateTunnel(config *Config, tunnelOwner TunnelOwner) {
 	defer tunnel.operateWaitGroup.Done()
 	defer tunnel.operateWaitGroup.Done()
 
 
+	lastBytesReceivedTime := time.Now()
+
+	lastTotalBytesTransferedTime := time.Now()
+	totalSent := int64(0)
+	totalReceived := int64(0)
+
+	noticeBytesTransferredTicker := time.NewTicker(1 * time.Second)
+	defer noticeBytesTransferredTicker.Stop()
+
 	// The next status request and ssh keep alive times are picked at random,
 	// The next status request and ssh keep alive times are picked at random,
 	// from a range, to make the resulting traffic less fingerprintable.
 	// from a range, to make the resulting traffic less fingerprintable.
-	// especially when then tunnel is otherwise idle.
 	// Note: not using Tickers since these are not fixed time periods.
 	// Note: not using Tickers since these are not fixed time periods.
-
 	nextStatusRequestPeriod := func() time.Duration {
 	nextStatusRequestPeriod := func() time.Duration {
 		return MakeRandomPeriod(
 		return MakeRandomPeriod(
 			PSIPHON_API_STATUS_REQUEST_PERIOD_MIN,
 			PSIPHON_API_STATUS_REQUEST_PERIOD_MIN,
 			PSIPHON_API_STATUS_REQUEST_PERIOD_MAX)
 			PSIPHON_API_STATUS_REQUEST_PERIOD_MAX)
 	}
 	}
+
+	statsTimer := time.NewTimer(nextStatusRequestPeriod())
+	defer statsTimer.Stop()
+
 	nextSshKeepAlivePeriod := func() time.Duration {
 	nextSshKeepAlivePeriod := func() time.Duration {
 		return MakeRandomPeriod(
 		return MakeRandomPeriod(
 			TUNNEL_SSH_KEEP_ALIVE_PERIOD_MIN,
 			TUNNEL_SSH_KEEP_ALIVE_PERIOD_MIN,
 			TUNNEL_SSH_KEEP_ALIVE_PERIOD_MAX)
 			TUNNEL_SSH_KEEP_ALIVE_PERIOD_MAX)
 	}
 	}
 
 
-	// TODO: don't initialize timer if !config.EmitBytesTransferred
-	noticeBytesTransferredTicker := time.NewTicker(1 * time.Second)
-	if !config.EmitBytesTransferred {
-		noticeBytesTransferredTicker.Stop()
-	} else {
-		defer noticeBytesTransferredTicker.Stop()
-	}
-
-	statsTimer := time.NewTimer(nextStatusRequestPeriod())
-	defer statsTimer.Stop()
-
-	// TODO: don't initialize timer if !config.EnablePeriodicSshKeepAlive
+	// TODO: don't initialize timer when config.DisablePeriodicSshKeepAlive is set
 	sshKeepAliveTimer := time.NewTimer(nextSshKeepAlivePeriod())
 	sshKeepAliveTimer := time.NewTimer(nextSshKeepAlivePeriod())
-	if !config.EnablePeriodicSshKeepAlive {
+	if config.DisablePeriodicSshKeepAlive {
 		sshKeepAliveTimer.Stop()
 		sshKeepAliveTimer.Stop()
 	} else {
 	} else {
 		defer sshKeepAliveTimer.Stop()
 		defer sshKeepAliveTimer.Stop()
@@ -564,39 +576,48 @@ func (tunnel *Tunnel) operateTunnel(config *Config, tunnelOwner TunnelOwner) {
 		case <-noticeBytesTransferredTicker.C:
 		case <-noticeBytesTransferredTicker.C:
 			sent, received := transferstats.GetBytesTransferredForServer(
 			sent, received := transferstats.GetBytesTransferredForServer(
 				tunnel.serverEntry.IpAddress)
 				tunnel.serverEntry.IpAddress)
-			// Only emit notice when tunnel is not idle.
-			if sent > 0 || received > 0 {
-				NoticeBytesTransferred(sent, received)
+
+			if received > 0 {
+				lastBytesReceivedTime = time.Now()
+			}
+
+			totalSent += sent
+			totalReceived += received
+
+			if lastTotalBytesTransferedTime.Add(TOTAL_BYTES_TRANSFERRED_NOTICE_PERIOD).Before(time.Now()) {
+				NoticeTotalBytesTransferred(tunnel.serverEntry.IpAddress, totalSent, totalReceived)
+				lastTotalBytesTransferedTime = time.Now()
+			}
+
+			// Only emit the frequent BytesTransferred notice when tunnel is not idle.
+			if config.EmitBytesTransferred && (sent > 0 || received > 0) {
+				NoticeBytesTransferred(tunnel.serverEntry.IpAddress, sent, received)
 			}
 			}
 
 
 		case <-statsTimer.C:
 		case <-statsTimer.C:
+			// TODO: perform this request asynchronously; don't block other operations
 			sendStats(tunnel)
 			sendStats(tunnel)
 			statsTimer.Reset(nextStatusRequestPeriod())
 			statsTimer.Reset(nextStatusRequestPeriod())
 
 
 		case <-sshKeepAliveTimer.C:
 		case <-sshKeepAliveTimer.C:
-			err = sendSshKeepAlive(
-				tunnel.sshClient, tunnel.conn, TUNNEL_SSH_KEEP_ALIVE_PERIODIC_TIMEOUT)
+			if lastBytesReceivedTime.Add(TUNNEL_SSH_KEEP_ALIVE_PERIODIC_INACTIVE_PERIOD).Before(time.Now()) {
+				err = sendSshKeepAlive(
+					tunnel.sshClient, tunnel.conn, TUNNEL_SSH_KEEP_ALIVE_PERIODIC_TIMEOUT)
+			}
 			sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod())
 			sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod())
 
 
-		case failures := <-tunnel.portForwardFailures:
+		case <-tunnel.signalPortForwardFailure:
 			// Note: no mutex on totalPortForwardFailures; only referenced here
 			// Note: no mutex on totalPortForwardFailures; only referenced here
-			tunnel.portForwardFailureTotal += failures
+			tunnel.totalPortForwardFailures++
 			NoticeInfo("port forward failures for %s: %d",
 			NoticeInfo("port forward failures for %s: %d",
-				tunnel.serverEntry.IpAddress, tunnel.portForwardFailureTotal)
-			if config.PortForwardFailureThreshold > 0 &&
-				tunnel.portForwardFailureTotal > config.PortForwardFailureThreshold {
-				err = errors.New("tunnel exceeded port forward failure threshold")
-			} else {
-				// Try an SSH keep alive to check the state of the SSH connection
-				// Some port forward failures are due to intermittent conditions
-				// on the server, so we don't abort the connection until the threshold
-				// is hit. But if we can't make a simple round trip request to the
-				// server, we'll immediately abort.
+				tunnel.serverEntry.IpAddress, tunnel.totalPortForwardFailures)
+
+			if lastBytesReceivedTime.Add(TUNNEL_SSH_KEEP_ALIVE_PROBE_INACTIVE_PERIOD).Before(time.Now()) {
 				err = sendSshKeepAlive(
 				err = sendSshKeepAlive(
 					tunnel.sshClient, tunnel.conn, TUNNEL_SSH_KEEP_ALIVE_PROBE_TIMEOUT)
 					tunnel.sshClient, tunnel.conn, TUNNEL_SSH_KEEP_ALIVE_PROBE_TIMEOUT)
-				if config.EnablePeriodicSshKeepAlive {
-					sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod())
-				}
+			}
+			if !config.DisablePeriodicSshKeepAlive {
+				sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod())
 			}
 			}
 
 
 		case <-tunnel.shutdownOperateBroadcast:
 		case <-tunnel.shutdownOperateBroadcast:
@@ -607,6 +628,8 @@ func (tunnel *Tunnel) operateTunnel(config *Config, tunnelOwner TunnelOwner) {
 		}
 		}
 	}
 	}
 
 
+	NoticeTotalBytesTransferred(tunnel.serverEntry.IpAddress, totalSent, totalReceived)
+
 	if err != nil {
 	if err != nil {
 		NoticeAlert("operate tunnel error for %s: %s", tunnel.serverEntry.IpAddress, err)
 		NoticeAlert("operate tunnel error for %s: %s", tunnel.serverEntry.IpAddress, err)
 		tunnelOwner.SignalTunnelFailure(tunnel)
 		tunnelOwner.SignalTunnelFailure(tunnel)