Просмотр исходного кода

Merge pull request #520 from rod-hynes/master

Bolt DB resiliency improvements
Rod Hynes 6 лет назад
Родитель
Сommit
96169bc562

+ 6 - 6
ConsoleClient/main.go

@@ -161,22 +161,22 @@ func main() {
 	// Handle required config file parameter
 
 	// EmitDiagnosticNotices is set by LoadConfig; force to true
-	// an emit diagnostics when LoadConfig-related errors occur.
+	// and emit diagnostics when LoadConfig-related errors occur.
 
 	if configFilename == "" {
-		psiphon.SetEmitDiagnosticNotices(true)
+		psiphon.SetEmitDiagnosticNotices(true, false)
 		psiphon.NoticeError("configuration file is required")
 		os.Exit(1)
 	}
 	configFileContents, err := ioutil.ReadFile(configFilename)
 	if err != nil {
-		psiphon.SetEmitDiagnosticNotices(true)
+		psiphon.SetEmitDiagnosticNotices(true, false)
 		psiphon.NoticeError("error loading configuration file: %s", err)
 		os.Exit(1)
 	}
 	config, err := psiphon.LoadConfig(configFileContents)
 	if err != nil {
-		psiphon.SetEmitDiagnosticNotices(true)
+		psiphon.SetEmitDiagnosticNotices(true, false)
 		psiphon.NoticeError("error processing configuration file: %s", err)
 		os.Exit(1)
 	}
@@ -191,7 +191,7 @@ func main() {
 		tunDeviceFile, err := configurePacketTunnel(
 			config, tunDevice, tunBindInterface, tunPrimaryDNS, tunSecondaryDNS)
 		if err != nil {
-			psiphon.SetEmitDiagnosticNotices(true)
+			psiphon.SetEmitDiagnosticNotices(true, false)
 			psiphon.NoticeError("error configuring packet tunnel: %s", err)
 			os.Exit(1)
 		}
@@ -202,7 +202,7 @@ func main() {
 
 	err = config.Commit()
 	if err != nil {
-		psiphon.SetEmitDiagnosticNotices(true)
+		psiphon.SetEmitDiagnosticNotices(true, false)
 		psiphon.NoticeError("error loading configuration file: %s", err)
 		os.Exit(1)
 	}

+ 11 - 3
psiphon/LookupIP.go

@@ -62,12 +62,20 @@ func LookupIP(ctx context.Context, host string, config *DialConfig) ([]net.IP, e
 			return ips, err
 		}
 
-		NoticeAlert("retry resolve host %s: %s", host, err)
+		if GetEmitNetworkParameters() {
+			NoticeAlert("retry resolve host %s: %s", host, err)
+		}
 
 		return bindLookupIP(ctx, host, dnsServer, config)
 	}
 
 	addrs, err := net.DefaultResolver.LookupIPAddr(ctx, host)
+
+	// Remove domain names from "net" error messages.
+	if err != nil && !GetEmitNetworkParameters() {
+		err = RedactNetError(err)
+	}
+
 	if err != nil {
 		return nil, common.ContextError(err)
 	}
@@ -116,7 +124,7 @@ func bindLookupIP(
 		copy(ipv6[:], ipAddr.To16())
 		domain = syscall.AF_INET6
 	} else {
-		return nil, common.ContextError(fmt.Errorf("invalid IP address for dns server: %s", ipAddr.String()))
+		return nil, common.ContextError(errors.New("invalid IP address for DNS server"))
 	}
 
 	socketFd, err := syscall.Socket(domain, syscall.SOCK_DGRAM, 0)
@@ -127,7 +135,7 @@ func bindLookupIP(
 	_, err = config.DeviceBinder.BindToDevice(socketFd)
 	if err != nil {
 		syscall.Close(socketFd)
-		return nil, common.ContextError(fmt.Errorf("BindToDevice failed: %s", err))
+		return nil, common.ContextError(fmt.Errorf("BindToDevice failed with %s", err))
 	}
 
 	// Connect socket to the server's IP address

+ 6 - 0
psiphon/LookupIP_nobind.go

@@ -37,6 +37,12 @@ func LookupIP(ctx context.Context, host string, config *DialConfig) ([]net.IP, e
 	}
 
 	addrs, err := net.DefaultResolver.LookupIPAddr(ctx, host)
+
+	// Remove domain names from "net" error messages.
+	if err != nil && !GetEmitNetworkParameters() {
+		err = RedactNetError(err)
+	}
+
 	if err != nil {
 		return nil, common.ContextError(err)
 	}

+ 3 - 1
psiphon/TCPConn.go

@@ -84,7 +84,9 @@ func DialTCP(
 	if config.FragmentorConfig.MayFragment() {
 		conn = fragmentor.NewConn(
 			config.FragmentorConfig,
-			func(message string) { NoticeInfo(message) },
+			func(message string) {
+				NoticeFragmentor(config.DiagnosticID, message)
+			},
 			conn)
 	}
 

+ 2 - 2
psiphon/TCPConn_bind.go

@@ -117,7 +117,7 @@ func tcpDial(ctx context.Context, addr string, config *DialConfig) (net.Conn, er
 			copy(ipv6[:], ipAddr.To16())
 			domain = syscall.AF_INET6
 		} else {
-			lastErr = common.ContextError(fmt.Errorf("invalid IP address: %s", ipAddr.String()))
+			lastErr = common.ContextError(errors.New("invalid IP address"))
 			continue
 		}
 		if domain == syscall.AF_INET {
@@ -142,7 +142,7 @@ func tcpDial(ctx context.Context, addr string, config *DialConfig) (net.Conn, er
 			_, err = config.DeviceBinder.BindToDevice(socketFD)
 			if err != nil {
 				syscall.Close(socketFD)
-				lastErr = common.ContextError(fmt.Errorf("BindToDevice failed: %s", err))
+				lastErr = common.ContextError(fmt.Errorf("BindToDevice failed with %s", err))
 				continue
 			}
 		}

+ 5 - 0
psiphon/TCPConn_nobind.go

@@ -40,6 +40,11 @@ func tcpDial(ctx context.Context, addr string, config *DialConfig) (net.Conn, er
 
 	conn, err := dialer.DialContext(ctx, "tcp", addr)
 
+	// Remove domain names from "net" error messages.
+	if err != nil && !GetEmitNetworkParameters() {
+		err = RedactNetError(err)
+	}
+
 	if err != nil {
 		return nil, common.ContextError(err)
 	}

+ 1 - 8
psiphon/common/fragmentor/fragmentor.go

@@ -259,14 +259,7 @@ func (c *Conn) Write(buffer []byte) (int, error) {
 	var notice bytes.Buffer
 
 	if emitNotice {
-		remoteAddrStr := "(nil)"
-		remoteAddr := c.Conn.RemoteAddr()
-		if remoteAddr != nil {
-			remoteAddrStr = remoteAddr.String()
-		}
-		fmt.Fprintf(&notice,
-			"fragment %s %d bytes:",
-			remoteAddrStr, len(buffer))
+		fmt.Fprintf(&notice, "fragment %d bytes:", len(buffer))
 	}
 
 	for iterations := 0; len(buffer) > 0; iterations += 1 {

+ 28 - 0
psiphon/common/protocol/serverEntry.go

@@ -155,6 +155,18 @@ func (fields ServerEntryFields) SetTag(tag string) {
 	fields["isLocalDerivedTag"] = true
 }
 
+func (fields ServerEntryFields) GetDiagnosticID() string {
+	tag, ok := fields["tag"]
+	if !ok {
+		return ""
+	}
+	tagStr, ok := tag.(string)
+	if !ok {
+		return ""
+	}
+	return TagToDiagnosticID(tagStr)
+}
+
 func (fields ServerEntryFields) GetIPAddress() string {
 	ipAddress, ok := fields["ipAddress"]
 	if !ok {
@@ -507,6 +519,10 @@ func (serverEntry *ServerEntry) HasSignature() bool {
 	return serverEntry.Signature != ""
 }
 
+func (serverEntry *ServerEntry) GetDiagnosticID() string {
+	return TagToDiagnosticID(serverEntry.Tag)
+}
+
 // GenerateServerEntryTag creates a server entry tag value that is
 // cryptographically derived from the IP address and web server secret in a
 // way that is difficult to reverse the IP address value from the tag or
@@ -521,6 +537,18 @@ func GenerateServerEntryTag(ipAddress, webServerSecret string) string {
 	return base64.StdEncoding.EncodeToString(h.Sum(nil))
 }
 
+// TagToDiagnosticID returns a prefix of the server entry tag that should be
+// sufficient to uniquely identify servers in diagnostics, while also being
+// more human readable than emitting the full tag. The tag is used as the base
+// of the diagnostic ID as it doesn't leak the server IP address in diagnostic
+// output.
+func TagToDiagnosticID(tag string) string {
+	if len(tag) < 8 {
+		return "<unknown>"
+	}
+	return tag[:8]
+}
+
 // EncodeServerEntry returns a string containing the encoding of
 // a ServerEntry following Psiphon conventions.
 func EncodeServerEntry(serverEntry *ServerEntry) (string, error) {

+ 13 - 6
psiphon/config.go

@@ -447,10 +447,16 @@ type Config struct {
 
 	// EmitDiagnosticNotices indicates whether to output notices containing
 	// detailed information about the Psiphon session. As these notices may
-	// contain sensitive network information, they should not be insecurely
-	// distributed or displayed to users. Default is off.
+	// contain sensitive information, they should not be insecurely distributed
+	// or displayed to users. Default is off.
 	EmitDiagnosticNotices bool
 
+	// EmitDiagnosticNetworkParameters indicates whether to include network
+	// parameters in diagnostic notices. As these parameters are sensitive
+	// circumvention network information, they should not be insecurely
+	// distributed or displayed to users. Default is off.
+	EmitDiagnosticNetworkParameters bool
+
 	// RateLimits specify throttling configuration for the tunnel.
 	RateLimits common.RateLimits
 
@@ -596,10 +602,11 @@ func (config *Config) IsCommitted() bool {
 // not be reflected in internal data structures.
 func (config *Config) Commit() error {
 
-	// Do SetEmitDiagnosticNotices first, to ensure config file errors are emitted.
-
+	// Do SetEmitDiagnosticNotices first, to ensure config file errors are
+	// emitted.
 	if config.EmitDiagnosticNotices {
-		SetEmitDiagnosticNotices(true)
+		SetEmitDiagnosticNotices(
+			true, config.EmitDiagnosticNetworkParameters)
 	}
 
 	// Promote legacy fields.
@@ -1265,7 +1272,7 @@ func (n *loggingNetworkIDGetter) GetNetworkID() string {
 	}
 	if len(logNetworkID)+1 < len(networkID) {
 		// Indicate when additional network info was present after the first "-".
-		logNetworkID += "+<network info>"
+		logNetworkID += "+[redacted]"
 	}
 	NoticeNetworkID(logNetworkID)
 

+ 9 - 9
psiphon/controller.go

@@ -292,7 +292,7 @@ func (controller *Controller) TerminateNextActiveTunnel() {
 	tunnel := controller.getNextActiveTunnel()
 	if tunnel != nil {
 		controller.SignalTunnelFailure(tunnel)
-		NoticeInfo("terminated tunnel: %s", tunnel.dialParams.ServerEntry.IpAddress)
+		NoticeInfo("terminated tunnel: %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
 	}
 }
 
@@ -643,7 +643,7 @@ loop:
 			}
 
 		case failedTunnel := <-controller.failedTunnels:
-			NoticeAlert("tunnel failed: %s", failedTunnel.dialParams.ServerEntry.IpAddress)
+			NoticeAlert("tunnel failed: %s", failedTunnel.dialParams.ServerEntry.GetDiagnosticID())
 			controller.terminateTunnel(failedTunnel)
 
 			// Clear the reference to this tunnel before calling startEstablishing,
@@ -699,7 +699,7 @@ loop:
 
 				if err != nil {
 					NoticeAlert("failed to activate %s: %s",
-						connectedTunnel.dialParams.ServerEntry.IpAddress, err)
+						connectedTunnel.dialParams.ServerEntry.GetDiagnosticID(), err)
 					discardTunnel = true
 				} else {
 					// It's unlikely that registerTunnel will fail, since only this goroutine
@@ -707,7 +707,7 @@ loop:
 					// expected.
 					if !controller.registerTunnel(connectedTunnel) {
 						NoticeAlert("failed to register %s: %s",
-							connectedTunnel.dialParams.ServerEntry.IpAddress, err)
+							connectedTunnel.dialParams.ServerEntry.GetDiagnosticID(), err)
 						discardTunnel = true
 					}
 				}
@@ -732,7 +732,7 @@ loop:
 			}
 
 			NoticeActiveTunnel(
-				connectedTunnel.dialParams.ServerEntry.IpAddress,
+				connectedTunnel.dialParams.ServerEntry.GetDiagnosticID(),
 				connectedTunnel.dialParams.TunnelProtocol,
 				connectedTunnel.dialParams.ServerEntry.SupportsSSHAPIRequests())
 
@@ -843,7 +843,7 @@ func (controller *Controller) SignalTunnelFailure(tunnel *Tunnel) {
 
 // discardTunnel disposes of a successful connection that is no longer required.
 func (controller *Controller) discardTunnel(tunnel *Tunnel) {
-	NoticeInfo("discard tunnel: %s", tunnel.dialParams.ServerEntry.IpAddress)
+	NoticeInfo("discard tunnel: %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
 	// TODO: not calling PromoteServerEntry, since that would rank the
 	// discarded tunnel before fully active tunnels. Can a discarded tunnel
 	// be promoted (since it connects), but with lower rank than all active
@@ -866,7 +866,7 @@ func (controller *Controller) registerTunnel(tunnel *Tunnel) bool {
 		if activeTunnel.dialParams.ServerEntry.IpAddress ==
 			tunnel.dialParams.ServerEntry.IpAddress {
 
-			NoticeAlert("duplicate tunnel: %s", tunnel.dialParams.ServerEntry.IpAddress)
+			NoticeAlert("duplicate tunnel: %s", tunnel.dialParams.ServerEntry.GetDiagnosticID())
 			return false
 		}
 	}
@@ -1901,7 +1901,7 @@ loop:
 			// Silently skip the candidate in this case. Otherwise, emit error.
 			if err != nil {
 				NoticeInfo("failed to select protocol for %s: %s",
-					candidateServerEntry.serverEntry.IpAddress, err)
+					candidateServerEntry.serverEntry.GetDiagnosticID(), err)
 			}
 
 			// Unblock other candidates immediately when server affinity
@@ -2008,7 +2008,7 @@ loop:
 			}
 
 			NoticeInfo("failed to connect to %s: %s",
-				candidateServerEntry.serverEntry.IpAddress, err)
+				candidateServerEntry.serverEntry.GetDiagnosticID(), err)
 
 			continue
 		}

+ 2 - 6
psiphon/dataStore.go

@@ -192,10 +192,6 @@ func StoreServerEntry(serverEntryFields protocol.ServerEntryFields, replaceIfExi
 		update := !exists || replaceIfExists || newer
 
 		if !update {
-			// Disabling this notice, for now, as it generates too much noise
-			// in diagnostics with clients that always submit embedded servers
-			// to the core on each run.
-			// NoticeInfo("ignored update for server %s", serverEntry.IpAddress)
 			return nil
 		}
 
@@ -242,7 +238,7 @@ func StoreServerEntry(serverEntryFields protocol.ServerEntryFields, replaceIfExi
 			return common.ContextError(err)
 		}
 
-		NoticeInfo("updated server %s", serverEntryFields.GetIPAddress())
+		NoticeInfo("updated server %s", serverEntryFields.GetDiagnosticID())
 
 		return nil
 	})
@@ -506,7 +502,7 @@ func newTargetServerEntryIterator(config *Config, isTactics bool) (bool, *Server
 		targetServerEntry:            serverEntry,
 	}
 
-	NoticeInfo("using TargetServerEntry: %s", serverEntry.IpAddress)
+	NoticeInfo("using TargetServerEntry: %s", serverEntry.GetDiagnosticID())
 
 	return false, iterator, nil
 }

+ 282 - 41
psiphon/dataStore_bolt.go

@@ -22,8 +22,12 @@
 package psiphon
 
 import (
+	"errors"
+	"fmt"
 	"os"
 	"path/filepath"
+	"runtime/debug"
+	"sync/atomic"
 	"time"
 
 	"github.com/Psiphon-Labs/bolt"
@@ -31,60 +35,86 @@ import (
 )
 
 type datastoreDB struct {
-	boltDB *bolt.DB
+	boltDB   *bolt.DB
+	isFailed int32
 }
 
 type datastoreTx struct {
+	db     *datastoreDB
 	boltTx *bolt.Tx
 }
 
 type datastoreBucket struct {
+	db         *datastoreDB
 	boltBucket *bolt.Bucket
 }
 
 type datastoreCursor struct {
+	db         *datastoreDB
 	boltCursor *bolt.Cursor
 }
 
 func datastoreOpenDB(rootDataDirectory string) (*datastoreDB, error) {
 
-	filename := filepath.Join(rootDataDirectory, "psiphon.boltdb")
-
-	var newDB *bolt.DB
+	var db *datastoreDB
 	var err error
 
 	for retry := 0; retry < 3; retry++ {
 
-		if retry > 0 {
-			NoticeAlert("datastoreOpenDB retry: %d", retry)
+		db, err = tryDatastoreOpenDB(rootDataDirectory, retry > 0)
+		if err == nil {
+			break
 		}
 
-		newDB, err = bolt.Open(filename, 0600, &bolt.Options{Timeout: 1 * time.Second})
+		NoticeAlert("tryDatastoreOpenDB failed: %s", err)
 
-		// The datastore file may be corrupt, so attempt to delete and try again
-		if err != nil {
-			NoticeAlert("bolt.Open error: %s", err)
-			os.Remove(filename)
-			continue
-		}
+		// The datastore file may be corrupt, so, in subsequent iterations, set the
+		// "reset" flag and attempt to delete the file and try again.
+	}
 
-		// Run consistency checks on datastore and emit errors for diagnostics purposes
-		// We assume this will complete quickly for typical size Psiphon datastores.
-		err = newDB.View(func(tx *bolt.Tx) error {
-			return tx.SynchronousCheck()
-		})
+	return db, err
+}
+
+func tryDatastoreOpenDB(rootDataDirectory string, reset bool) (retdb *datastoreDB, reterr error) {
+
+	// Testing indicates that the bolt Check function can raise SIGSEGV due to
+	// invalid mmap buffer accesses in cases such as opening a valid but
+	// truncated datastore file.
+	//
+	// To handle this, we temporarily set SetPanicOnFault in order to treat the
+	// fault as a panic, recover any panic, and return an error which will result
+	// in a retry with reset.
 
-		// The datastore file may be corrupt, so attempt to delete and try again
-		if err != nil {
-			NoticeAlert("bolt.SynchronousCheck error: %s", err)
-			newDB.Close()
-			os.Remove(filename)
-			continue
+	// Begin recovery preamble
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+
+	defer func() {
+		if r := recover(); r != nil {
+			retdb = nil
+			reterr = common.ContextError(fmt.Errorf("panic: %v", r))
 		}
+	}()
+	// End recovery preamble
 
-		break
+	filename := filepath.Join(rootDataDirectory, "psiphon.boltdb")
+
+	if reset {
+		NoticeAlert("tryDatastoreOpenDB: reset")
+		os.Remove(filename)
 	}
 
+	newDB, err := bolt.Open(filename, 0600, &bolt.Options{Timeout: 1 * time.Second})
+	if err != nil {
+		return nil, common.ContextError(err)
+	}
+
+	// Run consistency checks on datastore and emit errors for diagnostics
+	// purposes. We assume this will complete quickly for typical size Psiphon
+	// datastores and wait for the check to complete before proceeding.
+	err = newDB.View(func(tx *bolt.Tx) error {
+		return tx.SynchronousCheck()
+	})
 	if err != nil {
 		return nil, common.ContextError(err)
 	}
@@ -142,14 +172,58 @@ func datastoreOpenDB(rootDataDirectory string) (*datastoreDB, error) {
 	return &datastoreDB{boltDB: newDB}, nil
 }
 
+var errDatastoreFailed = errors.New("datastore has failed")
+
+func (db *datastoreDB) isDatastoreFailed() bool {
+	return atomic.LoadInt32(&db.isFailed) == 1
+}
+
+func (db *datastoreDB) setDatastoreFailed(r interface{}) {
+	atomic.StoreInt32(&db.isFailed, 1)
+	NoticeAlert("Datastore failed: %s",
+		common.ContextError(fmt.Errorf("panic: %v", r)))
+}
+
 func (db *datastoreDB) close() error {
+
+	// Limitation: there is no panic recover in this case. We assume boltDB.Close
+	// does not make  mmap accesses and prefer to not continue with the datastore
+	// file in a locked or open state. We also assume that any locks aquired by
+	// boltDB.Close, held by transactions, will be released even if the
+	// transaction panics and the database is in the failed state.
+
 	return db.boltDB.Close()
 }
 
-func (db *datastoreDB) view(fn func(tx *datastoreTx) error) error {
+func (db *datastoreDB) view(fn func(tx *datastoreTx) error) (reterr error) {
+
+	// Any bolt function that performs mmap buffer accesses can raise SIGBUS  due
+	// to underlying storage changes, such as a truncation of the datastore file
+	// or removal or network attached storage, etc.
+	//
+	// To handle this, we temporarily set SetPanicOnFault in order to treat the
+	// fault as a panic, recover any panic to avoid crashing the process, and
+	// putting this datastoreDB instance into a failed state. All subsequent
+	// calls to this datastoreDBinstance or its related datastoreTx and
+	// datastoreBucket instances will fail.
+
+	// Begin recovery preamble
+	if db.isDatastoreFailed() {
+		return errDatastoreFailed
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			db.setDatastoreFailed(r)
+			reterr = errDatastoreFailed
+		}
+	}()
+	// End recovery preamble
+
 	return db.boltDB.View(
 		func(tx *bolt.Tx) error {
-			err := fn(&datastoreTx{boltTx: tx})
+			err := fn(&datastoreTx{db: db, boltTx: tx})
 			if err != nil {
 				return common.ContextError(err)
 			}
@@ -157,10 +231,25 @@ func (db *datastoreDB) view(fn func(tx *datastoreTx) error) error {
 		})
 }
 
-func (db *datastoreDB) update(fn func(tx *datastoreTx) error) error {
+func (db *datastoreDB) update(fn func(tx *datastoreTx) error) (reterr error) {
+
+	// Begin recovery preamble
+	if db.isDatastoreFailed() {
+		return errDatastoreFailed
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			db.setDatastoreFailed(r)
+			reterr = errDatastoreFailed
+		}
+	}()
+	// End recovery preamble
+
 	return db.boltDB.Update(
 		func(tx *bolt.Tx) error {
-			err := fn(&datastoreTx{boltTx: tx})
+			err := fn(&datastoreTx{db: db, boltTx: tx})
 			if err != nil {
 				return common.ContextError(err)
 			}
@@ -168,11 +257,41 @@ func (db *datastoreDB) update(fn func(tx *datastoreTx) error) error {
 		})
 }
 
-func (tx *datastoreTx) bucket(name []byte) *datastoreBucket {
-	return &datastoreBucket{boltBucket: tx.boltTx.Bucket(name)}
+func (tx *datastoreTx) bucket(name []byte) (retbucket *datastoreBucket) {
+
+	// Begin recovery preamble
+	if tx.db.isDatastoreFailed() {
+		return &datastoreBucket{db: tx.db, boltBucket: nil}
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			tx.db.setDatastoreFailed(r)
+			retbucket = &datastoreBucket{db: tx.db, boltBucket: nil}
+		}
+	}()
+	// End recovery preamble
+
+	return &datastoreBucket{db: tx.db, boltBucket: tx.boltTx.Bucket(name)}
 }
 
-func (tx *datastoreTx) clearBucket(name []byte) error {
+func (tx *datastoreTx) clearBucket(name []byte) (reterr error) {
+
+	// Begin recovery preamble
+	if tx.db.isDatastoreFailed() {
+		return errDatastoreFailed
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			tx.db.setDatastoreFailed(r)
+			reterr = errDatastoreFailed
+		}
+	}()
+	// End recovery preamble
+
 	err := tx.boltTx.DeleteBucket(name)
 	if err != nil {
 		return common.ContextError(err)
@@ -184,11 +303,41 @@ func (tx *datastoreTx) clearBucket(name []byte) error {
 	return nil
 }
 
-func (b *datastoreBucket) get(key []byte) []byte {
+func (b *datastoreBucket) get(key []byte) (retvalue []byte) {
+
+	// Begin recovery preamble
+	if b.db.isDatastoreFailed() {
+		return nil
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			b.db.setDatastoreFailed(r)
+			retvalue = nil
+		}
+	}()
+	// End recovery preamble
+
 	return b.boltBucket.Get(key)
 }
 
-func (b *datastoreBucket) put(key, value []byte) error {
+func (b *datastoreBucket) put(key, value []byte) (reterr error) {
+
+	// Begin recovery preamble
+	if b.db.isDatastoreFailed() {
+		return errDatastoreFailed
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			b.db.setDatastoreFailed(r)
+			reterr = errDatastoreFailed
+		}
+	}()
+	// End recovery preamble
+
 	err := b.boltBucket.Put(key, value)
 	if err != nil {
 		return common.ContextError(err)
@@ -196,7 +345,22 @@ func (b *datastoreBucket) put(key, value []byte) error {
 	return nil
 }
 
-func (b *datastoreBucket) delete(key []byte) error {
+func (b *datastoreBucket) delete(key []byte) (reterr error) {
+
+	// Begin recovery preamble
+	if b.db.isDatastoreFailed() {
+		return errDatastoreFailed
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			b.db.setDatastoreFailed(r)
+			reterr = errDatastoreFailed
+		}
+	}()
+	// End recovery preamble
+
 	err := b.boltBucket.Delete(key)
 	if err != nil {
 		return common.ContextError(err)
@@ -204,25 +368,102 @@ func (b *datastoreBucket) delete(key []byte) error {
 	return nil
 }
 
-func (b *datastoreBucket) cursor() datastoreCursor {
-	return datastoreCursor{boltCursor: b.boltBucket.Cursor()}
+func (b *datastoreBucket) cursor() (retcursor datastoreCursor) {
+
+	// Begin recovery preamble
+	if b.db.isDatastoreFailed() {
+		return datastoreCursor{db: b.db, boltCursor: nil}
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			b.db.setDatastoreFailed(r)
+			retcursor = datastoreCursor{db: b.db, boltCursor: nil}
+		}
+	}()
+	// End recovery preamble
+
+	return datastoreCursor{db: b.db, boltCursor: b.boltBucket.Cursor()}
 }
 
-func (c *datastoreCursor) firstKey() []byte {
+func (c *datastoreCursor) firstKey() (retkey []byte) {
+
+	// Begin recovery preamble
+	if c.db.isDatastoreFailed() {
+		return nil
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			c.db.setDatastoreFailed(r)
+			retkey = nil
+		}
+	}()
+	// End recovery preamble
+
 	key, _ := c.boltCursor.First()
 	return key
 }
 
-func (c *datastoreCursor) nextKey() []byte {
+func (c *datastoreCursor) nextKey() (retkey []byte) {
+
+	// Begin recovery preamble
+	if c.db.isDatastoreFailed() {
+		return nil
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			c.db.setDatastoreFailed(r)
+			retkey = nil
+		}
+	}()
+	// End recovery preamble
+
 	key, _ := c.boltCursor.Next()
 	return key
 }
 
-func (c *datastoreCursor) first() ([]byte, []byte) {
+func (c *datastoreCursor) first() (retkey, retvalue []byte) {
+
+	// Begin recovery preamble
+	if c.db.isDatastoreFailed() {
+		return nil, nil
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			c.db.setDatastoreFailed(r)
+			retkey = nil
+			retvalue = nil
+		}
+	}()
+	// End recovery preamble
+
 	return c.boltCursor.First()
 }
 
-func (c *datastoreCursor) next() ([]byte, []byte) {
+func (c *datastoreCursor) next() (retkey, retvalue []byte) {
+
+	// Begin recovery preamble
+	if c.db.isDatastoreFailed() {
+		return nil, nil
+	}
+	panicOnFault := debug.SetPanicOnFault(true)
+	defer debug.SetPanicOnFault(panicOnFault)
+	defer func() {
+		if r := recover(); r != nil {
+			c.db.setDatastoreFailed(r)
+			retkey = nil
+			retvalue = nil
+		}
+	}()
+	// End recovery preamble
+
 	return c.boltCursor.Next()
 }
 

+ 4 - 2
psiphon/dialParameters.go

@@ -558,6 +558,7 @@ func MakeDialParameters(
 	// Initialize Dial/MeekConfigs to be passed to the corresponding dialers.
 
 	dialParams.dialConfig = &DialConfig{
+		DiagnosticID:                  serverEntry.GetDiagnosticID(),
 		UpstreamProxyURL:              config.UpstreamProxyURL,
 		CustomHeaders:                 dialCustomHeaders,
 		DeviceBinder:                  config.deviceBinder,
@@ -574,6 +575,7 @@ func MakeDialParameters(
 	if protocol.TunnelProtocolUsesMeek(dialParams.TunnelProtocol) {
 
 		dialParams.meekConfig = &MeekConfig{
+			DiagnosticID:                  serverEntry.GetDiagnosticID(),
 			ClientParameters:              config.clientParameters,
 			DialAddress:                   dialParams.MeekDialAddress,
 			UseQUIC:                       protocol.TunnelProtocolUsesFrontedMeekQUIC(dialParams.TunnelProtocol),
@@ -623,7 +625,7 @@ func (dialParams *DialParameters) Succeeded() {
 		return
 	}
 
-	NoticeInfo("Set dial parameters for %s", dialParams.ServerEntry.IpAddress)
+	NoticeInfo("Set dial parameters for %s", dialParams.ServerEntry.GetDiagnosticID())
 	err := SetDialParameters(dialParams.ServerEntry.IpAddress, dialParams.NetworkID, dialParams)
 	if err != nil {
 		NoticeAlert("SetDialParameters failed: %s", err)
@@ -648,7 +650,7 @@ func (dialParams *DialParameters) Failed(config *Config) {
 		!config.GetClientParametersSnapshot().WeightedCoinFlip(
 			parameters.ReplayRetainFailedProbability) {
 
-		NoticeInfo("Delete dial parameters for %s", dialParams.ServerEntry.IpAddress)
+		NoticeInfo("Delete dial parameters for %s", dialParams.ServerEntry.GetDiagnosticID())
 		err := DeleteDialParameters(dialParams.ServerEntry.IpAddress, dialParams.NetworkID)
 		if err != nil {
 			NoticeAlert("DeleteDialParameters failed: %s", err)

+ 4 - 1
psiphon/meekConn.go

@@ -65,6 +65,9 @@ const (
 // MeekConfig specifies the behavior of a MeekConn
 type MeekConfig struct {
 
+	// DiagnosticID is the server ID to record in any diagnostics notices.
+	DiagnosticID string
+
 	// ClientParameters is the active set of client parameters to use
 	// for the meek dial.
 	ClientParameters *parameters.ClientParameters
@@ -348,7 +351,7 @@ func DialMeek(
 		cachedTLSDialer = newCachedTLSDialer(preConn, tlsDialer)
 
 		if IsTLSConnUsingHTTP2(preConn) {
-			NoticeInfo("negotiated HTTP/2 for %s", meekConfig.DialAddress)
+			NoticeInfo("negotiated HTTP/2 for %s", meekConfig.DiagnosticID)
 			transport = &http2.Transport{
 				DialTLS: func(network, addr string, _ *tls.Config) (net.Conn, error) {
 					return cachedTLSDialer.dial(network, addr)

+ 1 - 1
psiphon/memory_test/memory_test.go

@@ -85,7 +85,7 @@ func runMemoryTest(t *testing.T, testMode int) {
 	}
 	defer os.RemoveAll(testDataDirName)
 
-	psiphon.SetEmitDiagnosticNotices(true)
+	psiphon.SetEmitDiagnosticNotices(true, true)
 
 	configJSON, err := ioutil.ReadFile("../controller_test.config")
 	if err != nil {

+ 11 - 0
psiphon/net.go

@@ -30,6 +30,7 @@ import (
 	"net"
 	"net/http"
 	"os"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -45,6 +46,9 @@ const DNS_PORT = 53
 // of a Psiphon dialer (TCPDial, UDPDial, MeekDial, etc.)
 type DialConfig struct {
 
+	// DiagnosticID is the server ID to record in any diagnostics notices.
+	DiagnosticID string
+
 	// UpstreamProxyURL specifies a proxy to connect through.
 	// E.g., "http://proxyhost:8080"
 	//       "socks5://user:password@proxyhost:1080"
@@ -527,6 +531,13 @@ func ResumeDownload(
 		err = fmt.Errorf("unexpected response status code: %d", response.StatusCode)
 	}
 	if err != nil {
+
+		// Redact URL from "net/http" error message.
+		if !GetEmitNetworkParameters() {
+			errStr := err.Error()
+			err = errors.New(strings.ReplaceAll(errStr, downloadURL, "[redacted]"))
+		}
+
 		return 0, "", common.ContextError(err)
 	}
 	defer response.Body.Close()

+ 118 - 80
psiphon/notice.go

@@ -35,7 +35,8 @@ import (
 )
 
 type noticeLogger struct {
-	logDiagnostics             int32
+	emitDiagnostics            int32
+	emitNetworkParameters      int32
 	mutex                      sync.Mutex
 	writer                     io.Writer
 	homepageFilename           string
@@ -53,23 +54,40 @@ var singletonNoticeLogger = noticeLogger{
 	writer: os.Stderr,
 }
 
-// SetEmitDiagnosticNotices toggles whether diagnostic notices
-// are emitted. Diagnostic notices contain potentially sensitive
-// circumvention network information; only enable this in environments
-// where notices are handled securely (for example, don't include these
-// notices in log files which users could post to public forums).
-func SetEmitDiagnosticNotices(enable bool) {
-	if enable {
-		atomic.StoreInt32(&singletonNoticeLogger.logDiagnostics, 1)
+// SetEmitDiagnosticNotices toggles whether diagnostic notices are emitted;
+// and whether to include circumvention network parameters in diagnostics.
+//
+// Diagnostic notices contain potentially sensitive user information; and
+// sensitive circumvention network parameters, when enabled. Only enable this
+// in environments where notices are handled securely (for example, don't
+// include these notices in log files which users could post to public
+// forums).
+func SetEmitDiagnosticNotices(
+	emitDiagnostics bool, emitNetworkParameters bool) {
+
+	if emitDiagnostics {
+		atomic.StoreInt32(&singletonNoticeLogger.emitDiagnostics, 1)
+	} else {
+		atomic.StoreInt32(&singletonNoticeLogger.emitDiagnostics, 0)
+	}
+
+	if emitNetworkParameters {
+		atomic.StoreInt32(&singletonNoticeLogger.emitNetworkParameters, 1)
 	} else {
-		atomic.StoreInt32(&singletonNoticeLogger.logDiagnostics, 0)
+		atomic.StoreInt32(&singletonNoticeLogger.emitNetworkParameters, 0)
 	}
 }
 
-// GetEmitDiagnoticNotices returns the current state
+// GetEmitDiagnosticNotices returns the current state
 // of emitting diagnostic notices.
-func GetEmitDiagnoticNotices() bool {
-	return atomic.LoadInt32(&singletonNoticeLogger.logDiagnostics) == 1
+func GetEmitDiagnosticNotices() bool {
+	return atomic.LoadInt32(&singletonNoticeLogger.emitDiagnostics) == 1
+}
+
+// GetEmitNetworkParameters returns the current state
+// of emitting network parameters.
+func GetEmitNetworkParameters() bool {
+	return atomic.LoadInt32(&singletonNoticeLogger.emitNetworkParameters) == 1
 }
 
 // SetNoticeWriter sets a target writer to receive notices. By default,
@@ -188,7 +206,7 @@ const (
 // outputNotice encodes a notice in JSON and writes it to the output writer.
 func (nl *noticeLogger) outputNotice(noticeType string, noticeFlags uint32, args ...interface{}) {
 
-	if (noticeFlags&noticeIsDiagnostic != 0) && atomic.LoadInt32(&nl.logDiagnostics) != 1 {
+	if (noticeFlags&noticeIsDiagnostic != 0) && !GetEmitDiagnosticNotices() {
 		return
 	}
 
@@ -218,6 +236,11 @@ func (nl *noticeLogger) outputNotice(noticeType string, noticeFlags uint32, args
 			fmt.Sprintf("marshal notice failed: %s", common.ContextError(err)))
 	}
 
+	// Ensure direct server IPs are not exposed in notices. The "net" package,
+	// and possibly other 3rd party packages, will include destination addresses
+	// in I/O error messages.
+	output = StripIPAddresses(output)
+
 	nl.mutex.Lock()
 	defer nl.mutex.Unlock()
 
@@ -404,78 +427,81 @@ func NoticeAvailableEgressRegions(regions []string) {
 func noticeWithDialParameters(noticeType string, dialParams *DialParameters) {
 
 	args := []interface{}{
-		"ipAddress", dialParams.ServerEntry.IpAddress,
+		"diagnosticID", dialParams.ServerEntry.GetDiagnosticID(),
 		"region", dialParams.ServerEntry.Region,
 		"protocol", dialParams.TunnelProtocol,
 		"isReplay", dialParams.IsReplay,
 	}
 
-	if dialParams.SelectedSSHClientVersion {
-		args = append(args, "SSHClientVersion", dialParams.SSHClientVersion)
-	}
+	if GetEmitNetworkParameters() {
 
-	if dialParams.UpstreamProxyType != "" {
-		args = append(args, "upstreamProxyType", dialParams.UpstreamProxyType)
-	}
+		if dialParams.SelectedSSHClientVersion {
+			args = append(args, "SSHClientVersion", dialParams.SSHClientVersion)
+		}
 
-	if dialParams.UpstreamProxyCustomHeaderNames != nil {
-		args = append(args, "upstreamProxyCustomHeaderNames", strings.Join(dialParams.UpstreamProxyCustomHeaderNames, ","))
-	}
+		if dialParams.UpstreamProxyType != "" {
+			args = append(args, "upstreamProxyType", dialParams.UpstreamProxyType)
+		}
 
-	if dialParams.MeekDialAddress != "" {
-		args = append(args, "meekDialAddress", dialParams.MeekDialAddress)
-	}
+		if dialParams.UpstreamProxyCustomHeaderNames != nil {
+			args = append(args, "upstreamProxyCustomHeaderNames", strings.Join(dialParams.UpstreamProxyCustomHeaderNames, ","))
+		}
 
-	meekResolvedIPAddress := dialParams.MeekResolvedIPAddress.Load().(string)
-	if meekResolvedIPAddress != "" {
-		args = append(args, "meekResolvedIPAddress", meekResolvedIPAddress)
-	}
+		if dialParams.MeekDialAddress != "" {
+			args = append(args, "meekDialAddress", dialParams.MeekDialAddress)
+		}
 
-	if dialParams.MeekSNIServerName != "" {
-		args = append(args, "meekSNIServerName", dialParams.MeekSNIServerName)
-	}
+		meekResolvedIPAddress := dialParams.MeekResolvedIPAddress.Load().(string)
+		if meekResolvedIPAddress != "" {
+			args = append(args, "meekResolvedIPAddress", meekResolvedIPAddress)
+		}
 
-	if dialParams.MeekHostHeader != "" {
-		args = append(args, "meekHostHeader", dialParams.MeekHostHeader)
-	}
+		if dialParams.MeekSNIServerName != "" {
+			args = append(args, "meekSNIServerName", dialParams.MeekSNIServerName)
+		}
 
-	// MeekTransformedHostName is meaningful when meek is used, which is when MeekDialAddress != ""
-	if dialParams.MeekDialAddress != "" {
-		args = append(args, "meekTransformedHostName", dialParams.MeekTransformedHostName)
-	}
+		if dialParams.MeekHostHeader != "" {
+			args = append(args, "meekHostHeader", dialParams.MeekHostHeader)
+		}
 
-	if dialParams.SelectedUserAgent {
-		args = append(args, "userAgent", dialParams.UserAgent)
-	}
+		// MeekTransformedHostName is meaningful when meek is used, which is when MeekDialAddress != ""
+		if dialParams.MeekDialAddress != "" {
+			args = append(args, "meekTransformedHostName", dialParams.MeekTransformedHostName)
+		}
 
-	if dialParams.SelectedTLSProfile {
-		args = append(args, "TLSProfile", dialParams.TLSProfile)
-		args = append(args, "TLSVersion", dialParams.TLSVersion)
-	}
+		if dialParams.SelectedUserAgent {
+			args = append(args, "userAgent", dialParams.UserAgent)
+		}
 
-	if dialParams.DialPortNumber != "" {
-		args = append(args, "dialPortNumber", dialParams.DialPortNumber)
-	}
+		if dialParams.SelectedTLSProfile {
+			args = append(args, "TLSProfile", dialParams.TLSProfile)
+			args = append(args, "TLSVersion", dialParams.TLSVersion)
+		}
 
-	if dialParams.QUICVersion != "" {
-		args = append(args, "QUICVersion", dialParams.QUICVersion)
-	}
+		if dialParams.DialPortNumber != "" {
+			args = append(args, "dialPortNumber", dialParams.DialPortNumber)
+		}
 
-	if dialParams.QUICDialSNIAddress != "" {
-		args = append(args, "QUICDialSNIAddress", dialParams.QUICDialSNIAddress)
-	}
+		if dialParams.QUICVersion != "" {
+			args = append(args, "QUICVersion", dialParams.QUICVersion)
+		}
 
-	if dialParams.DialConnMetrics != nil {
-		metrics := dialParams.DialConnMetrics.GetMetrics()
-		for name, value := range metrics {
-			args = append(args, name, value)
+		if dialParams.QUICDialSNIAddress != "" {
+			args = append(args, "QUICDialSNIAddress", dialParams.QUICDialSNIAddress)
 		}
-	}
 
-	if dialParams.ObfuscatedSSHConnMetrics != nil {
-		metrics := dialParams.ObfuscatedSSHConnMetrics.GetMetrics()
-		for name, value := range metrics {
-			args = append(args, name, value)
+		if dialParams.DialConnMetrics != nil {
+			metrics := dialParams.DialConnMetrics.GetMetrics()
+			for name, value := range metrics {
+				args = append(args, name, value)
+			}
+		}
+
+		if dialParams.ObfuscatedSSHConnMetrics != nil {
+			metrics := dialParams.ObfuscatedSSHConnMetrics.GetMetrics()
+			for name, value := range metrics {
+				args = append(args, name, value)
+			}
 		}
 	}
 
@@ -505,10 +531,10 @@ func NoticeRequestedTactics(dialParams *DialParameters) {
 }
 
 // NoticeActiveTunnel is a successful connection that is used as an active tunnel for port forwarding
-func NoticeActiveTunnel(ipAddress, protocol string, isTCS bool) {
+func NoticeActiveTunnel(diagnosticID, protocol string, isTCS bool) {
 	singletonNoticeLogger.outputNotice(
 		"ActiveTunnel", noticeIsDiagnostic,
-		"ipAddress", ipAddress,
+		"diagnosticID", diagnosticID,
 		"protocol", protocol,
 		"isTCS", isTCS)
 }
@@ -642,25 +668,24 @@ func NoticeClientUpgradeDownloaded(filename string) {
 }
 
 // NoticeBytesTransferred reports how many tunneled bytes have been
-// transferred since the last NoticeBytesTransferred, for the tunnel
-// to the server at ipAddress. This is not a diagnostic notice: the
-// user app has requested this notice with EmitBytesTransferred for
-// functionality such as traffic display; and this frequent notice
-// is not intended to be included with feedback.
-func NoticeBytesTransferred(ipAddress string, sent, received int64) {
+// transferred since the last NoticeBytesTransferred. This is not a diagnostic
+// notice: the user app has requested this notice with EmitBytesTransferred
+// for functionality such as traffic display; and this frequent notice is not
+// intended to be included with feedback.
+func NoticeBytesTransferred(diagnosticID string, sent, received int64) {
 	singletonNoticeLogger.outputNotice(
 		"BytesTransferred", 0,
+		"diagnosticID", diagnosticID,
 		"sent", sent,
 		"received", received)
 }
 
 // NoticeTotalBytesTransferred reports how many tunneled bytes have been
-// transferred in total up to this point, for the tunnel to the server
-// at ipAddress. This is a diagnostic notice.
-func NoticeTotalBytesTransferred(ipAddress string, sent, received int64) {
+// transferred in total up to this point. This is a diagnostic notice.
+func NoticeTotalBytesTransferred(diagnosticID string, sent, received int64) {
 	singletonNoticeLogger.outputNotice(
 		"TotalBytesTransferred", noticeIsDiagnostic,
-		"ipAddress", ipAddress,
+		"diagnosticID", diagnosticID,
 		"sent", sent,
 		"received", received)
 }
@@ -701,6 +726,9 @@ func NoticeExiting() {
 
 // NoticeRemoteServerListResourceDownloadedBytes reports remote server list download progress.
 func NoticeRemoteServerListResourceDownloadedBytes(url string, bytes int64) {
+	if !GetEmitNetworkParameters() {
+		url = "[redacted]"
+	}
 	singletonNoticeLogger.outputNotice(
 		"RemoteServerListResourceDownloadedBytes", noticeIsDiagnostic,
 		"url", url,
@@ -710,6 +738,9 @@ func NoticeRemoteServerListResourceDownloadedBytes(url string, bytes int64) {
 // NoticeRemoteServerListResourceDownloaded indicates that a remote server list download
 // completed successfully.
 func NoticeRemoteServerListResourceDownloaded(url string) {
+	if !GetEmitNetworkParameters() {
+		url = "[redacted]"
+	}
 	singletonNoticeLogger.outputNotice(
 		"RemoteServerListResourceDownloaded", noticeIsDiagnostic,
 		"url", url)
@@ -757,10 +788,10 @@ func NoticeNetworkID(networkID string) {
 		"NetworkID", 0, "ID", networkID)
 }
 
-func NoticeLivenessTest(ipAddress string, metrics *livenessTestMetrics, success bool) {
+func NoticeLivenessTest(diagnosticID string, metrics *livenessTestMetrics, success bool) {
 	singletonNoticeLogger.outputNotice(
 		"LivenessTest", noticeIsDiagnostic,
-		"ipAddress", ipAddress,
+		"diagnosticID", diagnosticID,
 		"metrics", metrics,
 		"success", success)
 }
@@ -779,6 +810,13 @@ func NoticeEstablishTunnelTimeout(timeout time.Duration) {
 		"timeout", timeout)
 }
 
+func NoticeFragmentor(diagnosticID string, message string) {
+	singletonNoticeLogger.outputNotice(
+		"Fragmentor", noticeIsDiagnostic,
+		"diagnosticID", diagnosticID,
+		"message", message)
+}
+
 type repetitiveNoticeState struct {
 	message string
 	repeats int

+ 6 - 1
psiphon/remoteServerList.go

@@ -25,6 +25,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"sync/atomic"
 	"time"
 
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
@@ -188,10 +189,14 @@ func FetchObfuscatedServerLists(
 		registryFilename = downloadFilename
 	}
 
+	// Prevent excessive notice noise in cases such as a general database
+	// failure, as GetSLOK may be called thousands of times per fetch.
+	emittedGetSLOKAlert := int32(0)
+
 	lookupSLOKs := func(slokID []byte) []byte {
 		// Lookup SLOKs in local datastore
 		key, err := GetSLOK(slokID)
-		if err != nil {
+		if err != nil && atomic.CompareAndSwapInt32(&emittedGetSLOKAlert, 0, 1) {
 			NoticeAlert("GetSLOK failed: %s", err)
 		}
 		return key

+ 1 - 1
psiphon/server/server_test.go

@@ -80,7 +80,7 @@ func TestMain(m *testing.M) {
 	}
 	defer os.RemoveAll(testDataDirName)
 
-	psiphon.SetEmitDiagnosticNotices(true)
+	psiphon.SetEmitDiagnosticNotices(true, true)
 
 	mockWebServerURL, mockWebServerExpectedResponse = runMockWebServer()
 

+ 2 - 2
psiphon/server/tunnelServer.go

@@ -297,7 +297,7 @@ func (server *TunnelServer) GetClientHandshaked(
 	return server.sshServer.getClientHandshaked(sessionID)
 }
 
-// UpdateClientAPIParameters updates the recorded handhake API parameters for
+// UpdateClientAPIParameters updates the recorded handshake API parameters for
 // the client corresponding to sessionID.
 func (server *TunnelServer) UpdateClientAPIParameters(
 	sessionID string,
@@ -431,7 +431,7 @@ func (sshServer *sshServer) runListener(
 	handleClient := func(clientTunnelProtocol string, clientConn net.Conn) {
 
 		// Note: establish tunnel limiter cannot simply stop TCP
-		// listeners in all cases (e.g., meek) since SSH tunnel can
+		// listeners in all cases (e.g., meek) since SSH tunnels can
 		// span multiple TCP connections.
 
 		if !sshServer.getEstablishTunnels() {

+ 7 - 9
psiphon/serverApi.go

@@ -32,7 +32,6 @@ import (
 	"net"
 	"net/http"
 	"net/url"
-	"regexp"
 	"strconv"
 	"strings"
 
@@ -616,13 +615,6 @@ func RecordRemoteServerListStat(
 		config, datastorePersistentStatTypeRemoteServerList, remoteServerListStatJson)
 }
 
-// failedTunnelErrStripAddressRegex strips IPv4 address [and optional port]
-// strings from "net" package I/O error messages. This is to avoid
-// inadvertently recording direct server IPs via error message logs, and to
-// reduce the error space due to superfluous source port data.
-var failedTunnelErrStripAddressRegex = regexp.MustCompile(
-	`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}(:(6553[0-5]|655[0-2][0-9]\d|65[0-4](\d){2}|6[0-4](\d){3}|[1-5](\d){4}|[1-9](\d){0,3}))?`)
-
 // RecordFailedTunnelStat records metrics for a failed tunnel dial, including
 // dial parameters and error condition (tunnelErr).
 //
@@ -647,7 +639,13 @@ func RecordFailedTunnelStat(
 	params["server_entry_tag"] = dialParams.ServerEntry.Tag
 	params["last_connected"] = lastConnected
 	params["client_failed_timestamp"] = common.TruncateTimestampToHour(common.GetCurrentTimestamp())
-	params["tunnel_error"] = failedTunnelErrStripAddressRegex.ReplaceAllString(tunnelErr.Error(), "<address>")
+
+	// Ensure direct server IPs are not exposed in logs. The "net" package, and
+	// possibly other 3rd party packages, will include destination addresses in
+	// I/O error messages.
+	tunnelError := StripIPAddressesString(tunnelErr.Error())
+
+	params["tunnel_error"] = tunnelError
 
 	failedTunnelStatJson, err := json.Marshal(params)
 	if err != nil {

+ 9 - 9
psiphon/tunnel.go

@@ -174,7 +174,7 @@ func (tunnel *Tunnel) Activate(
 	if !tunnel.config.DisableApi {
 		NoticeInfo(
 			"starting server context for %s",
-			tunnel.dialParams.ServerEntry.IpAddress)
+			tunnel.dialParams.ServerEntry.GetDiagnosticID())
 
 		// Call NewServerContext in a goroutine, as it blocks on a network operation,
 		// the handshake request, and would block shutdown. If the shutdown signal is
@@ -224,7 +224,7 @@ func (tunnel *Tunnel) Activate(
 		if result.err != nil {
 			return common.ContextError(
 				fmt.Errorf("error starting server context for %s: %s",
-					tunnel.dialParams.ServerEntry.IpAddress, result.err))
+					tunnel.dialParams.ServerEntry.GetDiagnosticID(), result.err))
 		}
 
 		serverContext = result.serverContext
@@ -829,7 +829,7 @@ func dialTunnel(
 				// Skip notice when cancelling.
 				if baseCtx.Err() == nil {
 					NoticeLivenessTest(
-						dialParams.ServerEntry.IpAddress, metrics, err == nil)
+						dialParams.ServerEntry.GetDiagnosticID(), metrics, err == nil)
 				}
 			}
 		}
@@ -1101,14 +1101,14 @@ func (tunnel *Tunnel) operateTunnel(tunnelOwner TunnelOwner) {
 
 			if lastTotalBytesTransferedTime.Add(noticePeriod).Before(monotime.Now()) {
 				NoticeTotalBytesTransferred(
-					tunnel.dialParams.ServerEntry.IpAddress, totalSent, totalReceived)
+					tunnel.dialParams.ServerEntry.GetDiagnosticID(), totalSent, totalReceived)
 				lastTotalBytesTransferedTime = monotime.Now()
 			}
 
 			// Only emit the frequent BytesTransferred notice when tunnel is not idle.
 			if tunnel.config.EmitBytesTransferred && (sent > 0 || received > 0) {
 				NoticeBytesTransferred(
-					tunnel.dialParams.ServerEntry.IpAddress, sent, received)
+					tunnel.dialParams.ServerEntry.GetDiagnosticID(), sent, received)
 			}
 
 			// Once the tunnel has connected, activated, and successfully
@@ -1147,7 +1147,7 @@ func (tunnel *Tunnel) operateTunnel(tunnelOwner TunnelOwner) {
 			// Note: no mutex on portForwardFailureTotal; only referenced here
 			tunnel.totalPortForwardFailures++
 			NoticeInfo("port forward failures for %s: %d",
-				tunnel.dialParams.ServerEntry.IpAddress,
+				tunnel.dialParams.ServerEntry.GetDiagnosticID(),
 				tunnel.totalPortForwardFailures)
 
 			// If the underlying Conn has closed (meek and other plugin protocols may close
@@ -1202,7 +1202,7 @@ func (tunnel *Tunnel) operateTunnel(tunnelOwner TunnelOwner) {
 
 	// Always emit a final NoticeTotalBytesTransferred
 	NoticeTotalBytesTransferred(
-		tunnel.dialParams.ServerEntry.IpAddress, totalSent, totalReceived)
+		tunnel.dialParams.ServerEntry.GetDiagnosticID(), totalSent, totalReceived)
 
 	if err == nil {
 		NoticeInfo("shutdown operate tunnel")
@@ -1216,7 +1216,7 @@ func (tunnel *Tunnel) operateTunnel(tunnelOwner TunnelOwner) {
 
 	} else {
 		NoticeAlert("operate tunnel error for %s: %s",
-			tunnel.dialParams.ServerEntry.IpAddress, err)
+			tunnel.dialParams.ServerEntry.GetDiagnosticID(), err)
 		tunnelOwner.SignalTunnelFailure(tunnel)
 	}
 }
@@ -1312,7 +1312,7 @@ func sendStats(tunnel *Tunnel) bool {
 	err := tunnel.serverContext.DoStatusRequest(tunnel)
 	if err != nil {
 		NoticeAlert("DoStatusRequest failed for %s: %s",
-			tunnel.dialParams.ServerEntry.IpAddress, err)
+			tunnel.dialParams.ServerEntry.GetDiagnosticID(), err)
 	}
 
 	return err == nil

+ 52 - 0
psiphon/utils.go

@@ -27,8 +27,10 @@ import (
 	"net"
 	"net/url"
 	"os"
+	"regexp"
 	"runtime"
 	"runtime/debug"
+	"strings"
 	"syscall"
 	"time"
 
@@ -111,6 +113,56 @@ func IsAddressInUseError(err error) bool {
 	return false
 }
 
+var stripIPv4AddressRegex = regexp.MustCompile(
+	`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}(:(6553[0-5]|655[0-2][0-9]\d|65[0-4](\d){2}|6[0-4](\d){3}|[1-5](\d){4}|[1-9](\d){0,3}))?`)
+
+// StripIPAddresses returns a copy of the input with all IP addresses [and
+// optional ports] replaced  by "[address]". This is intended to be used to
+// strip addresses from "net" package I/O error messages and otherwise avoid
+// inadvertently recording direct server IPs via error message logs; and, in
+// metrics, to reduce the error space due to superfluous source port data.
+//
+// Limitation: only strips IPv4 addresses.
+func StripIPAddresses(b []byte) []byte {
+	// TODO: IPv6 support
+	return stripIPv4AddressRegex.ReplaceAll(b, []byte("[redacted]"))
+}
+
+// StripIPAddressesString is StripIPAddresses for strings.
+func StripIPAddressesString(s string) string {
+	// TODO: IPv6 support
+	return stripIPv4AddressRegex.ReplaceAllString(s, "[redacted]")
+}
+
+// RedactNetError removes network address information from a "net" package
+// error message. Addresses may be domains or IP addresses.
+//
+// Limitations: some non-address error context can be lost; this function
+// makes assumptions about how the Go "net" package error messages are
+// formatted and will fail to redact network addresses if this assumptions
+// become untrue.
+func RedactNetError(err error) error {
+
+	// Example "net" package error messages:
+	//
+	// - lookup <domain>: no such host
+	// - lookup <domain>: No address associated with hostname
+	// - dial tcp <address>: connectex: No connection could be made because the target machine actively refused it
+	// - write tcp <address>-><address>: write: connection refused
+
+	if err == nil {
+		return err
+	}
+
+	errstr := err.Error()
+	index := strings.Index(errstr, ": ")
+	if index == -1 {
+		return err
+	}
+
+	return errors.New("[redacted]" + errstr[index:])
+}
+
 // SyncFileWriter wraps a file and exposes an io.Writer. At predefined
 // steps, the file is synced (flushed to disk) while writing.
 type SyncFileWriter struct {

+ 33 - 3
vendor/github.com/Psiphon-Labs/bolt/db.go

@@ -110,6 +110,10 @@ type DB struct {
 	freelist *freelist
 	stats    Stats
 
+	// [Psiphon]
+	// https://github.com/etcd-io/bbolt/commit/b3e98dcb3752e0a8d5db6503b80fe19e462fdb73
+	mmapErr error // set on mmap failure; subsequently returned by all methods
+
 	pagePool sync.Pool
 
 	batchMu sync.Mutex
@@ -275,7 +279,13 @@ func (db *DB) mmap(minsz int) error {
 
 	// Memory-map the data file as a byte slice.
 	if err := mmap(db, size); err != nil {
-		return err
+
+		// [Psiphon]
+		// https://github.com/etcd-io/bbolt/commit/b3e98dcb3752e0a8d5db6503b80fe19e462fdb73
+		// If mmap fails, we cannot safely continue. Mark the db as unusable,
+		// causing all future calls to return the mmap error.
+		db.mmapErr = MmapError(err.Error())
+		return db.mmapErr
 	}
 
 	// Save references to the meta pages.
@@ -395,8 +405,11 @@ func (db *DB) Close() error {
 	db.metalock.Lock()
 	defer db.metalock.Unlock()
 
-	db.mmaplock.RLock()
-	defer db.mmaplock.RUnlock()
+	// [Psiphon]
+	// https://github.com/etcd-io/bbolt/commit/e06ec0a754bc30c2e17ad871962e71635bf94d45
+	// "Fix Close() to wait for view transactions by getting a full lock on mmaplock"
+	db.mmaplock.Lock()
+	defer db.mmaplock.Unlock()
 
 	return db.close()
 }
@@ -481,6 +494,15 @@ func (db *DB) beginTx() (*Tx, error) {
 		return nil, ErrDatabaseNotOpen
 	}
 
+	// [Psiphon]
+	// https://github.com/etcd-io/bbolt/commit/b3e98dcb3752e0a8d5db6503b80fe19e462fdb73
+	// Return mmap error if a previous mmap failed.
+	if db.mmapErr != nil {
+		db.mmaplock.RUnlock()
+		db.metalock.Unlock()
+		return nil, db.mmapErr
+	}
+
 	// Create a transaction associated with the database.
 	t := &Tx{}
 	t.init(db)
@@ -522,6 +544,14 @@ func (db *DB) beginRWTx() (*Tx, error) {
 		return nil, ErrDatabaseNotOpen
 	}
 
+	// [Psiphon]
+	// https://github.com/etcd-io/bbolt/commit/b3e98dcb3752e0a8d5db6503b80fe19e462fdb73
+	// Return mmap error if a previous mmap failed.
+	if db.mmapErr != nil {
+		db.rwlock.Unlock()
+		return nil, db.mmapErr
+	}
+
 	// Create a transaction associated with the database.
 	t := &Tx{writable: true}
 	t.init(db)

+ 10 - 0
vendor/github.com/Psiphon-Labs/bolt/errors.go

@@ -69,3 +69,13 @@ var (
 	// non-bucket key on an existing bucket key.
 	ErrIncompatibleValue = errors.New("incompatible value")
 )
+
+// [Psiphon]
+// https://github.com/etcd-io/bbolt/commit/b3e98dcb3752e0a8d5db6503b80fe19e462fdb73
+
+// MmapError represents an error resulting from a failed mmap call. Typically,
+// this error means that no further database writes will be possible. The most
+// common cause is insufficient disk space.
+type MmapError string
+
+func (e MmapError) Error() string { return string(e) }

+ 13 - 10
vendor/github.com/Psiphon-Labs/bolt/tx.go

@@ -250,6 +250,15 @@ func (tx *Tx) rollback() {
 	if tx.db == nil {
 		return
 	}
+
+	// [Psiphon]
+	// https://github.com/etcd-io/bbolt/commit/b3e98dcb3752e0a8d5db6503b80fe19e462fdb73
+	// If the transaction failed due to mmap, rollback is futile.
+	if tx.db.mmapErr != nil {
+		tx.close()
+		return
+	}
+
 	if tx.writable {
 		tx.db.freelist.rollback(tx.meta.txid)
 		tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
@@ -379,18 +388,12 @@ func (tx *Tx) Check() <-chan error {
 }
 
 // [Psiphon]
-// SynchronousCheck performs the Check function in the current goroutine and recovers
-// from any panics, such as the panic in Cursor.search().
-func (tx *Tx) SynchronousCheck() (reterr error) {
-	defer func() {
-		if e := recover(); e != nil {
-			reterr = fmt.Errorf("SynchronousCheck panic: %s", e)
-		}
-	}()
+// SynchronousCheck performs the Check function in the current goroutine,
+// allowing the caller to recover from any panics or faults.
+func (tx *Tx) SynchronousCheck() error {
 	ch := make(chan error)
 	tx.check(ch)
-	reterr = <-ch
-	return
+	return <-ch
 }
 
 func (tx *Tx) check(ch chan error) {

+ 3 - 3
vendor/vendor.json

@@ -33,10 +33,10 @@
 			"revisionTime": "2017-02-28T16:03:01Z"
 		},
 		{
-			"checksumSHA1": "3dYPdIMg6hc6whNAEXrMm09WFW0=",
+			"checksumSHA1": "dVfefYjTFuN1AKIJyCcqNrlLEJo=",
 			"path": "github.com/Psiphon-Labs/bolt",
-			"revision": "c6e046a80d4b4c6c87b9563b9dd92098f4f6f50a",
-			"revisionTime": "2017-08-14T17:37:24Z"
+			"revision": "b7c055003ce9d8a1bddb9416648cb7d955a3148d",
+			"revisionTime": "2019-07-19T17:53:53Z"
 		},
 		{
 			"checksumSHA1": "d3DwsdacdFn1/KCG/2uPV1PwR3s=",