Просмотр исходного кода

Add protobuf support for panicwrap logs

Rod Hynes 4 месяцев назад
Родитель
Сommit
449cfed680

+ 123 - 39
Server/main.go

@@ -20,6 +20,7 @@
 package main
 
 import (
+	"context"
 	"encoding/json"
 	"flag"
 	"fmt"
@@ -33,10 +34,15 @@ import (
 	"time"
 
 	"github.com/Psiphon-Inc/rotate-safe-writer"
+	udsipc "github.com/Psiphon-Inc/uds-ipc"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/buildinfo"
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/server"
+	pb "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/server/pb/psiphond"
 	"github.com/mitchellh/panicwrap"
+	"google.golang.org/protobuf/proto"
+	"google.golang.org/protobuf/types/known/timestamppb"
 )
 
 var loadedConfigJSON []byte
@@ -277,56 +283,134 @@ func (list *stringListFlag) Set(flagValue string) error {
 }
 
 func panicHandler(output string) {
-	if len(loadedConfigJSON) > 0 {
-		config, err := server.LoadConfig([]byte(loadedConfigJSON))
+
+	// As this is the parent panicwrap process, no config or logging is
+	// initialized. Each of the JSON and protobuf panic logging helpers
+	// directly initialize the bare minimum required to emit the single panic
+	// log.
+
+	if len(loadedConfigJSON) == 0 {
+		fmt.Printf("no configuration JSON was loaded, cannot continue\n%s\n", output)
+		os.Exit(1)
+	}
+
+	config, err := server.LoadConfig([]byte(loadedConfigJSON))
+	if err != nil {
+		fmt.Printf("error parsing configuration file: %s\n%s\n", err, output)
+		os.Exit(1)
+	}
+
+	if config.LogFormat == "protobuf" || config.LogFormat == "both" {
+		err := panicHandlerProtobuf(config, output)
 		if err != nil {
-			fmt.Printf("error parsing configuration file: %s\n%s\n", err, output)
-			os.Exit(1)
+			fmt.Printf("error logging panic: %s\n%s\n", err, output)
+			// continue
 		}
+	}
 
-		logEvent := make(map[string]string)
-		logEvent["host_id"] = config.HostID
-		logEvent["build_rev"] = buildinfo.GetBuildInfo().BuildRev
-		logEvent["timestamp"] = time.Now().Format(time.RFC3339)
-		logEvent["event_name"] = "server_panic"
-
-		// ELK has a maximum field length of 32766 UTF8 bytes. Over that length, the
-		// log won't be delivered. Truncate the panic field, as it may be much longer.
-		maxLen := 32766
-		if len(output) > maxLen {
-			output = output[:maxLen]
-		}
+	// To maximize the chance of recording a panic log, always emit a classic
+	// log even in "protobuf" mode.
 
-		logEvent["panic"] = output
+	err = panicHandlerJSON(config, output)
+	if err != nil {
+		fmt.Printf("error logging panic: %s\n%s\n", err, output)
+		// continue
+	}
 
-		// Logs are written to the configured file name. If no name is specified, logs are written to stderr
-		var jsonWriter io.Writer
-		if config.LogFilename != "" {
+	os.Exit(1)
+}
 
-			retries, create, mode := config.GetLogFileReopenConfig()
-			panicLog, err := rotate.NewRotatableFileWriter(
-				config.LogFilename, retries, create, mode)
-			if err != nil {
-				fmt.Printf("unable to set panic log output: %s\n%s\n", err, output)
-				os.Exit(1)
-			}
-			defer panicLog.Close()
+func panicHandlerJSON(config *server.Config, output string) error {
 
-			jsonWriter = panicLog
-		} else {
-			jsonWriter = os.Stderr
-		}
+	logEvent := make(map[string]string)
+	logEvent["timestamp"] = time.Now().Format(time.RFC3339)
+	logEvent["host_id"] = config.HostID
+	logEvent["build_rev"] = buildinfo.GetBuildInfo().BuildRev
+	if config.HostProvider != "" {
+		logEvent["provider"] = config.HostProvider
+	}
+	logEvent["event_name"] = "server_panic"
+
+	// ELK has a maximum field length of 32766 UTF8 bytes. Over that length, the
+	// log won't be delivered. Truncate the panic field, as it may be much longer.
+	maxLen := 32766
+	if len(output) > maxLen {
+		output = output[:maxLen]
+	}
 
-		enc := json.NewEncoder(jsonWriter)
-		err = enc.Encode(logEvent)
+	logEvent["panic"] = output
+
+	// Logs are written to the configured file name. If no name is specified,
+	// logs are written to stderr.
+	var jsonWriter io.Writer
+	if config.LogFilename != "" {
+
+		retries, create, mode := config.GetLogFileReopenConfig()
+		panicLog, err := rotate.NewRotatableFileWriter(
+			config.LogFilename, retries, create, mode)
 		if err != nil {
-			fmt.Printf("unable to serialize panic message to JSON: %s\n%s\n", err, output)
-			os.Exit(1)
+			return errors.Trace(err)
 		}
+		defer panicLog.Close()
+
+		jsonWriter = panicLog
 	} else {
-		fmt.Printf("no configuration JSON was loaded, cannot continue\n%s\n", output)
-		os.Exit(1)
+		jsonWriter = os.Stderr
 	}
 
-	os.Exit(1)
+	err := json.NewEncoder(jsonWriter).Encode(logEvent)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	return nil
+}
+
+func panicHandlerProtobuf(config *server.Config, output string) error {
+
+	metricSocketWriter, err := udsipc.NewWriter(config.MetricSocketPath)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	metricSocketWriter.Start()
+	// No stop deadline; prioritize recording the panic
+	defer metricSocketWriter.Stop(context.Background())
+
+	// Avoid shipping a huge set of panic stacks. The main stack, which is
+	// first, and a selection of other stacks is sufficient.
+	maxLen := 32766
+	if len(output) > maxLen {
+		output = output[:maxLen]
+	}
+
+	psiphondMsg := &pb.Psiphond{
+		Timestamp:    timestamppb.Now(),
+		HostId:       &config.HostID,
+		HostBuildRev: &buildinfo.GetBuildInfo().BuildRev,
+		Provider:     &config.HostProvider,
+		Metric: &pb.Psiphond_ServerPanic{
+			ServerPanic: &pb.ServerPanic{
+				Panic: &output,
+			}},
+	}
+
+	routedMsg, err := server.NewProtobufRoutedMessage(
+		config.LogDestinationPrefix,
+		psiphondMsg)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	serialized, err := proto.Marshal(routedMsg)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	err = metricSocketWriter.WriteMessage(serialized)
+	if err != nil {
+		return errors.Trace(err)
+	}
+
+	return nil
 }

+ 14 - 0
psiphon/server/config.go

@@ -29,6 +29,7 @@ import (
 	"encoding/pem"
 	"net"
 	"os"
+	"slices"
 	"strconv"
 	"strings"
 	"sync/atomic"
@@ -675,6 +676,19 @@ func LoadConfig(configJSON []byte) (*Config, error) {
 		return nil, errors.Trace(err)
 	}
 
+	if !slices.Contains([]string{"", "json", "protobuf", "both"}, config.LogFormat) {
+		return nil, errors.Tracef("invalid log format: %s", logFormat)
+	}
+
+	if config.LogFormat == "protobuf" || config.LogFormat == "both" {
+		if config.LogDestinationPrefix == "" {
+			return nil, errors.TraceNew("LogDestinationPrefix must be set if protobuf logging is enabled")
+		}
+		if config.MetricSocketPath == "" {
+			return nil, errors.TraceNew("MetricSocketPath must be set if protobuf logging is enabled")
+		}
+	}
+
 	if config.ServerIPAddress == "" {
 		return nil, errors.TraceNew("ServerIPAddress is required")
 	}

+ 0 - 15
psiphon/server/log.go

@@ -26,7 +26,6 @@ import (
 	"io/ioutil"
 	go_log "log"
 	"os"
-	"slices"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -323,11 +322,6 @@ func InitLogging(config *Config) (retErr error) {
 			logFormat = "json"
 		}
 
-		if !slices.Contains([]string{"json", "protobuf", "both"}, logFormat) {
-			retErr = errors.Tracef("invalid log format: %s", logFormat)
-			return
-		}
-
 		shouldLogProtobuf = (logFormat == "protobuf" || logFormat == "both")
 		shouldLogJSON = (logFormat == "json" || logFormat == "both")
 
@@ -382,15 +376,6 @@ func InitLogging(config *Config) (retErr error) {
 		}
 
 		if shouldLogProtobuf {
-			if logDestinationPrefix == "" {
-				retErr = errors.TraceNew("LogDestinationPrefix must be set if protobuf logging is enabled")
-				return
-			}
-
-			if config.MetricSocketPath == "" {
-				retErr = errors.TraceNew("MetricSocketPath must be set if protobuf logging is enabled")
-				return
-			}
 
 			metricSocketWriter, retErr = udsipc.NewWriter(config.MetricSocketPath)
 			if retErr != nil {

+ 3 - 23
psiphon/server/pb/psiphond/server_panic.pb.go

@@ -24,8 +24,6 @@ const (
 type ServerPanic struct {
 	state         protoimpl.MessageState `protogen:"open.v1"`
 	Panic         *string                `protobuf:"bytes,100,opt,name=panic,proto3,oneof" json:"panic,omitempty"`
-	RecoverValue  *string                `protobuf:"bytes,101,opt,name=recover_value,json=recoverValue,proto3,oneof" json:"recover_value,omitempty"`
-	Stack         *string                `protobuf:"bytes,102,opt,name=stack,proto3,oneof" json:"stack,omitempty"`
 	unknownFields protoimpl.UnknownFields
 	sizeCache     protoimpl.SizeCache
 }
@@ -67,32 +65,14 @@ func (x *ServerPanic) GetPanic() string {
 	return ""
 }
 
-func (x *ServerPanic) GetRecoverValue() string {
-	if x != nil && x.RecoverValue != nil {
-		return *x.RecoverValue
-	}
-	return ""
-}
-
-func (x *ServerPanic) GetStack() string {
-	if x != nil && x.Stack != nil {
-		return *x.Stack
-	}
-	return ""
-}
-
 var File_ca_psiphon_psiphond_server_panic_proto protoreflect.FileDescriptor
 
 const file_ca_psiphon_psiphond_server_panic_proto_rawDesc = "" +
 	"\n" +
-	"&ca.psiphon.psiphond/server_panic.proto\x12\x13ca.psiphon.psiphond\"\x93\x01\n" +
+	"&ca.psiphon.psiphond/server_panic.proto\x12\x13ca.psiphon.psiphond\"2\n" +
 	"\vServerPanic\x12\x19\n" +
-	"\x05panic\x18d \x01(\tH\x00R\x05panic\x88\x01\x01\x12(\n" +
-	"\rrecover_value\x18e \x01(\tH\x01R\frecoverValue\x88\x01\x01\x12\x19\n" +
-	"\x05stack\x18f \x01(\tH\x02R\x05stack\x88\x01\x01B\b\n" +
-	"\x06_panicB\x10\n" +
-	"\x0e_recover_valueB\b\n" +
-	"\x06_stackBHZFgithub.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/server/pb/psiphondb\x06proto3"
+	"\x05panic\x18d \x01(\tH\x00R\x05panic\x88\x01\x01B\b\n" +
+	"\x06_panicBHZFgithub.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/server/pb/psiphondb\x06proto3"
 
 var (
 	file_ca_psiphon_psiphond_server_panic_proto_rawDescOnce sync.Once

+ 0 - 2
psiphon/server/proto/ca.psiphon.psiphond/server_panic.proto

@@ -8,6 +8,4 @@ message ServerPanic {
     // Fields 1-99 are reserved for field groupings.
 
     optional string panic = 100;
-    optional string recover_value = 101;
-    optional string stack = 102;
 }

+ 24 - 11
psiphon/server/protobufConverter.go

@@ -7,6 +7,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/inproxy"
 	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol"
 	pb "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/server/pb/psiphond"
@@ -53,7 +54,6 @@ var protobufMessageFieldGroups = map[string]protobufFieldGroupConfig{
 		baseParams: true,
 		dialParams: true,
 	},
-	"panic": {},
 	"tactics": {
 		baseParams: true,
 		dialParams: true,
@@ -63,28 +63,26 @@ var protobufMessageFieldGroups = map[string]protobufFieldGroupConfig{
 	},
 }
 
-// newProtobufRoutedMessage returns a new pointer to a populated Router
-// protobuf message. The error paths in this function should never be
-// reached, but in rare cases where they do, instead of returning an error,
-// we panic, and allow the existing recovery and logging message to capture
-// the error.
-func newProtobufRoutedMessage(msg proto.Message) *pbr.Router {
+// NewProtobufRoutedMessage returns a populated Router protobuf message.
+func NewProtobufRoutedMessage(
+	destinationPrefix string, msg proto.Message) (*pbr.Router, error) {
+
 	md := msg.ProtoReflect().Descriptor()
 	metric := md.Oneofs().ByName("metric")
 	if metric == nil {
-		panic("cannot find oneof field: metric")
+		return nil, errors.TraceNew("cannot find oneof field: metric")
 	}
 
 	messageType := string(md.FullName())
 
 	metricType := msg.ProtoReflect().WhichOneof(metric).TextName()
+
 	destination := strings.ToLower(strings.ReplaceAll(
-		fmt.Sprintf("%s-%s-%s", logDestinationPrefix, md.Name(), metricType), "_", "-",
-	))
+		fmt.Sprintf("%s-%s-%s", destinationPrefix, md.Name(), metricType), "_", "-"))
 
 	serialized, err := proto.Marshal(msg)
 	if err != nil {
-		panic(fmt.Errorf("failed to serialize inner protobuf message to bytes: %w", err))
+		return nil, errors.Trace(err)
 	}
 
 	return &pbr.Router{
@@ -92,7 +90,22 @@ func newProtobufRoutedMessage(msg proto.Message) *pbr.Router {
 		MessageType: &messageType,
 		Key:         []byte(logHostID),
 		Value:       serialized,
+	}, nil
+}
+
+// newProtobufRoutedMessage returns a new pointer to a populated Router
+// protobuf message. The error paths in this function should never be
+// reached, but in rare cases where they do, instead of returning an error,
+// we panic, and allow the existing recovery and logging message to capture
+// the error.
+func newProtobufRoutedMessage(msg proto.Message) *pbr.Router {
+
+	routedMsg, err := NewProtobufRoutedMessage(logDestinationPrefix, msg)
+	if err != nil {
+		panic(err.Error())
 	}
+
+	return routedMsg
 }
 
 // newPsiphondProtobufMessageWrapper returns a new pointer to a Psiphond