Просмотр исходного кода

Merge pull request #495 from mirokuratczyk/log-analysis

Psiphon Tunnel Core server log analysis
Rod Hynes 7 лет назад
Родитель
Сommit
ee1bdc4dc0
4 измененных файлов с 1049 добавлено и 0 удалено
  1. 2 0
      .travis.yml
  2. 684 0
      Server/logging/analysis/analysis.go
  3. 248 0
      Server/logging/analysis/analysis_test.go
  4. 115 0
      Server/logging/main.go

+ 2 - 0
.travis.yml

@@ -28,6 +28,7 @@ script:
 - go test -race -v ./transferstats
 - go test -race -v ./server
 - go test -race -v ./server/psinet
+- go test -race -v ../Server/logging/analysis
 - go test -race -v
 - go test -v -covermode=count -coverprofile=common.coverprofile ./common
 - go test -v -covermode=count -coverprofile=accesscontrol.coverprofile ./common/accesscontrol
@@ -44,6 +45,7 @@ script:
 - go test -v -covermode=count -coverprofile=transferstats.coverprofile ./transferstats
 - go test -v -covermode=count -coverprofile=server.coverprofile ./server
 - go test -v -covermode=count -coverprofile=psinet.coverprofile ./server/psinet
+- go test -v -covermode=count -coverprofile=analysis.coverprofile ../Server/logging/analysis
 - go test -v -covermode=count -coverprofile=psiphon.coverprofile
 - go test -v ./memory_test -run TestReconnectTunnel
 - go test -v ./memory_test -run TestRestartController

+ 684 - 0
Server/logging/analysis/analysis.go

@@ -0,0 +1,684 @@
+/*
+ * Copyright (c) 2018, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+// Package analysis implements heuristical frequency analysis of Psiphon Tunnel
+// Core server logs. Log lines are parsed into 3 distinct log types: message,
+// metrics and unknown. Under these log types the number of logs of each unique
+// identifier is counted. The unique identifiers are as follows:
+// message: "msg" field
+// metrics: "event_name" field
+// unknown: key graph
+package analysis
+
+import (
+	"bufio"
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+	"reflect"
+	"regexp"
+	"sort"
+
+	"github.com/sirupsen/logrus"
+)
+
+type LogLevel int
+
+const (
+	LOG_LEVEL_UNKNOWN          = -1
+	LOG_LEVEL_DEBUG   LogLevel = iota
+	LOG_LEVEL_INFO
+	LOG_LEVEL_WARNING
+	LOG_LEVEL_ERROR
+)
+
+func (l LogLevel) String() string {
+	switch l {
+	default:
+		return "Unknown"
+	case LOG_LEVEL_UNKNOWN:
+		return "Unknown"
+	case LOG_LEVEL_DEBUG:
+		return "Debug"
+	case LOG_LEVEL_INFO:
+		return "Info"
+	case LOG_LEVEL_WARNING:
+		return "Warning"
+	case LOG_LEVEL_ERROR:
+		return "Error"
+	}
+}
+
+type MetricsLogEventName string
+type MessageLogKey string
+type MessageLogName string
+type MessageLogContext string
+type MessageLogError string
+type LogFields logrus.Fields
+type node map[string]interface{}
+
+// Models for each psiphond log type
+
+type LogModel interface {
+	JsonString() string
+	Print(bool, bool)
+}
+
+type BaseLogModel struct {
+	Example string
+	Node    node
+}
+
+type MessageLogModel struct {
+	BaseLogModel
+	Msg               MessageLogName
+	Level             LogLevel
+	MessageLogContext *MessageLogContext
+	MessageLogError   *MessageLogError
+}
+
+type MetricsLogModel struct {
+	BaseLogModel
+	Event MetricsLogEventName
+}
+
+type UnknownLogModel struct {
+	BaseLogModel
+}
+
+func (a *BaseLogModel) equal(b BaseLogModel) bool {
+	return a.Node.equal(b.Node)
+}
+
+func (a *MessageLogModel) key() MessageLogKey {
+	var errorString string
+	var context string
+
+	if a.MessageLogError != nil {
+		errorString = string(*a.MessageLogError)
+	}
+	if a.MessageLogContext != nil {
+		context = string(*a.MessageLogContext)
+	}
+
+	return MessageLogKey(fmt.Sprintf("(%s,%s, %s,%s)", MessageLogKey(a.Msg), MessageLogKey(a.Level), errorString, context))
+}
+
+func (a *MessageLogContext) equal(b *MessageLogContext) bool {
+	if a != nil && b != nil {
+		return *a == *b
+	} else if a == nil && b == nil {
+		return true
+	}
+	return false
+}
+
+func (a *MessageLogError) equal(b *MessageLogError) bool {
+	if a != nil && b != nil {
+		return *a == *b
+	} else if a == nil && b == nil {
+		return true
+	}
+	return false
+}
+
+func (a *MessageLogModel) equal(b MessageLogModel) bool {
+	if a.Msg != b.Msg {
+		return false
+	} else if a.Level != b.Level {
+		return false
+	}
+
+	return a.MessageLogContext.equal(b.MessageLogContext) && a.MessageLogError.equal(b.MessageLogError)
+}
+
+func (a *MetricsLogModel) equal(b MetricsLogModel) bool {
+	return a.Event == b.Event
+}
+
+func (a *UnknownLogModel) equal(b UnknownLogModel) bool {
+	return a.Node.equal(b.Node)
+}
+
+// equal returns true if both nodes have the same key graphs.
+func (a *node) equal(b node) bool {
+	for k, v := range *a {
+		if val, ok := b[k]; ok {
+			if reflect.TypeOf(v) != reflect.TypeOf(val) {
+				return false
+			}
+			switch m := val.(type) {
+			case nil:
+				return true
+			case node:
+				vNode := v.(node)
+				return vNode.equal(m)
+			case []node:
+				vNode := v.([]node)
+				if len(vNode) != len(m) {
+					return false
+				}
+				for i := range m {
+					if vNode[i].equal(m[i]) != true {
+						return false
+					}
+				}
+			default:
+				log.Fatalf("Unexpected val.(type) of %v\n", reflect.TypeOf(val))
+			}
+		} else {
+			return false
+		}
+	}
+	return true
+}
+
+func (a *BaseLogModel) JsonString() string {
+	b, err := json.Marshal(a.Node)
+	if err != nil {
+		log.Fatal(err)
+	}
+	return string(b)
+}
+
+func (a *BaseLogModel) Print(printStructure, printExample bool) {
+	if printStructure {
+		fmt.Printf("Structure: %s\n", a.JsonString())
+	}
+	if printExample {
+		fmt.Println("ExampleText: ", a.Example)
+	}
+}
+
+func (a *MessageLogModel) Print(printStructure, printExample bool) {
+	fmt.Printf("MessageLog\n")
+	fmt.Printf("MessageLogName: %s\n", a.Msg)
+
+	if a.MessageLogError != nil {
+		fmt.Printf("MessageLogError: %s\n", *a.MessageLogError)
+	}
+	if a.MessageLogContext != nil {
+		fmt.Printf("MessageLogContext: %s\n", *a.MessageLogContext)
+	}
+
+	if printStructure {
+		fmt.Printf("Structure: %s\n", a.JsonString())
+	}
+	if printExample {
+		fmt.Println("ExampleText: ", a.Example)
+	}
+}
+
+func (a *MetricsLogModel) Print(printStructure, printExample bool) {
+	fmt.Printf("MetricsLog\n")
+	fmt.Printf("MetricsLogEventName: %s\n", a.Event)
+	if printStructure {
+		fmt.Printf("Structure: %s\n", a.JsonString())
+	}
+	if printExample {
+		fmt.Println("ExampleText: ", a.Example)
+	}
+}
+
+func (a *UnknownLogModel) Print(printStructure, printExample bool) {
+	fmt.Printf("UnknownLog\n")
+	fmt.Printf("Structure: %s\n", a.JsonString())
+	if printExample {
+		fmt.Println("ExampleText: ", a.Example)
+	}
+}
+
+// Stats for each log model
+
+type LogModelStatsMetrics interface {
+	NumLogs() uint
+}
+
+type LogModelStats struct {
+	Count uint
+}
+
+type MessageLogModelStats struct {
+	LogModelStats
+	MessageLogModel
+}
+
+type MetricsLogModelStats struct {
+	LogModelStats
+	MetricsLogModel
+}
+
+type UnknownLogModelStats struct {
+	LogModelStats
+	UnknownLogModel
+}
+
+func (a MessageLogModelStats) NumLogs() uint {
+	return a.Count
+}
+
+func (a MetricsLogModelStats) NumLogs() uint {
+	return a.Count
+}
+
+func (a UnknownLogModelStats) NumLogs() uint {
+	return a.Count
+}
+
+func (a *MessageLogModelStats) Print(printStructure, printExample bool) {
+	a.MessageLogModel.Print(printStructure, printExample)
+}
+
+func (a *MetricsLogModelStats) Print(printStructure, printExample bool) {
+	a.MetricsLogModel.Print(printStructure, printExample)
+}
+
+func (a *UnknownLogModelStats) Print(printExample bool) {
+	a.UnknownLogModel.Print(true, printExample)
+}
+
+func safeDivide(a, b float64) float64 {
+	if b != 0 {
+		return a / b
+	}
+	return 0
+}
+
+func (a *MessageLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
+	a.Print(printStructure, printExample)
+	fmt.Printf("Count: %d of %d\n", a.Count, count)
+	fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
+	fmt.Printf("\n")
+}
+
+func (a *MetricsLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
+	a.Print(printStructure, printExample)
+	fmt.Printf("Count: %d of %d\n", a.Count, count)
+	fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
+	fmt.Printf("\n")
+}
+
+func (a *UnknownLogModelStats) PrintWithRelativePercent(count uint, printExample bool) {
+	a.Print(printExample)
+	fmt.Printf("Count: %d of %d\n", a.Count, count)
+	fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
+	fmt.Printf("\n")
+}
+
+// Log type stats
+// Aggregate log models by log type
+
+type LogTypeStats struct {
+	Count uint
+}
+
+type MessageLogStats struct {
+	LogTypeStats
+	modelStats map[MessageLogKey]*MessageLogModelStats
+}
+
+type MetricsLogStats struct {
+	LogTypeStats
+	modelStats map[MetricsLogEventName]*MetricsLogModelStats
+}
+
+type UnknownLogStats struct {
+	LogTypeStats
+	modelStats []UnknownLogModelStats
+}
+
+func (a *MessageLogStats) Print() {
+	for _, v := range a.Sort() {
+		v.PrintWithRelativePercent(a.Count, false, false)
+	}
+}
+
+func (a *MetricsLogStats) Print() {
+	for _, v := range a.Sort() {
+		v.PrintWithRelativePercent(a.Count, false, false)
+	}
+}
+
+func (a *UnknownLogStats) Print() {
+	for _, v := range a.Sort() {
+		v.PrintWithRelativePercent(a.Count, true)
+	}
+}
+
+func (a *MessageLogStats) Sort() []MessageLogModelStats {
+	var s []MessageLogModelStats
+	for _, v := range a.modelStats {
+		if v != nil {
+			s = append(s, *v)
+		}
+	}
+
+	sort.Slice(s, func(i, j int) bool {
+		return s[j].Count > s[i].Count
+	})
+
+	return s
+}
+
+func (a *MetricsLogStats) Sort() []MetricsLogModelStats {
+	var s []MetricsLogModelStats
+	for _, v := range a.modelStats {
+		if v != nil {
+			s = append(s, *v)
+		}
+	}
+
+	sort.Slice(s, func(i, j int) bool {
+		return s[j].Count > s[i].Count
+	})
+	return s
+}
+
+func (a *UnknownLogStats) Sort() []UnknownLogModelStats {
+	var s []UnknownLogModelStats
+	for _, v := range a.modelStats {
+		s = append(s, v)
+	}
+
+	sort.Slice(s, func(i, j int) bool {
+		return s[j].Count > s[i].Count
+	})
+	return s
+}
+
+// Log file stats
+
+type LogStats struct {
+	MessageLogModels MessageLogStats
+	MetricsLogModels MetricsLogStats
+	UnknownLogModels UnknownLogStats
+}
+
+// NewLogStats initializes a new LogStats structure.
+func NewLogStats() (l *LogStats) {
+	l = &LogStats{
+		MessageLogModels: MessageLogStats{
+			modelStats: make(map[MessageLogKey]*MessageLogModelStats),
+		},
+		MetricsLogModels: MetricsLogStats{
+			modelStats: make(map[MetricsLogEventName]*MetricsLogModelStats),
+		},
+		UnknownLogModels: UnknownLogStats{
+			modelStats: nil,
+		},
+	}
+
+	return l
+}
+
+func NewLogStatsFromFiles(files []string) (l *LogStats, err error) {
+	l = NewLogStats()
+
+	for _, file := range files {
+		err = l.ParseFile(file)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return l, nil
+}
+
+// ParseFile takes a psiphond log file as input, parses the log lines into log
+// models and updates the LogStats structure.
+func (l *LogStats) ParseFile(filename string) error {
+	file, err := os.Open(filename)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		err := l.ParseLogLine(scanner.Text())
+		if err != nil {
+			return err
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// ParseLogLine attempts to parse a log line into a log model and then updates the
+// LogStats structure.
+func (l *LogStats) ParseLogLine(log string) error {
+	MessageLogModels := &l.MessageLogModels
+	MetricsLogModels := &l.MetricsLogModels
+
+	logModel, err := parseLogModel(log)
+	if err != nil {
+		return err
+	}
+
+	switch v := logModel.(type) {
+	case *MessageLogModel:
+		MessageLogModels.Count += 1
+
+		if m, ok := MessageLogModels.modelStats[v.key()]; ok {
+			m.Count += 1
+		} else {
+			MessageLogModels.modelStats[v.key()] = &MessageLogModelStats{LogModelStats{1}, *v}
+		}
+	case *MetricsLogModel:
+		l.MetricsLogModels.Count += 1
+		if m, ok := l.MetricsLogModels.modelStats[v.Event]; ok {
+			m.Count += 1
+		} else {
+			MetricsLogModels.modelStats[v.Event] = &MetricsLogModelStats{LogModelStats{1}, *v}
+		}
+	case *UnknownLogModel:
+		l.UnknownLogModels.Count += 1
+		found := false
+		for i := range l.UnknownLogModels.modelStats {
+			if l.UnknownLogModels.modelStats[i].UnknownLogModel.equal(*v) {
+				l.UnknownLogModels.modelStats[i].Count += 1
+				found = true
+				break
+			}
+		}
+		if !found {
+			l.UnknownLogModels.modelStats = append(l.UnknownLogModels.modelStats, UnknownLogModelStats{LogModelStats{1}, *v})
+		}
+	default:
+		return fmt.Errorf("Unexpected model type of %v\n", reflect.TypeOf(v))
+	}
+
+	return nil
+}
+
+func redactIpAddressesAndPorts(a string) string {
+	ipAddressWithOptionalPort := regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}(:(6553[0-5]|655[0-2][0-9]\d|65[0-4](\d){2}|6[0-4](\d){3}|[1-5](\d){4}|[1-9](\d){0,3}))?`)
+	return ipAddressWithOptionalPort.ReplaceAllString(a, "<redacted>")
+}
+
+// parseLogModel attempts to parse a string into a log model. It is expected
+// that the provided string is valid JSON.
+func parseLogModel(s string) (LogModel, error) {
+	var m LogFields
+	err := json.Unmarshal([]byte(s), &m)
+	if err != nil {
+		return nil, fmt.Errorf("Failed to parse log line into JSON: %s", err)
+	}
+
+	var l LogModel
+	var b BaseLogModel
+	b.Example = s
+	b.Node = parseNode(&m)
+
+	if m["event_name"] != nil {
+		l = &MetricsLogModel{
+			BaseLogModel: b,
+			Event:        MetricsLogEventName(m["event_name"].(string)),
+		}
+	} else if m["msg"] != nil && m["level"] != nil {
+		var level LogLevel
+		switch m["level"].(string) {
+		case "debug":
+			level = LOG_LEVEL_DEBUG
+		case "info":
+			level = LOG_LEVEL_INFO
+		case "warning":
+			level = LOG_LEVEL_WARNING
+		case "error":
+			level = LOG_LEVEL_ERROR
+		default:
+			return nil, fmt.Errorf("Unexpected log level: %s\n", m["level"].(string))
+		}
+
+		var context *MessageLogContext
+		var err *MessageLogError
+
+		if val, ok := m["context"]; ok {
+			c := MessageLogContext(val.(string))
+			context = &c
+		}
+
+		if val, ok := m["error"]; ok {
+			errorWithIpsRedacted := redactIpAddressesAndPorts(val.(string))
+			e := MessageLogError(errorWithIpsRedacted)
+			err = &e
+		}
+
+		l = &MessageLogModel{
+			BaseLogModel:      b,
+			Msg:               MessageLogName(m["msg"].(string)),
+			Level:             level,
+			MessageLogContext: context,
+			MessageLogError:   err,
+		}
+	} else {
+		l = &UnknownLogModel{
+			BaseLogModel: b,
+		}
+	}
+
+	return l, nil
+}
+
+// parseNode takes a map and transforms it into a graph which represents its
+// structure.
+func parseNode(m *LogFields) node {
+	n := make(node)
+	for k, v := range *m {
+		i := parseInterface(v)
+		n[k] = i
+	}
+	return n
+}
+
+// parseInterface takes an interface and transforms it into a graph of its
+// structure.
+func parseInterface(i interface{}) interface{} {
+	switch v := i.(type) {
+	default:
+		return nil
+	case map[string]interface{}:
+		l := LogFields(v)
+		return parseNode(&l)
+	case []interface{}:
+		n := make([]node, 1)
+		for i := range v {
+			switch p := parseInterface(v[i]).(type) {
+			case node:
+				n = append(n, p)
+			}
+		}
+		return n
+	}
+}
+
+// sortLogModelsDescending merges all log models of different types and then
+// sorts them in ascending order by the number times each occurs. Returns the
+// sorted list and the total number of logs represented by each log model in
+// the list.
+func (l *LogStats) SortLogModels(messages, metrics, unknown bool) (models []interface{}, numLogs uint) {
+	var messagesSort []MessageLogModelStats
+	var metricsSort []MetricsLogModelStats
+	var unknownSort []UnknownLogModelStats
+
+	if messages {
+		messagesSort = l.MessageLogModels.Sort()
+		messages := make([]interface{}, len(messagesSort))
+		for i, v := range messagesSort {
+			messages[i] = v
+		}
+		models = append(models, messages...)
+		numLogs += l.MessageLogModels.Count
+	}
+
+	if metrics {
+		metricsSort = l.MetricsLogModels.Sort()
+		metrics := make([]interface{}, len(metricsSort))
+		for i, v := range metricsSort {
+			metrics[i] = v
+		}
+		models = append(models, metrics...)
+		numLogs += l.MetricsLogModels.Count
+	}
+
+	if unknown {
+		unknownSort = l.UnknownLogModels.Sort()
+		unknown := make([]interface{}, len(unknownSort))
+		for i, v := range unknownSort {
+			unknown[i] = v
+		}
+		models = append(models, unknown...)
+		numLogs += l.UnknownLogModels.Count
+	}
+
+	sort.Slice(models, func(i, j int) bool {
+		a := models[i].(LogModelStatsMetrics)
+		b := models[j].(LogModelStatsMetrics)
+
+		return b.NumLogs() > a.NumLogs()
+	})
+
+	return models, numLogs
+}
+
+// NumDistinctLogs returns the number of unique log models contained within the
+// LogStats structure.
+func (l *LogStats) NumDistinctLogs() uint {
+	return uint(len(l.MessageLogModels.modelStats) + len(l.MetricsLogModels.modelStats) + len(l.UnknownLogModels.modelStats))
+}
+
+func (l *LogStats) Print(messages, metrics, unknown, printStructure, printExample bool) {
+	logs, numLogs := l.SortLogModels(messages, metrics, unknown)
+
+	for _, x := range logs {
+		switch v := x.(type) {
+		case MessageLogModelStats:
+			v.PrintWithRelativePercent(numLogs, printStructure, printExample)
+		case MetricsLogModelStats:
+			v.PrintWithRelativePercent(numLogs, printStructure, printExample)
+		case UnknownLogModelStats:
+			v.PrintWithRelativePercent(numLogs, printExample)
+		}
+	}
+}

+ 248 - 0
Server/logging/analysis/analysis_test.go

@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2018, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+package analysis
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestAllLogModelsAndSorting(t *testing.T) {
+	l := parseLogsAndTestExpectations(logLinesWithExpectations(), t)
+
+	logs, _ := l.SortLogModels(true, true, true)
+	var prevLogCount uint
+
+	for _, x := range logs {
+		var count uint
+
+		switch v := x.(type) {
+		case MessageLogModelStats:
+			count = v.Count
+		case MetricsLogModelStats:
+			count = v.Count
+		case UnknownLogModelStats:
+			count = v.Count
+		default:
+			t.Errorf("Encountered unexpected struct of type %v\n", reflect.TypeOf(v))
+		}
+
+		if prevLogCount != 0 && prevLogCount > count {
+			t.Errorf("Expected list to be sorted in ascending order")
+		}
+		prevLogCount = count
+	}
+}
+
+func TestMessageLogsWithErrorAndContext(t *testing.T) {
+	logs := []LogLineWithExpectation{
+		// The following messages should parse into 4 distinct log models
+		messageLogExpectation(`{"msg":"a", "level":"info"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info", "error": "b"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info", "error": "b"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info", "context": "c"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info", "context": "c"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info", "error": "b", "context": "c"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info", "error": "b", "context": "c"}`),
+
+		// The following messages should parse into 2 distinct log models
+		messageLogExpectation(`{"msg":"b", "level":"info", "error": "b"}`),
+		messageLogExpectation(`{"msg":"b", "level":"info", "context": "b"}`),
+
+		// The following messages should parse into 2 distinct log models
+		messageLogExpectation(`{"msg":"c", "level":"info"}`),
+		messageLogExpectation(`{"msg":"c", "level":"warning"}`),
+	}
+
+	l := parseLogsAndTestExpectations(logs, t)
+
+	numLogModels := len(l.MessageLogModels.modelStats)
+	expectedUniqueModels := 8
+	if numLogModels != expectedUniqueModels {
+		t.Errorf("Expected %d message log models but found %d\n", expectedUniqueModels, numLogModels)
+	}
+}
+
+func TestMessageLogsWithRedactedIpAddresses(t *testing.T) {
+	logs := []LogLineWithExpectation{
+		// The following messages should parse into 1 distinct log model
+		messageLogExpectation(`{"msg":"a", "level":"info", "error": "1.1.1.1->2.2.2.2"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info", "error": "3.3.3.3->4.4.4.4"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info", "error": "1.1.1.1->2.2.2.2:1"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info", "error": "1.1.1.1->2.2.2.2:65535"}`),
+	}
+
+	l := parseLogsAndTestExpectations(logs, t)
+
+	numLogModels := len(l.MessageLogModels.modelStats)
+	expectedUniqueModels := 1
+	if numLogModels != expectedUniqueModels {
+		t.Errorf("Expected %d message log models but found %d\n", expectedUniqueModels, numLogModels)
+	}
+}
+
+// Helpers
+
+type LogLineWithExpectation struct {
+	log     string
+	expects parseLineExpectation
+}
+
+type parseLineExpectation struct {
+	error   bool
+	message bool
+	metrics bool
+	unknown bool
+}
+
+func parseLogsAndTestExpectations(expectations []LogLineWithExpectation, t *testing.T) (l *LogStats) {
+	l = NewLogStats()
+
+	for _, expectation := range expectations {
+		snapshot := *l
+
+		err := l.ParseLogLine(expectation.log)
+
+		// Check that the expectation is valid
+		if !(expectation.expects.error || expectation.expects.message ||
+			expectation.expects.metrics || expectation.expects.unknown) {
+			t.Errorf("Malformed expectation expects nothing")
+			t.FailNow()
+		}
+
+		// Check error expectation
+		if err != nil {
+			if expectation.expects.error != true {
+				t.Errorf("Unexpected error: < %s >, from log line: \"%s\"\n", err, expectation.log)
+			}
+		}
+
+		// Check message expectation
+		if expectation.expects.message {
+			if l.MessageLogModels.Count != snapshot.MessageLogModels.Count+1 {
+				t.Errorf("Expected message log from: \"%s\"\n", expectation.log)
+			}
+		}
+
+		// Check metric expectation
+		if expectation.expects.metrics {
+			if l.MetricsLogModels.Count != snapshot.MetricsLogModels.Count+1 {
+				t.Errorf("Expected metric log model from: \"%s\"\n", expectation.log)
+			}
+		}
+
+		// Check unknown expectation
+		if expectation.expects.unknown {
+			if l.UnknownLogModels.Count != snapshot.UnknownLogModels.Count+1 {
+				t.Errorf("Expected unknown log model from: \"%s\"\n", expectation.log)
+			}
+		}
+	}
+
+	return l
+}
+
+func logLinesWithExpectations() (l []LogLineWithExpectation) {
+	l = []LogLineWithExpectation{
+
+		// ************
+		// Message logs
+		// ************
+
+		// Test collision of basic message logs
+		messageLogExpectation(`{"msg":"a", "level":"info"}`),
+		messageLogExpectation(`{"msg":"a", "level":"info"}`),
+
+		// Different valid levels
+		messageLogExpectation(`{"msg":"a", "level":"debug"}`),
+		messageLogExpectation(`{"msg":"a", "level":"warning"}`),
+		messageLogExpectation(`{"msg":"a", "level":"error"}`),
+
+		// ************
+		// Metrics logs
+		// ************
+
+		// Test collision of basic metrics logs
+		metricsLogExpectation(`{"event_name":"a"}`),
+		metricsLogExpectation(`{"event_name":"a"}`),
+
+		// ************
+		// Unknown logs
+		// ************
+
+		unknownLogExpectation(`{}`),
+		unknownLogExpectation(`{"a":"b"}`),
+
+		// Test collision of unknown logs with depth
+		unknownLogExpectation(`{"a":{"b":[{"c":{}}]}}`),
+		unknownLogExpectation(`{"a":{"b":[{"c":{}}]}}`),
+
+		// Message log line missing level field
+		unknownLogExpectation(`{"msg":"a"}`),
+
+		// **************
+		// Malformed logs
+		// **************
+
+		malformedLogExpectation(`{`),
+		// Invalid message log levels
+		malformedLogExpectation(`{"msg":"a", "level":"{"}`),
+		malformedLogExpectation(`{"msg":"a", "level":"unknown"}`),
+	}
+
+	return l
+}
+
+func messageLogExpectation(log string) LogLineWithExpectation {
+	return LogLineWithExpectation{
+		log: log,
+		expects: parseLineExpectation{
+			message: true,
+		},
+	}
+}
+
+func metricsLogExpectation(log string) LogLineWithExpectation {
+	return LogLineWithExpectation{
+		log: log,
+		expects: parseLineExpectation{
+			metrics: true,
+		},
+	}
+}
+
+func unknownLogExpectation(log string) LogLineWithExpectation {
+	return LogLineWithExpectation{
+		log: log,
+		expects: parseLineExpectation{
+			unknown: true,
+		},
+	}
+}
+
+func malformedLogExpectation(log string) LogLineWithExpectation {
+	return LogLineWithExpectation{
+		log: log,
+		expects: parseLineExpectation{
+			error: true,
+		},
+	}
+}

+ 115 - 0
Server/logging/main.go

@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2018, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/Psiphon-Labs/psiphon-tunnel-core/Server/logging/analysis"
+)
+
+type stringListFlag []string
+
+func (list *stringListFlag) String() string {
+	return strings.Join(*list, ", ")
+}
+
+func (list *stringListFlag) Set(flagValue string) error {
+	*list = append(*list, flagValue)
+	return nil
+}
+
+func main() {
+
+	var logFileList stringListFlag
+	var printMessages bool
+	var printMetrics bool
+	var printUnknowns bool
+	var printStructure bool
+	var printExample bool
+
+	flag.Var(
+		&logFileList,
+		"file",
+		"file to analyze; flag may be repeated to analyze multiple files")
+
+	flag.BoolVar(
+		&printMessages,
+		"messages",
+		false,
+		"display message type logs")
+
+	flag.BoolVar(
+		&printMetrics,
+		"metrics",
+		false,
+		"display metric type logs")
+
+	flag.BoolVar(
+		&printUnknowns,
+		"unknown",
+		false,
+		"display logs of an unknown type")
+
+	flag.BoolVar(
+		&printStructure,
+		"structure",
+		false,
+		"print each log model with its key graph structure")
+
+	flag.BoolVar(
+		&printExample,
+		"example",
+		false,
+		"print each log model with an example")
+
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr,
+			"Usage:\n\n"+
+				"%s <flags>\n"+
+				os.Args[0], os.Args[0]+"\n\n")
+		fmt.Printf("\n")
+		flag.PrintDefaults()
+	}
+
+	flag.Parse()
+
+	if len(logFileList) < 1 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	logFileStats, err := analysis.NewLogStatsFromFiles(logFileList)
+	if err != nil {
+		fmt.Printf("Error while parsing log files: %s\n", err)
+		os.Exit(1)
+	}
+
+	logFileStats.Print(printMessages, printMetrics, printUnknowns, printStructure, printExample)
+
+	fmt.Printf("Found %d messages, %d metrics and %d unknown logs with a total of %d distinct types of logs\n",
+		logFileStats.MessageLogModels.Count,
+		logFileStats.MetricsLogModels.Count,
+		logFileStats.UnknownLogModels.Count,
+		logFileStats.NumDistinctLogs())
+}