| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682 |
- /*
- * Copyright (c) 2018, Psiphon Inc.
- * All rights reserved.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- */
// Package analysis implements heuristic frequency analysis of Psiphon Tunnel
// Core server logs. Log lines are parsed into three distinct log types:
// message, metrics and unknown. Within each log type, the number of logs for
// each unique identifier is counted. The unique identifiers are as follows:
//   message: "msg" field
//   metrics: "event_name" field
//   unknown: key graph
- package analysis
- import (
- "bufio"
- "encoding/json"
- "fmt"
- "log"
- "os"
- "reflect"
- "regexp"
- "sort"
- "github.com/sirupsen/logrus"
- )
// LogLevel is the severity level of a message log.
type LogLevel int

// Log levels. LOG_LEVEL_UNKNOWN is -1. iota is already 1 on the second line
// of this const block, so LOG_LEVEL_DEBUG through LOG_LEVEL_ERROR take the
// values 1 through 4.
//
// LOG_LEVEL_UNKNOWN is explicitly typed so that it is a LogLevel like the
// other levels (and therefore has the String method) instead of being an
// untyped integer constant.
const (
	LOG_LEVEL_UNKNOWN LogLevel = -1
	LOG_LEVEL_DEBUG   LogLevel = iota
	LOG_LEVEL_INFO
	LOG_LEVEL_WARNING
	LOG_LEVEL_ERROR
)

// String returns the display name of the log level. LOG_LEVEL_UNKNOWN and
// any value that is not one of the declared levels yield "Unknown".
func (l LogLevel) String() string {
	switch l {
	case LOG_LEVEL_DEBUG:
		return "Debug"
	case LOG_LEVEL_INFO:
		return "Info"
	case LOG_LEVEL_WARNING:
		return "Warning"
	case LOG_LEVEL_ERROR:
		return "Error"
	}
	return "Unknown"
}
- type MetricsLogEventName string
- type MessageLogKey string
- type MessageLogName string
- type MessageLogContext string
- type MessageLogError string
- type LogFields logrus.Fields
- type node map[string]interface{}
- // Models for each psiphond log type
- type LogModel interface {
- JsonString() string
- Print(bool, bool)
- }
- type BaseLogModel struct {
- Example string
- Node node
- }
- type MessageLogModel struct {
- BaseLogModel
- Msg MessageLogName
- Level LogLevel
- MessageLogContext *MessageLogContext
- MessageLogError *MessageLogError
- }
- type MetricsLogModel struct {
- BaseLogModel
- Event MetricsLogEventName
- }
- type UnknownLogModel struct {
- BaseLogModel
- }
- func (a *BaseLogModel) equal(b BaseLogModel) bool {
- return a.Node.equal(b.Node)
- }
- func (a *MessageLogModel) key() MessageLogKey {
- var errorString string
- var context string
- if a.MessageLogError != nil {
- errorString = string(*a.MessageLogError)
- }
- if a.MessageLogContext != nil {
- context = string(*a.MessageLogContext)
- }
- return MessageLogKey(fmt.Sprintf("(%s,%d, %s,%s)", a.Msg, a.Level, errorString, context))
- }
- func (a *MessageLogContext) equal(b *MessageLogContext) bool {
- if a != nil && b != nil {
- return *a == *b
- } else if a == nil && b == nil {
- return true
- }
- return false
- }
- func (a *MessageLogError) equal(b *MessageLogError) bool {
- if a != nil && b != nil {
- return *a == *b
- } else if a == nil && b == nil {
- return true
- }
- return false
- }
- func (a *MessageLogModel) equal(b MessageLogModel) bool {
- if a.Msg != b.Msg {
- return false
- } else if a.Level != b.Level {
- return false
- }
- return a.MessageLogContext.equal(b.MessageLogContext) && a.MessageLogError.equal(b.MessageLogError)
- }
- func (a *MetricsLogModel) equal(b MetricsLogModel) bool {
- return a.Event == b.Event
- }
- func (a *UnknownLogModel) equal(b UnknownLogModel) bool {
- return a.Node.equal(b.Node)
- }
- // equal returns true if both nodes have the same key graphs.
- func (a *node) equal(b node) bool {
- for k, v := range *a {
- if val, ok := b[k]; ok {
- if reflect.TypeOf(v) != reflect.TypeOf(val) {
- return false
- }
- switch m := val.(type) {
- case nil:
- return true
- case node:
- vNode := v.(node)
- return vNode.equal(m)
- case []node:
- vNode := v.([]node)
- if len(vNode) != len(m) {
- return false
- }
- for i := range m {
- if !vNode[i].equal(m[i]) {
- return false
- }
- }
- default:
- log.Fatalf("Unexpected val.(type) of %v\n", reflect.TypeOf(val))
- }
- } else {
- return false
- }
- }
- return true
- }
- func (a *BaseLogModel) JsonString() string {
- b, err := json.Marshal(a.Node)
- if err != nil {
- log.Fatal(err)
- }
- return string(b)
- }
- func (a *BaseLogModel) Print(printStructure, printExample bool) {
- if printStructure {
- fmt.Printf("Structure: %s\n", a.JsonString())
- }
- if printExample {
- fmt.Println("ExampleText: ", a.Example)
- }
- }
- func (a *MessageLogModel) Print(printStructure, printExample bool) {
- fmt.Printf("MessageLog\n")
- fmt.Printf("MessageLogName: %s\n", a.Msg)
- if a.MessageLogError != nil {
- fmt.Printf("MessageLogError: %s\n", *a.MessageLogError)
- }
- if a.MessageLogContext != nil {
- fmt.Printf("MessageLogContext: %s\n", *a.MessageLogContext)
- }
- if printStructure {
- fmt.Printf("Structure: %s\n", a.JsonString())
- }
- if printExample {
- fmt.Println("ExampleText: ", a.Example)
- }
- }
- func (a *MetricsLogModel) Print(printStructure, printExample bool) {
- fmt.Printf("MetricsLog\n")
- fmt.Printf("MetricsLogEventName: %s\n", a.Event)
- if printStructure {
- fmt.Printf("Structure: %s\n", a.JsonString())
- }
- if printExample {
- fmt.Println("ExampleText: ", a.Example)
- }
- }
- func (a *UnknownLogModel) Print(printStructure, printExample bool) {
- fmt.Printf("UnknownLog\n")
- fmt.Printf("Structure: %s\n", a.JsonString())
- if printExample {
- fmt.Println("ExampleText: ", a.Example)
- }
- }
- // Stats for each log model
- type LogModelStatsMetrics interface {
- NumLogs() uint
- }
- type LogModelStats struct {
- Count uint
- }
- type MessageLogModelStats struct {
- LogModelStats
- MessageLogModel
- }
- type MetricsLogModelStats struct {
- LogModelStats
- MetricsLogModel
- }
- type UnknownLogModelStats struct {
- LogModelStats
- UnknownLogModel
- }
- func (a MessageLogModelStats) NumLogs() uint {
- return a.Count
- }
- func (a MetricsLogModelStats) NumLogs() uint {
- return a.Count
- }
- func (a UnknownLogModelStats) NumLogs() uint {
- return a.Count
- }
- func (a *MessageLogModelStats) Print(printStructure, printExample bool) {
- a.MessageLogModel.Print(printStructure, printExample)
- }
- func (a *MetricsLogModelStats) Print(printStructure, printExample bool) {
- a.MetricsLogModel.Print(printStructure, printExample)
- }
- func (a *UnknownLogModelStats) Print(printExample bool) {
- a.UnknownLogModel.Print(true, printExample)
- }
// safeDivide returns a / b, or 0 when b is zero.
func safeDivide(a, b float64) float64 {
	if b == 0 {
		return 0
	}
	return a / b
}
- func (a *MessageLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
- a.Print(printStructure, printExample)
- fmt.Printf("Count: %d of %d\n", a.Count, count)
- fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
- fmt.Printf("\n")
- }
- func (a *MetricsLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
- a.Print(printStructure, printExample)
- fmt.Printf("Count: %d of %d\n", a.Count, count)
- fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
- fmt.Printf("\n")
- }
- func (a *UnknownLogModelStats) PrintWithRelativePercent(count uint, printExample bool) {
- a.Print(printExample)
- fmt.Printf("Count: %d of %d\n", a.Count, count)
- fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
- fmt.Printf("\n")
- }
- // Log type stats
- // Aggregate log models by log type
- type LogTypeStats struct {
- Count uint
- }
- type MessageLogStats struct {
- LogTypeStats
- modelStats map[MessageLogKey]*MessageLogModelStats
- }
- type MetricsLogStats struct {
- LogTypeStats
- modelStats map[MetricsLogEventName]*MetricsLogModelStats
- }
- type UnknownLogStats struct {
- LogTypeStats
- modelStats []UnknownLogModelStats
- }
- func (a *MessageLogStats) Print() {
- for _, v := range a.Sort() {
- v.PrintWithRelativePercent(a.Count, false, false)
- }
- }
- func (a *MetricsLogStats) Print() {
- for _, v := range a.Sort() {
- v.PrintWithRelativePercent(a.Count, false, false)
- }
- }
- func (a *UnknownLogStats) Print() {
- for _, v := range a.Sort() {
- v.PrintWithRelativePercent(a.Count, true)
- }
- }
- func (a *MessageLogStats) Sort() []MessageLogModelStats {
- var s []MessageLogModelStats
- for _, v := range a.modelStats {
- if v != nil {
- s = append(s, *v)
- }
- }
- sort.Slice(s, func(i, j int) bool {
- return s[j].Count > s[i].Count
- })
- return s
- }
- func (a *MetricsLogStats) Sort() []MetricsLogModelStats {
- var s []MetricsLogModelStats
- for _, v := range a.modelStats {
- if v != nil {
- s = append(s, *v)
- }
- }
- sort.Slice(s, func(i, j int) bool {
- return s[j].Count > s[i].Count
- })
- return s
- }
- func (a *UnknownLogStats) Sort() []UnknownLogModelStats {
- var s []UnknownLogModelStats
- s = append(s, a.modelStats...)
- sort.Slice(s, func(i, j int) bool {
- return s[j].Count > s[i].Count
- })
- return s
- }
- // Log file stats
- type LogStats struct {
- MessageLogModels MessageLogStats
- MetricsLogModels MetricsLogStats
- UnknownLogModels UnknownLogStats
- }
- // NewLogStats initializes a new LogStats structure.
- func NewLogStats() (l *LogStats) {
- l = &LogStats{
- MessageLogModels: MessageLogStats{
- modelStats: make(map[MessageLogKey]*MessageLogModelStats),
- },
- MetricsLogModels: MetricsLogStats{
- modelStats: make(map[MetricsLogEventName]*MetricsLogModelStats),
- },
- UnknownLogModels: UnknownLogStats{
- modelStats: nil,
- },
- }
- return l
- }
- func NewLogStatsFromFiles(files []string) (l *LogStats, err error) {
- l = NewLogStats()
- for _, file := range files {
- err = l.ParseFile(file)
- if err != nil {
- return nil, err
- }
- }
- return l, nil
- }
- // ParseFile takes a psiphond log file as input, parses the log lines into log
- // models and updates the LogStats structure.
- func (l *LogStats) ParseFile(filename string) error {
- file, err := os.Open(filename)
- if err != nil {
- return err
- }
- defer file.Close()
- scanner := bufio.NewScanner(file)
- for scanner.Scan() {
- err := l.ParseLogLine(scanner.Text())
- if err != nil {
- return err
- }
- }
- if err := scanner.Err(); err != nil {
- return err
- }
- return nil
- }
- // ParseLogLine attempts to parse a log line into a log model and then updates the
- // LogStats structure.
- func (l *LogStats) ParseLogLine(log string) error {
- MessageLogModels := &l.MessageLogModels
- MetricsLogModels := &l.MetricsLogModels
- logModel, err := parseLogModel(log)
- if err != nil {
- return err
- }
- switch v := logModel.(type) {
- case *MessageLogModel:
- MessageLogModels.Count += 1
- if m, ok := MessageLogModels.modelStats[v.key()]; ok {
- m.Count += 1
- } else {
- MessageLogModels.modelStats[v.key()] = &MessageLogModelStats{LogModelStats{1}, *v}
- }
- case *MetricsLogModel:
- l.MetricsLogModels.Count += 1
- if m, ok := l.MetricsLogModels.modelStats[v.Event]; ok {
- m.Count += 1
- } else {
- MetricsLogModels.modelStats[v.Event] = &MetricsLogModelStats{LogModelStats{1}, *v}
- }
- case *UnknownLogModel:
- l.UnknownLogModels.Count += 1
- found := false
- for i := range l.UnknownLogModels.modelStats {
- if l.UnknownLogModels.modelStats[i].UnknownLogModel.equal(*v) {
- l.UnknownLogModels.modelStats[i].Count += 1
- found = true
- break
- }
- }
- if !found {
- l.UnknownLogModels.modelStats = append(l.UnknownLogModels.modelStats, UnknownLogModelStats{LogModelStats{1}, *v})
- }
- default:
- return fmt.Errorf("unexpected model type of %v", reflect.TypeOf(v))
- }
- return nil
- }
// ipAddressWithOptionalPortRegexp matches a dotted-quad IPv4 address,
// optionally followed by ":" and a port number. It is compiled once at
// package initialization instead of on every call.
//
// The port alternative for 65500-65529 is `655[0-2]\d`. The previous form,
// `655[0-2][0-9]\d`, required six digits, so a port such as 65512 was only
// matched up to "6551" and its last digit was left behind in the output.
var ipAddressWithOptionalPortRegexp = regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}(:(6553[0-5]|655[0-2]\d|65[0-4](\d){2}|6[0-4](\d){3}|[1-5](\d){4}|[1-9](\d){0,3}))?`)

// redactIpAddressesAndPorts returns a with every IPv4 address, together
// with its port when one follows, replaced by "<redacted>".
func redactIpAddressesAndPorts(a string) string {
	return ipAddressWithOptionalPortRegexp.ReplaceAllString(a, "<redacted>")
}
- // parseLogModel attempts to parse a string into a log model. It is expected
- // that the provided string is valid JSON.
- func parseLogModel(s string) (LogModel, error) {
- var m LogFields
- err := json.Unmarshal([]byte(s), &m)
- if err != nil {
- return nil, fmt.Errorf("failed to parse log line into JSON: %s", err)
- }
- var l LogModel
- var b BaseLogModel
- b.Example = s
- b.Node = parseNode(&m)
- if m["event_name"] != nil {
- l = &MetricsLogModel{
- BaseLogModel: b,
- Event: MetricsLogEventName(m["event_name"].(string)),
- }
- } else if m["msg"] != nil && m["level"] != nil {
- var level LogLevel
- switch m["level"].(string) {
- case "debug":
- level = LOG_LEVEL_DEBUG
- case "info":
- level = LOG_LEVEL_INFO
- case "warning":
- level = LOG_LEVEL_WARNING
- case "error":
- level = LOG_LEVEL_ERROR
- default:
- return nil, fmt.Errorf("unexpected log level: %s", m["level"].(string))
- }
- var context *MessageLogContext
- var err *MessageLogError
- if val, ok := m["context"]; ok {
- c := MessageLogContext(val.(string))
- context = &c
- }
- if val, ok := m["error"]; ok {
- errorWithIpsRedacted := redactIpAddressesAndPorts(val.(string))
- e := MessageLogError(errorWithIpsRedacted)
- err = &e
- }
- l = &MessageLogModel{
- BaseLogModel: b,
- Msg: MessageLogName(m["msg"].(string)),
- Level: level,
- MessageLogContext: context,
- MessageLogError: err,
- }
- } else {
- l = &UnknownLogModel{
- BaseLogModel: b,
- }
- }
- return l, nil
- }
- // parseNode takes a map and transforms it into a graph which represents its
- // structure.
- func parseNode(m *LogFields) node {
- n := make(node)
- for k, v := range *m {
- i := parseInterface(v)
- n[k] = i
- }
- return n
- }
- // parseInterface takes an interface and transforms it into a graph of its
- // structure.
- func parseInterface(i interface{}) interface{} {
- switch v := i.(type) {
- default:
- return nil
- case map[string]interface{}:
- l := LogFields(v)
- return parseNode(&l)
- case []interface{}:
- n := make([]node, 1)
- for i := range v {
- switch p := parseInterface(v[i]).(type) {
- case node:
- n = append(n, p)
- }
- }
- return n
- }
- }
- // sortLogModelsDescending merges all log models of different types and then
- // sorts them in ascending order by the number times each occurs. Returns the
- // sorted list and the total number of logs represented by each log model in
- // the list.
- func (l *LogStats) SortLogModels(messages, metrics, unknown bool) (models []interface{}, numLogs uint) {
- var messagesSort []MessageLogModelStats
- var metricsSort []MetricsLogModelStats
- var unknownSort []UnknownLogModelStats
- if messages {
- messagesSort = l.MessageLogModels.Sort()
- messages := make([]interface{}, len(messagesSort))
- for i, v := range messagesSort {
- messages[i] = v
- }
- models = append(models, messages...)
- numLogs += l.MessageLogModels.Count
- }
- if metrics {
- metricsSort = l.MetricsLogModels.Sort()
- metrics := make([]interface{}, len(metricsSort))
- for i, v := range metricsSort {
- metrics[i] = v
- }
- models = append(models, metrics...)
- numLogs += l.MetricsLogModels.Count
- }
- if unknown {
- unknownSort = l.UnknownLogModels.Sort()
- unknown := make([]interface{}, len(unknownSort))
- for i, v := range unknownSort {
- unknown[i] = v
- }
- models = append(models, unknown...)
- numLogs += l.UnknownLogModels.Count
- }
- sort.Slice(models, func(i, j int) bool {
- a := models[i].(LogModelStatsMetrics)
- b := models[j].(LogModelStatsMetrics)
- return b.NumLogs() > a.NumLogs()
- })
- return models, numLogs
- }
- // NumDistinctLogs returns the number of unique log models contained within the
- // LogStats structure.
- func (l *LogStats) NumDistinctLogs() uint {
- return uint(len(l.MessageLogModels.modelStats) + len(l.MetricsLogModels.modelStats) + len(l.UnknownLogModels.modelStats))
- }
- func (l *LogStats) Print(messages, metrics, unknown, printStructure, printExample bool) {
- logs, numLogs := l.SortLogModels(messages, metrics, unknown)
- for _, x := range logs {
- switch v := x.(type) {
- case MessageLogModelStats:
- v.PrintWithRelativePercent(numLogs, printStructure, printExample)
- case MetricsLogModelStats:
- v.PrintWithRelativePercent(numLogs, printStructure, printExample)
- case UnknownLogModelStats:
- v.PrintWithRelativePercent(numLogs, printExample)
- }
- }
- }
|