analysis.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684
  1. /*
  2. * Copyright (c) 2018, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package analysis implements heuristical frequency analysis of Psiphon Tunnel
  20. // Core server logs. Log lines are parsed into 3 distinct log types: message,
  21. // metrics and unknown. Under these log types the number of logs of each unique
  22. // identifier is counted. The unique identifiers are as follows:
  23. // message: "msg" field
  24. // metrics: "event_name" field
  25. // unknown: key graph
  26. package analysis
  27. import (
  28. "bufio"
  29. "encoding/json"
  30. "fmt"
  31. "log"
  32. "os"
  33. "reflect"
  34. "regexp"
  35. "sort"
  36. "github.com/sirupsen/logrus"
  37. )
  38. type LogLevel int
  39. const (
  40. LOG_LEVEL_UNKNOWN = -1
  41. LOG_LEVEL_DEBUG LogLevel = iota
  42. LOG_LEVEL_INFO
  43. LOG_LEVEL_WARNING
  44. LOG_LEVEL_ERROR
  45. )
  46. func (l LogLevel) String() string {
  47. switch l {
  48. default:
  49. return "Unknown"
  50. case LOG_LEVEL_UNKNOWN:
  51. return "Unknown"
  52. case LOG_LEVEL_DEBUG:
  53. return "Debug"
  54. case LOG_LEVEL_INFO:
  55. return "Info"
  56. case LOG_LEVEL_WARNING:
  57. return "Warning"
  58. case LOG_LEVEL_ERROR:
  59. return "Error"
  60. }
  61. }
  62. type MetricsLogEventName string
  63. type MessageLogKey string
  64. type MessageLogName string
  65. type MessageLogContext string
  66. type MessageLogError string
  67. type LogFields logrus.Fields
  68. type node map[string]interface{}
  69. // Models for each psiphond log type
  70. type LogModel interface {
  71. JsonString() string
  72. Print(bool, bool)
  73. }
  74. type BaseLogModel struct {
  75. Example string
  76. Node node
  77. }
  78. type MessageLogModel struct {
  79. BaseLogModel
  80. Msg MessageLogName
  81. Level LogLevel
  82. MessageLogContext *MessageLogContext
  83. MessageLogError *MessageLogError
  84. }
  85. type MetricsLogModel struct {
  86. BaseLogModel
  87. Event MetricsLogEventName
  88. }
  89. type UnknownLogModel struct {
  90. BaseLogModel
  91. }
  92. func (a *BaseLogModel) equal(b BaseLogModel) bool {
  93. return a.Node.equal(b.Node)
  94. }
  95. func (a *MessageLogModel) key() MessageLogKey {
  96. var errorString string
  97. var context string
  98. if a.MessageLogError != nil {
  99. errorString = string(*a.MessageLogError)
  100. }
  101. if a.MessageLogContext != nil {
  102. context = string(*a.MessageLogContext)
  103. }
  104. return MessageLogKey(fmt.Sprintf("(%s,%s, %s,%s)", MessageLogKey(a.Msg), MessageLogKey(a.Level), errorString, context))
  105. }
  106. func (a *MessageLogContext) equal(b *MessageLogContext) bool {
  107. if a != nil && b != nil {
  108. return *a == *b
  109. } else if a == nil && b == nil {
  110. return true
  111. }
  112. return false
  113. }
  114. func (a *MessageLogError) equal(b *MessageLogError) bool {
  115. if a != nil && b != nil {
  116. return *a == *b
  117. } else if a == nil && b == nil {
  118. return true
  119. }
  120. return false
  121. }
  122. func (a *MessageLogModel) equal(b MessageLogModel) bool {
  123. if a.Msg != b.Msg {
  124. return false
  125. } else if a.Level != b.Level {
  126. return false
  127. }
  128. return a.MessageLogContext.equal(b.MessageLogContext) && a.MessageLogError.equal(b.MessageLogError)
  129. }
  130. func (a *MetricsLogModel) equal(b MetricsLogModel) bool {
  131. return a.Event == b.Event
  132. }
  133. func (a *UnknownLogModel) equal(b UnknownLogModel) bool {
  134. return a.Node.equal(b.Node)
  135. }
  136. // equal returns true if both nodes have the same key graphs.
  137. func (a *node) equal(b node) bool {
  138. for k, v := range *a {
  139. if val, ok := b[k]; ok {
  140. if reflect.TypeOf(v) != reflect.TypeOf(val) {
  141. return false
  142. }
  143. switch m := val.(type) {
  144. case nil:
  145. return true
  146. case node:
  147. vNode := v.(node)
  148. return vNode.equal(m)
  149. case []node:
  150. vNode := v.([]node)
  151. if len(vNode) != len(m) {
  152. return false
  153. }
  154. for i := range m {
  155. if vNode[i].equal(m[i]) != true {
  156. return false
  157. }
  158. }
  159. default:
  160. log.Fatalf("Unexpected val.(type) of %v\n", reflect.TypeOf(val))
  161. }
  162. } else {
  163. return false
  164. }
  165. }
  166. return true
  167. }
  168. func (a *BaseLogModel) JsonString() string {
  169. b, err := json.Marshal(a.Node)
  170. if err != nil {
  171. log.Fatal(err)
  172. }
  173. return string(b)
  174. }
  175. func (a *BaseLogModel) Print(printStructure, printExample bool) {
  176. if printStructure {
  177. fmt.Printf("Structure: %s\n", a.JsonString())
  178. }
  179. if printExample {
  180. fmt.Println("ExampleText: ", a.Example)
  181. }
  182. }
  183. func (a *MessageLogModel) Print(printStructure, printExample bool) {
  184. fmt.Printf("MessageLog\n")
  185. fmt.Printf("MessageLogName: %s\n", a.Msg)
  186. if a.MessageLogError != nil {
  187. fmt.Printf("MessageLogError: %s\n", *a.MessageLogError)
  188. }
  189. if a.MessageLogContext != nil {
  190. fmt.Printf("MessageLogContext: %s\n", *a.MessageLogContext)
  191. }
  192. if printStructure {
  193. fmt.Printf("Structure: %s\n", a.JsonString())
  194. }
  195. if printExample {
  196. fmt.Println("ExampleText: ", a.Example)
  197. }
  198. }
  199. func (a *MetricsLogModel) Print(printStructure, printExample bool) {
  200. fmt.Printf("MetricsLog\n")
  201. fmt.Printf("MetricsLogEventName: %s\n", a.Event)
  202. if printStructure {
  203. fmt.Printf("Structure: %s\n", a.JsonString())
  204. }
  205. if printExample {
  206. fmt.Println("ExampleText: ", a.Example)
  207. }
  208. }
  209. func (a *UnknownLogModel) Print(printStructure, printExample bool) {
  210. fmt.Printf("UnknownLog\n")
  211. fmt.Printf("Structure: %s\n", a.JsonString())
  212. if printExample {
  213. fmt.Println("ExampleText: ", a.Example)
  214. }
  215. }
  216. // Stats for each log model
  217. type LogModelStatsMetrics interface {
  218. NumLogs() uint
  219. }
  220. type LogModelStats struct {
  221. Count uint
  222. }
  223. type MessageLogModelStats struct {
  224. LogModelStats
  225. MessageLogModel
  226. }
  227. type MetricsLogModelStats struct {
  228. LogModelStats
  229. MetricsLogModel
  230. }
  231. type UnknownLogModelStats struct {
  232. LogModelStats
  233. UnknownLogModel
  234. }
  235. func (a MessageLogModelStats) NumLogs() uint {
  236. return a.Count
  237. }
  238. func (a MetricsLogModelStats) NumLogs() uint {
  239. return a.Count
  240. }
  241. func (a UnknownLogModelStats) NumLogs() uint {
  242. return a.Count
  243. }
  244. func (a *MessageLogModelStats) Print(printStructure, printExample bool) {
  245. a.MessageLogModel.Print(printStructure, printExample)
  246. }
  247. func (a *MetricsLogModelStats) Print(printStructure, printExample bool) {
  248. a.MetricsLogModel.Print(printStructure, printExample)
  249. }
  250. func (a *UnknownLogModelStats) Print(printExample bool) {
  251. a.UnknownLogModel.Print(true, printExample)
  252. }
  253. func safeDivide(a, b float64) float64 {
  254. if b != 0 {
  255. return a / b
  256. }
  257. return 0
  258. }
  259. func (a *MessageLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
  260. a.Print(printStructure, printExample)
  261. fmt.Printf("Count: %d of %d\n", a.Count, count)
  262. fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
  263. fmt.Printf("\n")
  264. }
  265. func (a *MetricsLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
  266. a.Print(printStructure, printExample)
  267. fmt.Printf("Count: %d of %d\n", a.Count, count)
  268. fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
  269. fmt.Printf("\n")
  270. }
  271. func (a *UnknownLogModelStats) PrintWithRelativePercent(count uint, printExample bool) {
  272. a.Print(printExample)
  273. fmt.Printf("Count: %d of %d\n", a.Count, count)
  274. fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
  275. fmt.Printf("\n")
  276. }
  277. // Log type stats
  278. // Aggregate log models by log type
  279. type LogTypeStats struct {
  280. Count uint
  281. }
  282. type MessageLogStats struct {
  283. LogTypeStats
  284. modelStats map[MessageLogKey]*MessageLogModelStats
  285. }
  286. type MetricsLogStats struct {
  287. LogTypeStats
  288. modelStats map[MetricsLogEventName]*MetricsLogModelStats
  289. }
  290. type UnknownLogStats struct {
  291. LogTypeStats
  292. modelStats []UnknownLogModelStats
  293. }
  294. func (a *MessageLogStats) Print() {
  295. for _, v := range a.Sort() {
  296. v.PrintWithRelativePercent(a.Count, false, false)
  297. }
  298. }
  299. func (a *MetricsLogStats) Print() {
  300. for _, v := range a.Sort() {
  301. v.PrintWithRelativePercent(a.Count, false, false)
  302. }
  303. }
  304. func (a *UnknownLogStats) Print() {
  305. for _, v := range a.Sort() {
  306. v.PrintWithRelativePercent(a.Count, true)
  307. }
  308. }
  309. func (a *MessageLogStats) Sort() []MessageLogModelStats {
  310. var s []MessageLogModelStats
  311. for _, v := range a.modelStats {
  312. if v != nil {
  313. s = append(s, *v)
  314. }
  315. }
  316. sort.Slice(s, func(i, j int) bool {
  317. return s[j].Count > s[i].Count
  318. })
  319. return s
  320. }
  321. func (a *MetricsLogStats) Sort() []MetricsLogModelStats {
  322. var s []MetricsLogModelStats
  323. for _, v := range a.modelStats {
  324. if v != nil {
  325. s = append(s, *v)
  326. }
  327. }
  328. sort.Slice(s, func(i, j int) bool {
  329. return s[j].Count > s[i].Count
  330. })
  331. return s
  332. }
  333. func (a *UnknownLogStats) Sort() []UnknownLogModelStats {
  334. var s []UnknownLogModelStats
  335. for _, v := range a.modelStats {
  336. s = append(s, v)
  337. }
  338. sort.Slice(s, func(i, j int) bool {
  339. return s[j].Count > s[i].Count
  340. })
  341. return s
  342. }
  343. // Log file stats
  344. type LogStats struct {
  345. MessageLogModels MessageLogStats
  346. MetricsLogModels MetricsLogStats
  347. UnknownLogModels UnknownLogStats
  348. }
  349. // NewLogStats initializes a new LogStats structure.
  350. func NewLogStats() (l *LogStats) {
  351. l = &LogStats{
  352. MessageLogModels: MessageLogStats{
  353. modelStats: make(map[MessageLogKey]*MessageLogModelStats),
  354. },
  355. MetricsLogModels: MetricsLogStats{
  356. modelStats: make(map[MetricsLogEventName]*MetricsLogModelStats),
  357. },
  358. UnknownLogModels: UnknownLogStats{
  359. modelStats: nil,
  360. },
  361. }
  362. return l
  363. }
  364. func NewLogStatsFromFiles(files []string) (l *LogStats, err error) {
  365. l = NewLogStats()
  366. for _, file := range files {
  367. err = l.ParseFile(file)
  368. if err != nil {
  369. return nil, err
  370. }
  371. }
  372. return l, nil
  373. }
  374. // ParseFile takes a psiphond log file as input, parses the log lines into log
  375. // models and updates the LogStats structure.
  376. func (l *LogStats) ParseFile(filename string) error {
  377. file, err := os.Open(filename)
  378. if err != nil {
  379. return err
  380. }
  381. defer file.Close()
  382. scanner := bufio.NewScanner(file)
  383. for scanner.Scan() {
  384. err := l.ParseLogLine(scanner.Text())
  385. if err != nil {
  386. return err
  387. }
  388. }
  389. if err := scanner.Err(); err != nil {
  390. return err
  391. }
  392. return nil
  393. }
  394. // ParseLogLine attempts to parse a log line into a log model and then updates the
  395. // LogStats structure.
  396. func (l *LogStats) ParseLogLine(log string) error {
  397. MessageLogModels := &l.MessageLogModels
  398. MetricsLogModels := &l.MetricsLogModels
  399. logModel, err := parseLogModel(log)
  400. if err != nil {
  401. return err
  402. }
  403. switch v := logModel.(type) {
  404. case *MessageLogModel:
  405. MessageLogModels.Count += 1
  406. if m, ok := MessageLogModels.modelStats[v.key()]; ok {
  407. m.Count += 1
  408. } else {
  409. MessageLogModels.modelStats[v.key()] = &MessageLogModelStats{LogModelStats{1}, *v}
  410. }
  411. case *MetricsLogModel:
  412. l.MetricsLogModels.Count += 1
  413. if m, ok := l.MetricsLogModels.modelStats[v.Event]; ok {
  414. m.Count += 1
  415. } else {
  416. MetricsLogModels.modelStats[v.Event] = &MetricsLogModelStats{LogModelStats{1}, *v}
  417. }
  418. case *UnknownLogModel:
  419. l.UnknownLogModels.Count += 1
  420. found := false
  421. for i := range l.UnknownLogModels.modelStats {
  422. if l.UnknownLogModels.modelStats[i].UnknownLogModel.equal(*v) {
  423. l.UnknownLogModels.modelStats[i].Count += 1
  424. found = true
  425. break
  426. }
  427. }
  428. if !found {
  429. l.UnknownLogModels.modelStats = append(l.UnknownLogModels.modelStats, UnknownLogModelStats{LogModelStats{1}, *v})
  430. }
  431. default:
  432. return fmt.Errorf("Unexpected model type of %v\n", reflect.TypeOf(v))
  433. }
  434. return nil
  435. }
  436. func redactIpAddressesAndPorts(a string) string {
  437. ipAddressWithOptionalPort := regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}(:(6553[0-5]|655[0-2][0-9]\d|65[0-4](\d){2}|6[0-4](\d){3}|[1-5](\d){4}|[1-9](\d){0,3}))?`)
  438. return ipAddressWithOptionalPort.ReplaceAllString(a, "<redacted>")
  439. }
  440. // parseLogModel attempts to parse a string into a log model. It is expected
  441. // that the provided string is valid JSON.
  442. func parseLogModel(s string) (LogModel, error) {
  443. var m LogFields
  444. err := json.Unmarshal([]byte(s), &m)
  445. if err != nil {
  446. return nil, fmt.Errorf("Failed to parse log line into JSON: %s", err)
  447. }
  448. var l LogModel
  449. var b BaseLogModel
  450. b.Example = s
  451. b.Node = parseNode(&m)
  452. if m["event_name"] != nil {
  453. l = &MetricsLogModel{
  454. BaseLogModel: b,
  455. Event: MetricsLogEventName(m["event_name"].(string)),
  456. }
  457. } else if m["msg"] != nil && m["level"] != nil {
  458. var level LogLevel
  459. switch m["level"].(string) {
  460. case "debug":
  461. level = LOG_LEVEL_DEBUG
  462. case "info":
  463. level = LOG_LEVEL_INFO
  464. case "warning":
  465. level = LOG_LEVEL_WARNING
  466. case "error":
  467. level = LOG_LEVEL_ERROR
  468. default:
  469. return nil, fmt.Errorf("Unexpected log level: %s\n", m["level"].(string))
  470. }
  471. var context *MessageLogContext
  472. var err *MessageLogError
  473. if val, ok := m["context"]; ok {
  474. c := MessageLogContext(val.(string))
  475. context = &c
  476. }
  477. if val, ok := m["error"]; ok {
  478. errorWithIpsRedacted := redactIpAddressesAndPorts(val.(string))
  479. e := MessageLogError(errorWithIpsRedacted)
  480. err = &e
  481. }
  482. l = &MessageLogModel{
  483. BaseLogModel: b,
  484. Msg: MessageLogName(m["msg"].(string)),
  485. Level: level,
  486. MessageLogContext: context,
  487. MessageLogError: err,
  488. }
  489. } else {
  490. l = &UnknownLogModel{
  491. BaseLogModel: b,
  492. }
  493. }
  494. return l, nil
  495. }
  496. // parseNode takes a map and transforms it into a graph which represents its
  497. // structure.
  498. func parseNode(m *LogFields) node {
  499. n := make(node)
  500. for k, v := range *m {
  501. i := parseInterface(v)
  502. n[k] = i
  503. }
  504. return n
  505. }
  506. // parseInterface takes an interface and transforms it into a graph of its
  507. // structure.
  508. func parseInterface(i interface{}) interface{} {
  509. switch v := i.(type) {
  510. default:
  511. return nil
  512. case map[string]interface{}:
  513. l := LogFields(v)
  514. return parseNode(&l)
  515. case []interface{}:
  516. n := make([]node, 1)
  517. for i := range v {
  518. switch p := parseInterface(v[i]).(type) {
  519. case node:
  520. n = append(n, p)
  521. }
  522. }
  523. return n
  524. }
  525. }
  526. // sortLogModelsDescending merges all log models of different types and then
  527. // sorts them in ascending order by the number times each occurs. Returns the
  528. // sorted list and the total number of logs represented by each log model in
  529. // the list.
  530. func (l *LogStats) SortLogModels(messages, metrics, unknown bool) (models []interface{}, numLogs uint) {
  531. var messagesSort []MessageLogModelStats
  532. var metricsSort []MetricsLogModelStats
  533. var unknownSort []UnknownLogModelStats
  534. if messages {
  535. messagesSort = l.MessageLogModels.Sort()
  536. messages := make([]interface{}, len(messagesSort))
  537. for i, v := range messagesSort {
  538. messages[i] = v
  539. }
  540. models = append(models, messages...)
  541. numLogs += l.MessageLogModels.Count
  542. }
  543. if metrics {
  544. metricsSort = l.MetricsLogModels.Sort()
  545. metrics := make([]interface{}, len(metricsSort))
  546. for i, v := range metricsSort {
  547. metrics[i] = v
  548. }
  549. models = append(models, metrics...)
  550. numLogs += l.MetricsLogModels.Count
  551. }
  552. if unknown {
  553. unknownSort = l.UnknownLogModels.Sort()
  554. unknown := make([]interface{}, len(unknownSort))
  555. for i, v := range unknownSort {
  556. unknown[i] = v
  557. }
  558. models = append(models, unknown...)
  559. numLogs += l.UnknownLogModels.Count
  560. }
  561. sort.Slice(models, func(i, j int) bool {
  562. a := models[i].(LogModelStatsMetrics)
  563. b := models[j].(LogModelStatsMetrics)
  564. return b.NumLogs() > a.NumLogs()
  565. })
  566. return models, numLogs
  567. }
  568. // NumDistinctLogs returns the number of unique log models contained within the
  569. // LogStats structure.
  570. func (l *LogStats) NumDistinctLogs() uint {
  571. return uint(len(l.MessageLogModels.modelStats) + len(l.MetricsLogModels.modelStats) + len(l.UnknownLogModels.modelStats))
  572. }
  573. func (l *LogStats) Print(messages, metrics, unknown, printStructure, printExample bool) {
  574. logs, numLogs := l.SortLogModels(messages, metrics, unknown)
  575. for _, x := range logs {
  576. switch v := x.(type) {
  577. case MessageLogModelStats:
  578. v.PrintWithRelativePercent(numLogs, printStructure, printExample)
  579. case MetricsLogModelStats:
  580. v.PrintWithRelativePercent(numLogs, printStructure, printExample)
  581. case UnknownLogModelStats:
  582. v.PrintWithRelativePercent(numLogs, printExample)
  583. }
  584. }
  585. }