analysis.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. /*
  2. * Copyright (c) 2018, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package analysis implements heuristical frequency analysis of Psiphon Tunnel
  20. // Core server logs. Log lines are parsed into 3 distinct log types: message,
  21. // metrics and unknown. Under these log types the number of logs of each unique
  22. // identifier is counted. The unique identifiers are as follows:
  23. // message: "msg" field
  24. // metrics: "event_name" field
  25. // unknown: key graph
  26. package analysis
  27. import (
  28. "bufio"
  29. "encoding/json"
  30. "fmt"
  31. "log"
  32. "os"
  33. "reflect"
  34. "regexp"
  35. "sort"
  36. "github.com/sirupsen/logrus"
  37. )
  38. type LogLevel int
  39. const (
  40. LOG_LEVEL_UNKNOWN = -1
  41. LOG_LEVEL_DEBUG LogLevel = iota
  42. LOG_LEVEL_INFO
  43. LOG_LEVEL_WARNING
  44. LOG_LEVEL_ERROR
  45. )
  46. func (l LogLevel) String() string {
  47. switch l {
  48. default:
  49. return "Unknown"
  50. case LOG_LEVEL_UNKNOWN:
  51. return "Unknown"
  52. case LOG_LEVEL_DEBUG:
  53. return "Debug"
  54. case LOG_LEVEL_INFO:
  55. return "Info"
  56. case LOG_LEVEL_WARNING:
  57. return "Warning"
  58. case LOG_LEVEL_ERROR:
  59. return "Error"
  60. }
  61. }
  62. type MetricsLogEventName string
  63. type MessageLogKey string
  64. type MessageLogName string
  65. type MessageLogContext string
  66. type MessageLogError string
  67. type LogFields logrus.Fields
  68. type node map[string]interface{}
  69. // Models for each psiphond log type
  70. type LogModel interface {
  71. JsonString() string
  72. Print(bool, bool)
  73. }
  74. type BaseLogModel struct {
  75. Example string
  76. Node node
  77. }
  78. type MessageLogModel struct {
  79. BaseLogModel
  80. Msg MessageLogName
  81. Level LogLevel
  82. MessageLogContext *MessageLogContext
  83. MessageLogError *MessageLogError
  84. }
  85. type MetricsLogModel struct {
  86. BaseLogModel
  87. Event MetricsLogEventName
  88. }
  89. type UnknownLogModel struct {
  90. BaseLogModel
  91. }
  92. func (a *BaseLogModel) equal(b BaseLogModel) bool {
  93. return a.Node.equal(b.Node)
  94. }
  95. func (a *MessageLogModel) key() MessageLogKey {
  96. var errorString string
  97. var context string
  98. if a.MessageLogError != nil {
  99. errorString = string(*a.MessageLogError)
  100. }
  101. if a.MessageLogContext != nil {
  102. context = string(*a.MessageLogContext)
  103. }
  104. return MessageLogKey(fmt.Sprintf("(%s,%d, %s,%s)", a.Msg, a.Level, errorString, context))
  105. }
  106. func (a *MessageLogContext) equal(b *MessageLogContext) bool {
  107. if a != nil && b != nil {
  108. return *a == *b
  109. } else if a == nil && b == nil {
  110. return true
  111. }
  112. return false
  113. }
  114. func (a *MessageLogError) equal(b *MessageLogError) bool {
  115. if a != nil && b != nil {
  116. return *a == *b
  117. } else if a == nil && b == nil {
  118. return true
  119. }
  120. return false
  121. }
  122. func (a *MessageLogModel) equal(b MessageLogModel) bool {
  123. if a.Msg != b.Msg {
  124. return false
  125. } else if a.Level != b.Level {
  126. return false
  127. }
  128. return a.MessageLogContext.equal(b.MessageLogContext) && a.MessageLogError.equal(b.MessageLogError)
  129. }
  130. func (a *MetricsLogModel) equal(b MetricsLogModel) bool {
  131. return a.Event == b.Event
  132. }
  133. func (a *UnknownLogModel) equal(b UnknownLogModel) bool {
  134. return a.Node.equal(b.Node)
  135. }
  136. // equal returns true if both nodes have the same key graphs.
  137. func (a *node) equal(b node) bool {
  138. for k, v := range *a {
  139. if val, ok := b[k]; ok {
  140. if reflect.TypeOf(v) != reflect.TypeOf(val) {
  141. return false
  142. }
  143. switch m := val.(type) {
  144. case nil:
  145. return true
  146. case node:
  147. vNode := v.(node)
  148. return vNode.equal(m)
  149. case []node:
  150. vNode := v.([]node)
  151. if len(vNode) != len(m) {
  152. return false
  153. }
  154. for i := range m {
  155. if !vNode[i].equal(m[i]) {
  156. return false
  157. }
  158. }
  159. default:
  160. log.Fatalf("Unexpected val.(type) of %v\n", reflect.TypeOf(val))
  161. }
  162. } else {
  163. return false
  164. }
  165. }
  166. return true
  167. }
  168. func (a *BaseLogModel) JsonString() string {
  169. b, err := json.Marshal(a.Node)
  170. if err != nil {
  171. log.Fatal(err)
  172. }
  173. return string(b)
  174. }
  175. func (a *BaseLogModel) Print(printStructure, printExample bool) {
  176. if printStructure {
  177. fmt.Printf("Structure: %s\n", a.JsonString())
  178. }
  179. if printExample {
  180. fmt.Println("ExampleText: ", a.Example)
  181. }
  182. }
  183. func (a *MessageLogModel) Print(printStructure, printExample bool) {
  184. fmt.Printf("MessageLog\n")
  185. fmt.Printf("MessageLogName: %s\n", a.Msg)
  186. if a.MessageLogError != nil {
  187. fmt.Printf("MessageLogError: %s\n", *a.MessageLogError)
  188. }
  189. if a.MessageLogContext != nil {
  190. fmt.Printf("MessageLogContext: %s\n", *a.MessageLogContext)
  191. }
  192. if printStructure {
  193. fmt.Printf("Structure: %s\n", a.JsonString())
  194. }
  195. if printExample {
  196. fmt.Println("ExampleText: ", a.Example)
  197. }
  198. }
  199. func (a *MetricsLogModel) Print(printStructure, printExample bool) {
  200. fmt.Printf("MetricsLog\n")
  201. fmt.Printf("MetricsLogEventName: %s\n", a.Event)
  202. if printStructure {
  203. fmt.Printf("Structure: %s\n", a.JsonString())
  204. }
  205. if printExample {
  206. fmt.Println("ExampleText: ", a.Example)
  207. }
  208. }
  209. func (a *UnknownLogModel) Print(printStructure, printExample bool) {
  210. fmt.Printf("UnknownLog\n")
  211. fmt.Printf("Structure: %s\n", a.JsonString())
  212. if printExample {
  213. fmt.Println("ExampleText: ", a.Example)
  214. }
  215. }
  216. // Stats for each log model
  217. type LogModelStatsMetrics interface {
  218. NumLogs() uint
  219. }
  220. type LogModelStats struct {
  221. Count uint
  222. }
  223. type MessageLogModelStats struct {
  224. LogModelStats
  225. MessageLogModel
  226. }
  227. type MetricsLogModelStats struct {
  228. LogModelStats
  229. MetricsLogModel
  230. }
  231. type UnknownLogModelStats struct {
  232. LogModelStats
  233. UnknownLogModel
  234. }
  235. func (a MessageLogModelStats) NumLogs() uint {
  236. return a.Count
  237. }
  238. func (a MetricsLogModelStats) NumLogs() uint {
  239. return a.Count
  240. }
  241. func (a UnknownLogModelStats) NumLogs() uint {
  242. return a.Count
  243. }
  244. func (a *MessageLogModelStats) Print(printStructure, printExample bool) {
  245. a.MessageLogModel.Print(printStructure, printExample)
  246. }
  247. func (a *MetricsLogModelStats) Print(printStructure, printExample bool) {
  248. a.MetricsLogModel.Print(printStructure, printExample)
  249. }
  250. func (a *UnknownLogModelStats) Print(printExample bool) {
  251. a.UnknownLogModel.Print(true, printExample)
  252. }
  253. func safeDivide(a, b float64) float64 {
  254. if b != 0 {
  255. return a / b
  256. }
  257. return 0
  258. }
  259. func (a *MessageLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
  260. a.Print(printStructure, printExample)
  261. fmt.Printf("Count: %d of %d\n", a.Count, count)
  262. fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
  263. fmt.Printf("\n")
  264. }
  265. func (a *MetricsLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
  266. a.Print(printStructure, printExample)
  267. fmt.Printf("Count: %d of %d\n", a.Count, count)
  268. fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
  269. fmt.Printf("\n")
  270. }
  271. func (a *UnknownLogModelStats) PrintWithRelativePercent(count uint, printExample bool) {
  272. a.Print(printExample)
  273. fmt.Printf("Count: %d of %d\n", a.Count, count)
  274. fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
  275. fmt.Printf("\n")
  276. }
  277. // Log type stats
  278. // Aggregate log models by log type
  279. type LogTypeStats struct {
  280. Count uint
  281. }
  282. type MessageLogStats struct {
  283. LogTypeStats
  284. modelStats map[MessageLogKey]*MessageLogModelStats
  285. }
  286. type MetricsLogStats struct {
  287. LogTypeStats
  288. modelStats map[MetricsLogEventName]*MetricsLogModelStats
  289. }
  290. type UnknownLogStats struct {
  291. LogTypeStats
  292. modelStats []UnknownLogModelStats
  293. }
  294. func (a *MessageLogStats) Print() {
  295. for _, v := range a.Sort() {
  296. v.PrintWithRelativePercent(a.Count, false, false)
  297. }
  298. }
  299. func (a *MetricsLogStats) Print() {
  300. for _, v := range a.Sort() {
  301. v.PrintWithRelativePercent(a.Count, false, false)
  302. }
  303. }
  304. func (a *UnknownLogStats) Print() {
  305. for _, v := range a.Sort() {
  306. v.PrintWithRelativePercent(a.Count, true)
  307. }
  308. }
  309. func (a *MessageLogStats) Sort() []MessageLogModelStats {
  310. var s []MessageLogModelStats
  311. for _, v := range a.modelStats {
  312. if v != nil {
  313. s = append(s, *v)
  314. }
  315. }
  316. sort.Slice(s, func(i, j int) bool {
  317. return s[j].Count > s[i].Count
  318. })
  319. return s
  320. }
  321. func (a *MetricsLogStats) Sort() []MetricsLogModelStats {
  322. var s []MetricsLogModelStats
  323. for _, v := range a.modelStats {
  324. if v != nil {
  325. s = append(s, *v)
  326. }
  327. }
  328. sort.Slice(s, func(i, j int) bool {
  329. return s[j].Count > s[i].Count
  330. })
  331. return s
  332. }
  333. func (a *UnknownLogStats) Sort() []UnknownLogModelStats {
  334. var s []UnknownLogModelStats
  335. s = append(s, a.modelStats...)
  336. sort.Slice(s, func(i, j int) bool {
  337. return s[j].Count > s[i].Count
  338. })
  339. return s
  340. }
  341. // Log file stats
  342. type LogStats struct {
  343. MessageLogModels MessageLogStats
  344. MetricsLogModels MetricsLogStats
  345. UnknownLogModels UnknownLogStats
  346. }
  347. // NewLogStats initializes a new LogStats structure.
  348. func NewLogStats() (l *LogStats) {
  349. l = &LogStats{
  350. MessageLogModels: MessageLogStats{
  351. modelStats: make(map[MessageLogKey]*MessageLogModelStats),
  352. },
  353. MetricsLogModels: MetricsLogStats{
  354. modelStats: make(map[MetricsLogEventName]*MetricsLogModelStats),
  355. },
  356. UnknownLogModels: UnknownLogStats{
  357. modelStats: nil,
  358. },
  359. }
  360. return l
  361. }
  362. func NewLogStatsFromFiles(files []string) (l *LogStats, err error) {
  363. l = NewLogStats()
  364. for _, file := range files {
  365. err = l.ParseFile(file)
  366. if err != nil {
  367. return nil, err
  368. }
  369. }
  370. return l, nil
  371. }
  372. // ParseFile takes a psiphond log file as input, parses the log lines into log
  373. // models and updates the LogStats structure.
  374. func (l *LogStats) ParseFile(filename string) error {
  375. file, err := os.Open(filename)
  376. if err != nil {
  377. return err
  378. }
  379. defer file.Close()
  380. scanner := bufio.NewScanner(file)
  381. for scanner.Scan() {
  382. err := l.ParseLogLine(scanner.Text())
  383. if err != nil {
  384. return err
  385. }
  386. }
  387. if err := scanner.Err(); err != nil {
  388. return err
  389. }
  390. return nil
  391. }
  392. // ParseLogLine attempts to parse a log line into a log model and then updates the
  393. // LogStats structure.
  394. func (l *LogStats) ParseLogLine(log string) error {
  395. MessageLogModels := &l.MessageLogModels
  396. MetricsLogModels := &l.MetricsLogModels
  397. logModel, err := parseLogModel(log)
  398. if err != nil {
  399. return err
  400. }
  401. switch v := logModel.(type) {
  402. case *MessageLogModel:
  403. MessageLogModels.Count += 1
  404. if m, ok := MessageLogModels.modelStats[v.key()]; ok {
  405. m.Count += 1
  406. } else {
  407. MessageLogModels.modelStats[v.key()] = &MessageLogModelStats{LogModelStats{1}, *v}
  408. }
  409. case *MetricsLogModel:
  410. l.MetricsLogModels.Count += 1
  411. if m, ok := l.MetricsLogModels.modelStats[v.Event]; ok {
  412. m.Count += 1
  413. } else {
  414. MetricsLogModels.modelStats[v.Event] = &MetricsLogModelStats{LogModelStats{1}, *v}
  415. }
  416. case *UnknownLogModel:
  417. l.UnknownLogModels.Count += 1
  418. found := false
  419. for i := range l.UnknownLogModels.modelStats {
  420. if l.UnknownLogModels.modelStats[i].UnknownLogModel.equal(*v) {
  421. l.UnknownLogModels.modelStats[i].Count += 1
  422. found = true
  423. break
  424. }
  425. }
  426. if !found {
  427. l.UnknownLogModels.modelStats = append(l.UnknownLogModels.modelStats, UnknownLogModelStats{LogModelStats{1}, *v})
  428. }
  429. default:
  430. return fmt.Errorf("unexpected model type of %v", reflect.TypeOf(v))
  431. }
  432. return nil
  433. }
  434. func redactIpAddressesAndPorts(a string) string {
  435. ipAddressWithOptionalPort := regexp.MustCompile(`(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}(:(6553[0-5]|655[0-2][0-9]\d|65[0-4](\d){2}|6[0-4](\d){3}|[1-5](\d){4}|[1-9](\d){0,3}))?`)
  436. return ipAddressWithOptionalPort.ReplaceAllString(a, "<redacted>")
  437. }
  438. // parseLogModel attempts to parse a string into a log model. It is expected
  439. // that the provided string is valid JSON.
  440. func parseLogModel(s string) (LogModel, error) {
  441. var m LogFields
  442. err := json.Unmarshal([]byte(s), &m)
  443. if err != nil {
  444. return nil, fmt.Errorf("failed to parse log line into JSON: %s", err)
  445. }
  446. var l LogModel
  447. var b BaseLogModel
  448. b.Example = s
  449. b.Node = parseNode(&m)
  450. if m["event_name"] != nil {
  451. l = &MetricsLogModel{
  452. BaseLogModel: b,
  453. Event: MetricsLogEventName(m["event_name"].(string)),
  454. }
  455. } else if m["msg"] != nil && m["level"] != nil {
  456. var level LogLevel
  457. switch m["level"].(string) {
  458. case "debug":
  459. level = LOG_LEVEL_DEBUG
  460. case "info":
  461. level = LOG_LEVEL_INFO
  462. case "warning":
  463. level = LOG_LEVEL_WARNING
  464. case "error":
  465. level = LOG_LEVEL_ERROR
  466. default:
  467. return nil, fmt.Errorf("unexpected log level: %s", m["level"].(string))
  468. }
  469. var context *MessageLogContext
  470. var err *MessageLogError
  471. if val, ok := m["context"]; ok {
  472. c := MessageLogContext(val.(string))
  473. context = &c
  474. }
  475. if val, ok := m["error"]; ok {
  476. errorWithIpsRedacted := redactIpAddressesAndPorts(val.(string))
  477. e := MessageLogError(errorWithIpsRedacted)
  478. err = &e
  479. }
  480. l = &MessageLogModel{
  481. BaseLogModel: b,
  482. Msg: MessageLogName(m["msg"].(string)),
  483. Level: level,
  484. MessageLogContext: context,
  485. MessageLogError: err,
  486. }
  487. } else {
  488. l = &UnknownLogModel{
  489. BaseLogModel: b,
  490. }
  491. }
  492. return l, nil
  493. }
  494. // parseNode takes a map and transforms it into a graph which represents its
  495. // structure.
  496. func parseNode(m *LogFields) node {
  497. n := make(node)
  498. for k, v := range *m {
  499. i := parseInterface(v)
  500. n[k] = i
  501. }
  502. return n
  503. }
  504. // parseInterface takes an interface and transforms it into a graph of its
  505. // structure.
  506. func parseInterface(i interface{}) interface{} {
  507. switch v := i.(type) {
  508. default:
  509. return nil
  510. case map[string]interface{}:
  511. l := LogFields(v)
  512. return parseNode(&l)
  513. case []interface{}:
  514. n := make([]node, 1)
  515. for i := range v {
  516. switch p := parseInterface(v[i]).(type) {
  517. case node:
  518. n = append(n, p)
  519. }
  520. }
  521. return n
  522. }
  523. }
  524. // sortLogModelsDescending merges all log models of different types and then
  525. // sorts them in ascending order by the number times each occurs. Returns the
  526. // sorted list and the total number of logs represented by each log model in
  527. // the list.
  528. func (l *LogStats) SortLogModels(messages, metrics, unknown bool) (models []interface{}, numLogs uint) {
  529. var messagesSort []MessageLogModelStats
  530. var metricsSort []MetricsLogModelStats
  531. var unknownSort []UnknownLogModelStats
  532. if messages {
  533. messagesSort = l.MessageLogModels.Sort()
  534. messages := make([]interface{}, len(messagesSort))
  535. for i, v := range messagesSort {
  536. messages[i] = v
  537. }
  538. models = append(models, messages...)
  539. numLogs += l.MessageLogModels.Count
  540. }
  541. if metrics {
  542. metricsSort = l.MetricsLogModels.Sort()
  543. metrics := make([]interface{}, len(metricsSort))
  544. for i, v := range metricsSort {
  545. metrics[i] = v
  546. }
  547. models = append(models, metrics...)
  548. numLogs += l.MetricsLogModels.Count
  549. }
  550. if unknown {
  551. unknownSort = l.UnknownLogModels.Sort()
  552. unknown := make([]interface{}, len(unknownSort))
  553. for i, v := range unknownSort {
  554. unknown[i] = v
  555. }
  556. models = append(models, unknown...)
  557. numLogs += l.UnknownLogModels.Count
  558. }
  559. sort.Slice(models, func(i, j int) bool {
  560. a := models[i].(LogModelStatsMetrics)
  561. b := models[j].(LogModelStatsMetrics)
  562. return b.NumLogs() > a.NumLogs()
  563. })
  564. return models, numLogs
  565. }
  566. // NumDistinctLogs returns the number of unique log models contained within the
  567. // LogStats structure.
  568. func (l *LogStats) NumDistinctLogs() uint {
  569. return uint(len(l.MessageLogModels.modelStats) + len(l.MetricsLogModels.modelStats) + len(l.UnknownLogModels.modelStats))
  570. }
  571. func (l *LogStats) Print(messages, metrics, unknown, printStructure, printExample bool) {
  572. logs, numLogs := l.SortLogModels(messages, metrics, unknown)
  573. for _, x := range logs {
  574. switch v := x.(type) {
  575. case MessageLogModelStats:
  576. v.PrintWithRelativePercent(numLogs, printStructure, printExample)
  577. case MetricsLogModelStats:
  578. v.PrintWithRelativePercent(numLogs, printStructure, printExample)
  579. case UnknownLogModelStats:
  580. v.PrintWithRelativePercent(numLogs, printExample)
  581. }
  582. }
  583. }