analysis.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677
  1. /*
  2. * Copyright (c) 2018, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. // Package analysis implements heuristical frequency analysis of Psiphon Tunnel
  20. // Core server logs. Log lines are parsed into 3 distinct log types: message,
  21. // metrics and unknown. Under these log types the number of logs of each unique
  22. // identifier is counted. The unique identifiers are as follows:
  23. // message: "msg" field
  24. // metrics: "event_name" field
  25. // unknown: key graph
  26. package analysis
  27. import (
  28. "bufio"
  29. "encoding/json"
  30. "fmt"
  31. "log"
  32. "os"
  33. "reflect"
  34. "sort"
  35. "github.com/sirupsen/logrus"
  36. )
  37. type LogLevel int
  38. const (
  39. LOG_LEVEL_UNKNOWN = -1
  40. LOG_LEVEL_DEBUG LogLevel = iota
  41. LOG_LEVEL_INFO
  42. LOG_LEVEL_WARNING
  43. LOG_LEVEL_ERROR
  44. )
  45. func (l LogLevel) String() string {
  46. switch l {
  47. default:
  48. return "Unknown"
  49. case LOG_LEVEL_UNKNOWN:
  50. return "Unknown"
  51. case LOG_LEVEL_DEBUG:
  52. return "Debug"
  53. case LOG_LEVEL_INFO:
  54. return "Info"
  55. case LOG_LEVEL_WARNING:
  56. return "Warning"
  57. case LOG_LEVEL_ERROR:
  58. return "Error"
  59. }
  60. }
  61. type MetricsLogEventName string
  62. type MessageLogKey string
  63. type MessageLogName string
  64. type MessageLogContext string
  65. type MessageLogError string
  66. type LogFields logrus.Fields
  67. type node map[string]interface{}
  68. // Models for each psiphond log type
  69. type LogModel interface {
  70. JsonString() string
  71. Print(bool, bool)
  72. }
  73. type BaseLogModel struct {
  74. Example string
  75. Node node
  76. }
  77. type MessageLogModel struct {
  78. BaseLogModel
  79. Msg MessageLogName
  80. Level LogLevel
  81. MessageLogContext *MessageLogContext
  82. MessageLogError *MessageLogError
  83. }
  84. type MetricsLogModel struct {
  85. BaseLogModel
  86. Event MetricsLogEventName
  87. }
  88. type UnknownLogModel struct {
  89. BaseLogModel
  90. }
  91. func (a *BaseLogModel) equal(b BaseLogModel) bool {
  92. return a.Node.equal(b.Node)
  93. }
  94. func (a *MessageLogModel) key() MessageLogKey {
  95. var errorString string
  96. var context string
  97. if a.MessageLogError != nil {
  98. errorString = string(*a.MessageLogError)
  99. }
  100. if a.MessageLogContext != nil {
  101. context = string(*a.MessageLogContext)
  102. }
  103. return MessageLogKey(fmt.Sprintf("(%s,%s, %s,%s)", MessageLogKey(a.Msg), MessageLogKey(a.Level), errorString, context))
  104. }
  105. func (a *MessageLogContext) equal(b *MessageLogContext) bool {
  106. if a != nil && b != nil {
  107. return *a == *b
  108. } else if a == nil && b == nil {
  109. return true
  110. }
  111. return false
  112. }
  113. func (a *MessageLogError) equal(b *MessageLogError) bool {
  114. if a != nil && b != nil {
  115. return *a == *b
  116. } else if a == nil && b == nil {
  117. return true
  118. }
  119. return false
  120. }
  121. func (a *MessageLogModel) equal(b MessageLogModel) bool {
  122. if a.Msg != b.Msg {
  123. return false
  124. } else if a.Level != b.Level {
  125. return false
  126. }
  127. return a.MessageLogContext.equal(b.MessageLogContext) && a.MessageLogError.equal(b.MessageLogError)
  128. }
  129. func (a *MetricsLogModel) equal(b MetricsLogModel) bool {
  130. return a.Event == b.Event
  131. }
  132. func (a *UnknownLogModel) equal(b UnknownLogModel) bool {
  133. return a.Node.equal(b.Node)
  134. }
  135. // equal returns true if both nodes have the same key graphs.
  136. func (a *node) equal(b node) bool {
  137. for k, v := range *a {
  138. if val, ok := b[k]; ok {
  139. if reflect.TypeOf(v) != reflect.TypeOf(val) {
  140. return false
  141. }
  142. switch m := val.(type) {
  143. case nil:
  144. return true
  145. case node:
  146. vNode := v.(node)
  147. return vNode.equal(m)
  148. case []node:
  149. vNode := v.([]node)
  150. if len(vNode) != len(m) {
  151. return false
  152. }
  153. for i := range m {
  154. if vNode[i].equal(m[i]) != true {
  155. return false
  156. }
  157. }
  158. default:
  159. log.Fatalf("Unexpected val.(type) of %v\n", reflect.TypeOf(val))
  160. }
  161. } else {
  162. return false
  163. }
  164. }
  165. return true
  166. }
  167. func (a *BaseLogModel) JsonString() string {
  168. b, err := json.Marshal(a.Node)
  169. if err != nil {
  170. log.Fatal(err)
  171. }
  172. return string(b)
  173. }
  174. func (a *BaseLogModel) Print(printStructure, printExample bool) {
  175. if printStructure {
  176. fmt.Printf("Structure: %s\n", a.JsonString())
  177. }
  178. if printExample {
  179. fmt.Println("ExampleText: ", a.Example)
  180. }
  181. }
  182. func (a *MessageLogModel) Print(printStructure, printExample bool) {
  183. fmt.Printf("MessageLog\n")
  184. fmt.Printf("MessageLogName: %s\n", a.Msg)
  185. if a.MessageLogError != nil {
  186. fmt.Printf("MessageLogError: %s\n", *a.MessageLogError)
  187. }
  188. if a.MessageLogContext != nil {
  189. fmt.Printf("MessageLogContext: %s\n", *a.MessageLogContext)
  190. }
  191. if printStructure {
  192. fmt.Printf("Structure: %s\n", a.JsonString())
  193. }
  194. if printExample {
  195. fmt.Println("ExampleText: ", a.Example)
  196. }
  197. }
  198. func (a *MetricsLogModel) Print(printStructure, printExample bool) {
  199. fmt.Printf("MetricsLog\n")
  200. fmt.Printf("MetricsLogEventName: %s\n", a.Event)
  201. if printStructure {
  202. fmt.Printf("Structure: %s\n", a.JsonString())
  203. }
  204. if printExample {
  205. fmt.Println("ExampleText: ", a.Example)
  206. }
  207. }
  208. func (a *UnknownLogModel) Print(printStructure, printExample bool) {
  209. fmt.Printf("UnknownLog\n")
  210. fmt.Printf("Structure: %s\n", a.JsonString())
  211. if printExample {
  212. fmt.Println("ExampleText: ", a.Example)
  213. }
  214. }
  215. // Stats for each log model
  216. type LogModelStatsMetrics interface {
  217. NumLogs() uint
  218. }
  219. type LogModelStats struct {
  220. Count uint
  221. }
  222. type MessageLogModelStats struct {
  223. LogModelStats
  224. MessageLogModel
  225. }
  226. type MetricsLogModelStats struct {
  227. LogModelStats
  228. MetricsLogModel
  229. }
  230. type UnknownLogModelStats struct {
  231. LogModelStats
  232. UnknownLogModel
  233. }
  234. func (a MessageLogModelStats) NumLogs() uint {
  235. return a.Count
  236. }
  237. func (a MetricsLogModelStats) NumLogs() uint {
  238. return a.Count
  239. }
  240. func (a UnknownLogModelStats) NumLogs() uint {
  241. return a.Count
  242. }
  243. func (a *MessageLogModelStats) Print(printStructure, printExample bool) {
  244. a.MessageLogModel.Print(printStructure, printExample)
  245. }
  246. func (a *MetricsLogModelStats) Print(printStructure, printExample bool) {
  247. a.MetricsLogModel.Print(printStructure, printExample)
  248. }
  249. func (a *UnknownLogModelStats) Print(printExample bool) {
  250. a.UnknownLogModel.Print(true, printExample)
  251. }
  252. func safeDivide(a, b float64) float64 {
  253. if b != 0 {
  254. return a / b
  255. }
  256. return 0
  257. }
  258. func (a *MessageLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
  259. a.Print(printStructure, printExample)
  260. fmt.Printf("Count: %d of %d\n", a.Count, count)
  261. fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
  262. fmt.Printf("\n")
  263. }
  264. func (a *MetricsLogModelStats) PrintWithRelativePercent(count uint, printStructure, printExample bool) {
  265. a.Print(printStructure, printExample)
  266. fmt.Printf("Count: %d of %d\n", a.Count, count)
  267. fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
  268. fmt.Printf("\n")
  269. }
  270. func (a *UnknownLogModelStats) PrintWithRelativePercent(count uint, printExample bool) {
  271. a.Print(printExample)
  272. fmt.Printf("Count: %d of %d\n", a.Count, count)
  273. fmt.Printf("Percent: %0.2f\n", safeDivide(float64(a.Count), float64(count)))
  274. fmt.Printf("\n")
  275. }
  276. // Log type stats
  277. // Aggregate log models by log type
  278. type LogTypeStats struct {
  279. Count uint
  280. }
  281. type MessageLogStats struct {
  282. LogTypeStats
  283. modelStats map[MessageLogKey]*MessageLogModelStats
  284. }
  285. type MetricsLogStats struct {
  286. LogTypeStats
  287. modelStats map[MetricsLogEventName]*MetricsLogModelStats
  288. }
  289. type UnknownLogStats struct {
  290. LogTypeStats
  291. modelStats []UnknownLogModelStats
  292. }
  293. func (a *MessageLogStats) Print() {
  294. for _, v := range a.Sort() {
  295. v.PrintWithRelativePercent(a.Count, false, false)
  296. }
  297. }
  298. func (a *MetricsLogStats) Print() {
  299. for _, v := range a.Sort() {
  300. v.PrintWithRelativePercent(a.Count, false, false)
  301. }
  302. }
  303. func (a *UnknownLogStats) Print() {
  304. for _, v := range a.Sort() {
  305. v.PrintWithRelativePercent(a.Count, true)
  306. }
  307. }
  308. func (a *MessageLogStats) Sort() []MessageLogModelStats {
  309. var s []MessageLogModelStats
  310. for _, v := range a.modelStats {
  311. if v != nil {
  312. s = append(s, *v)
  313. }
  314. }
  315. sort.Slice(s, func(i, j int) bool {
  316. return s[j].Count > s[i].Count
  317. })
  318. return s
  319. }
  320. func (a *MetricsLogStats) Sort() []MetricsLogModelStats {
  321. var s []MetricsLogModelStats
  322. for _, v := range a.modelStats {
  323. if v != nil {
  324. s = append(s, *v)
  325. }
  326. }
  327. sort.Slice(s, func(i, j int) bool {
  328. return s[j].Count > s[i].Count
  329. })
  330. return s
  331. }
  332. func (a *UnknownLogStats) Sort() []UnknownLogModelStats {
  333. var s []UnknownLogModelStats
  334. for _, v := range a.modelStats {
  335. s = append(s, v)
  336. }
  337. sort.Slice(s, func(i, j int) bool {
  338. return s[j].Count > s[i].Count
  339. })
  340. return s
  341. }
  342. // Log file stats
  343. type LogStats struct {
  344. MessageLogModels MessageLogStats
  345. MetricsLogModels MetricsLogStats
  346. UnknownLogModels UnknownLogStats
  347. }
  348. // NewLogStats initializes a new LogStats structure.
  349. func NewLogStats() (l *LogStats) {
  350. l = &LogStats{
  351. MessageLogModels: MessageLogStats{
  352. modelStats: make(map[MessageLogKey]*MessageLogModelStats),
  353. },
  354. MetricsLogModels: MetricsLogStats{
  355. modelStats: make(map[MetricsLogEventName]*MetricsLogModelStats),
  356. },
  357. UnknownLogModels: UnknownLogStats{
  358. modelStats: nil,
  359. },
  360. }
  361. return l
  362. }
  363. func NewLogStatsFromFiles(files []string) (l *LogStats, err error) {
  364. l = NewLogStats()
  365. for _, file := range files {
  366. err = l.ParseFile(file)
  367. if err != nil {
  368. return nil, err
  369. }
  370. }
  371. return l, nil
  372. }
  373. // ParseFile takes a psiphond log file as input, parses the log lines into log
  374. // models and updates the LogStats structure.
  375. func (l *LogStats) ParseFile(filename string) error {
  376. file, err := os.Open(filename)
  377. if err != nil {
  378. return err
  379. }
  380. defer file.Close()
  381. scanner := bufio.NewScanner(file)
  382. for scanner.Scan() {
  383. err := l.ParseLogLine(scanner.Text())
  384. if err != nil {
  385. return err
  386. }
  387. }
  388. if err := scanner.Err(); err != nil {
  389. return err
  390. }
  391. return nil
  392. }
  393. // ParseLogLine attempts to parse a log line into a log model and then updates the
  394. // LogStats structure.
  395. func (l *LogStats) ParseLogLine(log string) error {
  396. MessageLogModels := &l.MessageLogModels
  397. MetricsLogModels := &l.MetricsLogModels
  398. logModel, err := parseLogModel(log)
  399. if err != nil {
  400. return err
  401. }
  402. switch v := logModel.(type) {
  403. case *MessageLogModel:
  404. MessageLogModels.Count += 1
  405. if m, ok := MessageLogModels.modelStats[v.key()]; ok {
  406. m.Count += 1
  407. } else {
  408. MessageLogModels.modelStats[v.key()] = &MessageLogModelStats{LogModelStats{1}, *v}
  409. }
  410. case *MetricsLogModel:
  411. l.MetricsLogModels.Count += 1
  412. if m, ok := l.MetricsLogModels.modelStats[v.Event]; ok {
  413. m.Count += 1
  414. } else {
  415. MetricsLogModels.modelStats[v.Event] = &MetricsLogModelStats{LogModelStats{1}, *v}
  416. }
  417. case *UnknownLogModel:
  418. l.UnknownLogModels.Count += 1
  419. found := false
  420. for i := range l.UnknownLogModels.modelStats {
  421. if l.UnknownLogModels.modelStats[i].UnknownLogModel.equal(*v) {
  422. l.UnknownLogModels.modelStats[i].Count += 1
  423. found = true
  424. break
  425. }
  426. }
  427. if !found {
  428. l.UnknownLogModels.modelStats = append(l.UnknownLogModels.modelStats, UnknownLogModelStats{LogModelStats{1}, *v})
  429. }
  430. default:
  431. return fmt.Errorf("Unexpected model type of %v\n", reflect.TypeOf(v))
  432. }
  433. return nil
  434. }
  435. // parseLogModel attempts to parse a string into a log model. It is expected
  436. // that the provided string is valid JSON.
  437. func parseLogModel(s string) (LogModel, error) {
  438. var m LogFields
  439. err := json.Unmarshal([]byte(s), &m)
  440. if err != nil {
  441. return nil, fmt.Errorf("Failed to parse log line into JSON: %s", err)
  442. }
  443. var l LogModel
  444. var b BaseLogModel
  445. b.Example = s
  446. b.Node = parseNode(&m)
  447. if m["event_name"] != nil {
  448. l = &MetricsLogModel{
  449. BaseLogModel: b,
  450. Event: MetricsLogEventName(m["event_name"].(string)),
  451. }
  452. } else if m["msg"] != nil && m["level"] != nil {
  453. var level LogLevel
  454. switch m["level"].(string) {
  455. case "debug":
  456. level = LOG_LEVEL_DEBUG
  457. case "info":
  458. level = LOG_LEVEL_INFO
  459. case "warning":
  460. level = LOG_LEVEL_WARNING
  461. case "error":
  462. level = LOG_LEVEL_ERROR
  463. default:
  464. return nil, fmt.Errorf("Unexpected log level: %s\n", m["level"].(string))
  465. }
  466. var context *MessageLogContext
  467. var err *MessageLogError
  468. if val, ok := m["context"]; ok {
  469. c := MessageLogContext(val.(string))
  470. context = &c
  471. }
  472. if val, ok := m["error"]; ok {
  473. e := MessageLogError(val.(string))
  474. err = &e
  475. }
  476. l = &MessageLogModel{
  477. BaseLogModel: b,
  478. Msg: MessageLogName(m["msg"].(string)),
  479. Level: level,
  480. MessageLogContext: context,
  481. MessageLogError: err,
  482. }
  483. } else {
  484. l = &UnknownLogModel{
  485. BaseLogModel: b,
  486. }
  487. }
  488. return l, nil
  489. }
  490. // parseNode takes a map and transforms it into a graph which represents its
  491. // structure.
  492. func parseNode(m *LogFields) node {
  493. n := make(node)
  494. for k, v := range *m {
  495. i := parseInterface(v)
  496. n[k] = i
  497. }
  498. return n
  499. }
  500. // parseInterface takes an interface and transforms it into a graph of its
  501. // structure.
  502. func parseInterface(i interface{}) interface{} {
  503. switch v := i.(type) {
  504. default:
  505. return nil
  506. case map[string]interface{}:
  507. l := LogFields(v)
  508. return parseNode(&l)
  509. case []interface{}:
  510. n := make([]node, 1)
  511. for i := range v {
  512. switch p := parseInterface(v[i]).(type) {
  513. case node:
  514. n = append(n, p)
  515. }
  516. }
  517. return n
  518. }
  519. }
  520. // sortLogModelsDescending merges all log models of different types and then
  521. // sorts them in ascending order by the number times each occurs. Returns the
  522. // sorted list and the total number of logs represented by each log model in
  523. // the list.
  524. func (l *LogStats) SortLogModels(messages, metrics, unknown bool) (models []interface{}, numLogs uint) {
  525. var messagesSort []MessageLogModelStats
  526. var metricsSort []MetricsLogModelStats
  527. var unknownSort []UnknownLogModelStats
  528. if messages {
  529. messagesSort = l.MessageLogModels.Sort()
  530. messages := make([]interface{}, len(messagesSort))
  531. for i, v := range messagesSort {
  532. messages[i] = v
  533. }
  534. models = append(models, messages...)
  535. numLogs += l.MessageLogModels.Count
  536. }
  537. if metrics {
  538. metricsSort = l.MetricsLogModels.Sort()
  539. metrics := make([]interface{}, len(metricsSort))
  540. for i, v := range metricsSort {
  541. metrics[i] = v
  542. }
  543. models = append(models, metrics...)
  544. numLogs += l.MetricsLogModels.Count
  545. }
  546. if unknown {
  547. unknownSort = l.UnknownLogModels.Sort()
  548. unknown := make([]interface{}, len(unknownSort))
  549. for i, v := range unknownSort {
  550. unknown[i] = v
  551. }
  552. models = append(models, unknown...)
  553. numLogs += l.UnknownLogModels.Count
  554. }
  555. sort.Slice(models, func(i, j int) bool {
  556. a := models[i].(LogModelStatsMetrics)
  557. b := models[j].(LogModelStatsMetrics)
  558. return b.NumLogs() > a.NumLogs()
  559. })
  560. return models, numLogs
  561. }
  562. // NumDistinctLogs returns the number of unique log models contained within the
  563. // LogStats structure.
  564. func (l *LogStats) NumDistinctLogs() uint {
  565. return uint(len(l.MessageLogModels.modelStats) + len(l.MetricsLogModels.modelStats) + len(l.UnknownLogModels.modelStats))
  566. }
  567. func (l *LogStats) Print(messages, metrics, unknown, printStructure, printExample bool) {
  568. logs, numLogs := l.SortLogModels(messages, metrics, unknown)
  569. for _, x := range logs {
  570. switch v := x.(type) {
  571. case MessageLogModelStats:
  572. v.PrintWithRelativePercent(numLogs, printStructure, printExample)
  573. case MetricsLogModelStats:
  574. v.PrintWithRelativePercent(numLogs, printStructure, printExample)
  575. case UnknownLogModelStats:
  576. v.PrintWithRelativePercent(numLogs, printExample)
  577. }
  578. }
  579. }