history.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. /*
  2. * Copyright (c) 2020, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package obfuscator
  20. import (
  21. "encoding/hex"
  22. "time"
  23. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  24. lrucache "github.com/cognusion/go-cache-lru"
  25. )
  26. const (
  27. HISTORY_SEED_TTL = 24 * time.Hour
  28. HISTORY_SEED_MAX_ENTRIES = 1000000
  29. HISTORY_CLIENT_IP_TTL = 2 * time.Minute
  30. HISTORY_CLIENT_IP_MAX_ENTRIES = 10000
  31. )
  32. // SeedHistory maintains a history of recently observed obfuscation seed values.
  33. // This history is used to identify duplicate seed messages.
  34. //
  35. // As a heurististic to exclude expected duplicates, due to, for example, meek
  36. // retries, the source client IP is retained for comparison for a short
  37. // duration -- long enough to cover meek retries without retaining client
  38. // IPs in memory long past a client connection lifetime.
  39. type SeedHistory struct {
  40. seedTTL time.Duration
  41. seedToTime *lrucache.Cache
  42. seedToClientIP *lrucache.Cache
  43. }
  44. type SeedHistoryConfig struct {
  45. SeedTTL time.Duration
  46. SeedMaxEntries int
  47. ClientIPTTL time.Duration
  48. ClientIPMaxEntries int
  49. }
  50. // NewSeedHistory creates a new SeedHistory. Config is optional.
  51. func NewSeedHistory(config *SeedHistoryConfig) *SeedHistory {
  52. // Default TTL and MAX_ENTRIES are tuned to provide an effective history size
  53. // while bounding the amount of memory that will be used. While a
  54. // probabilistic data structure such as a Bloom filter would provide a
  55. // smaller memory footprint, we wish to avoid the associated risk of false
  56. // positives.
  57. //
  58. // Limitation: As go-cache-lru does not currently support iterating over all
  59. // items (without making a full copy of the entire cache), the client IP with
  60. // shorter TTL is stored in a second, smaller cache instead of the same cache
  61. // with a a pruner. This incurs some additional overhead, as the seed key is
  62. // stored twice, once in each cache.
  63. useConfig := SeedHistoryConfig{
  64. SeedTTL: HISTORY_SEED_TTL,
  65. SeedMaxEntries: HISTORY_SEED_MAX_ENTRIES,
  66. ClientIPTTL: HISTORY_CLIENT_IP_TTL,
  67. ClientIPMaxEntries: HISTORY_CLIENT_IP_MAX_ENTRIES,
  68. }
  69. if config != nil {
  70. if config.SeedTTL != 0 {
  71. useConfig.SeedTTL = config.SeedTTL
  72. }
  73. if config.SeedMaxEntries != 0 {
  74. useConfig.SeedMaxEntries = config.SeedMaxEntries
  75. }
  76. if config.ClientIPTTL != 0 {
  77. useConfig.ClientIPTTL = config.ClientIPTTL
  78. }
  79. if config.ClientIPMaxEntries != 0 {
  80. useConfig.ClientIPMaxEntries = config.ClientIPMaxEntries
  81. }
  82. }
  83. return &SeedHistory{
  84. seedTTL: useConfig.SeedTTL,
  85. seedToTime: lrucache.NewWithLRU(
  86. useConfig.SeedTTL,
  87. 1*time.Minute,
  88. useConfig.SeedMaxEntries),
  89. seedToClientIP: lrucache.NewWithLRU(
  90. useConfig.ClientIPTTL,
  91. 30*time.Second,
  92. useConfig.ClientIPMaxEntries),
  93. }
  94. }
  95. // AddNew calls AddNewWithTTL using the SeedTTL that was specified in the
  96. // SeedHistoryConfig.
  97. func (h *SeedHistory) AddNew(
  98. strictMode bool,
  99. clientIP string,
  100. seedType string,
  101. seed []byte) (bool, *common.LogFields) {
  102. return h.AddNewWithTTL(
  103. strictMode, clientIP, seedType, seed, lrucache.DefaultExpiration)
  104. }
  105. // AddNewWithTTL adds a new seed value to the history, set to expire with the
  106. // specified TTL. If the seed value is already in the history, and an expected
  107. // case such as a meek retry is ruled out (or strictMode is on), AddNew
  108. // returns false.
  109. //
  110. // When a duplicate seed is found, a common.LogFields instance is returned,
  111. // populated with event data. Log fields may be returned in either the false
  112. // or true case.
  113. func (h *SeedHistory) AddNewWithTTL(
  114. strictMode bool,
  115. clientIP string,
  116. seedType string,
  117. seed []byte,
  118. TTL time.Duration) (bool, *common.LogFields) {
  119. key := string(seed)
  120. // Limitation: go-cache-lru does not currently support atomically setting if
  121. // a key is unset and otherwise _returning the corresponding value_. There is
  122. // an unlikely possibility that this Add and the following Get don't see the
  123. // same existing key/value state.
  124. now := time.Now()
  125. if h.seedToTime.Add(key, now, TTL) == nil {
  126. // Seed was not already in cache
  127. // TODO: if TTL < SeedHistory.ClientIPTTL, use the shorter TTL here
  128. h.seedToClientIP.Set(key, clientIP, lrucache.DefaultExpiration)
  129. return true, nil
  130. }
  131. previousTime, ok := h.seedToTime.Get(key)
  132. if !ok {
  133. // Inconsistent Add/Get state: assume cache item just expired.
  134. previousTime = now.Add(-h.seedTTL)
  135. }
  136. logFields := common.LogFields{
  137. "duplicate_seed": hex.EncodeToString(seed),
  138. "duplicate_seed_type": seedType,
  139. "duplicate_elapsed_time_ms": int64(time.Since(previousTime.(time.Time)) / time.Millisecond),
  140. }
  141. previousClientIP, ok := h.seedToClientIP.Get(key)
  142. if ok {
  143. if clientIP == previousClientIP.(string) {
  144. logFields["duplicate_client_ip"] = "equal"
  145. return !strictMode, &logFields
  146. } else {
  147. logFields["duplicate_client_ip"] = "unequal"
  148. return false, &logFields
  149. }
  150. }
  151. logFields["duplicate_client_ip"] = "unknown"
  152. return false, &logFields
  153. }