blocklist.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. /*
  2. * Copyright (c) 2019, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package server
  20. import (
  21. "encoding/csv"
  22. "fmt"
  23. "io"
  24. "net"
  25. "os"
  26. "sync/atomic"
  27. "time"
  28. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  29. )
  30. // Blocklist provides a fast lookup of IP addresses that are candidates for
  31. // egress blocking. This is intended to be used to block malware and other
  32. // malicious traffic.
  33. //
  34. // The Reload function supports hot reloading of rules data while the server
  35. // is running.
  36. //
  37. // Limitations: currently supports only IPv4 addresses, and is implemented
  38. // with an in-memory Go map, which limits the practical size of the blocklist.
  39. type Blocklist struct {
  40. common.ReloadableFile
  41. loaded int32
  42. data atomic.Value
  43. }
  // BlocklistTag describes one blocklist entry for an IP address: the source
  // that listed the address, and the subject, or name, of the suspected
  // malicious traffic.
  type BlocklistTag struct {
  	// Source is the source containing the IP address.
  	Source string

  	// Subject is the name of the suspected malicious traffic.
  	Subject string
  }
  50. type blocklistData struct {
  51. lookup map[[net.IPv4len]byte][]BlocklistTag
  52. internedStrings map[string]string
  53. }
  54. // NewBlocklist creates a new block list.
  55. //
  56. // The input file must be a 3 field comma-delimited and optional quote-escaped
  57. // CSV. Fields: <IPv4 address>,<source>,<subject>.
  58. //
  59. // IP addresses may appear multiple times in the input file; each distinct
  60. // source/subject is associated with the IP address and returned in the Lookup
  61. // tag list.
  62. func NewBlocklist(filename string) (*Blocklist, error) {
  63. blocklist := &Blocklist{}
  64. blocklist.ReloadableFile = common.NewReloadableFile(
  65. filename,
  66. false,
  67. func(_ []byte, _ time.Time) error {
  68. newData, err := loadBlocklistFromFile(filename)
  69. if err != nil {
  70. return common.ContextError(err)
  71. }
  72. blocklist.data.Store(newData)
  73. atomic.StoreInt32(&blocklist.loaded, 1)
  74. return nil
  75. })
  76. _, err := blocklist.Reload()
  77. if err != nil {
  78. return nil, common.ContextError(err)
  79. }
  80. return blocklist, nil
  81. }
  82. // Lookup returns the blocklist tags for any IP address that is on the
  83. // blocklist, or returns nil for any IP address not on the blocklist. Lookup
  84. // may be called oncurrently. The caller must not modify the return value.
  85. func (b *Blocklist) Lookup(IPAddress net.IP) []BlocklistTag {
  86. // When not configured, no blocklist is loaded/initialized.
  87. if atomic.LoadInt32(&b.loaded) != 1 {
  88. return nil
  89. }
  90. var key [net.IPv4len]byte
  91. IPv4Address := IPAddress.To4()
  92. if IPv4Address == nil {
  93. return nil
  94. }
  95. copy(key[:], IPv4Address)
  96. // As data is an atomic.Value, it's not necessary to call
  97. // ReloadableFile.RLock/ReloadableFile.RUnlock in this case.
  98. tags, ok := b.data.Load().(*blocklistData).lookup[key]
  99. if !ok {
  100. return nil
  101. }
  102. return tags
  103. }
  104. func loadBlocklistFromFile(filename string) (*blocklistData, error) {
  105. data := newBlocklistData()
  106. file, err := os.Open(filename)
  107. if err != nil {
  108. return nil, common.ContextError(err)
  109. }
  110. defer file.Close()
  111. reader := csv.NewReader(file)
  112. reader.FieldsPerRecord = 3
  113. reader.Comment = '#'
  114. reader.ReuseRecord = true
  115. for {
  116. record, err := reader.Read()
  117. if err == io.EOF {
  118. break
  119. } else if err != nil {
  120. return nil, common.ContextError(err)
  121. }
  122. IPAddress := net.ParseIP(record[0])
  123. if IPAddress == nil {
  124. return nil, common.ContextError(
  125. fmt.Errorf("invalid IP address: %s", record[0]))
  126. }
  127. IPv4Address := IPAddress.To4()
  128. if IPAddress == nil {
  129. return nil, common.ContextError(
  130. fmt.Errorf("invalid IPv4 address: %s", record[0]))
  131. }
  132. var key [net.IPv4len]byte
  133. copy(key[:], IPv4Address)
  134. // Intern the source and subject strings so we only store one copy of
  135. // each in memory. These values are expected to repeat often.
  136. source := data.internString(record[1])
  137. subject := data.internString(record[2])
  138. tag := BlocklistTag{
  139. Source: source,
  140. Subject: subject,
  141. }
  142. tags := data.lookup[key]
  143. found := false
  144. for _, existingTag := range tags {
  145. if tag == existingTag {
  146. found = true
  147. break
  148. }
  149. }
  150. if !found {
  151. data.lookup[key] = append(tags, tag)
  152. }
  153. }
  154. return data, nil
  155. }
  156. func newBlocklistData() *blocklistData {
  157. return &blocklistData{
  158. lookup: make(map[[net.IPv4len]byte][]BlocklistTag),
  159. internedStrings: make(map[string]string),
  160. }
  161. }
  162. func (data *blocklistData) internString(str string) string {
  163. if internedStr, ok := data.internedStrings[str]; ok {
  164. return internedStr
  165. }
  166. data.internedStrings[str] = str
  167. return str
  168. }