blocklist.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /*
  2. * Copyright (c) 2019, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package server
  20. import (
  21. "encoding/csv"
  22. "fmt"
  23. "io"
  24. "net"
  25. "os"
  26. "sync/atomic"
  27. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  28. )
// Blocklist provides a fast lookup of IP addresses that are candidates for
// egress blocking. This is intended to be used to block malware and other
// malicious traffic.
//
// The Reload function supports hot reloading of rules data while the server
// is running.
//
// Limitations: currently supports only IPv4 addresses, and is implemented
// with an in-memory Go map, which limits the practical size of the blocklist.
type Blocklist struct {
	// ReloadableFile is embedded so that Reload is available directly on a
	// Blocklist value.
	common.ReloadableFile

	// loaded is set to 1, with an atomic store, after blocklist data has been
	// loaded successfully. Lookup returns nil while it is not 1.
	loaded int32

	// data holds the current *blocklistData. The reload callback stores a
	// complete replacement value; Lookup loads it without taking a lock.
	data atomic.Value
}
// BlocklistTag indicates the source containing an IP address and the subject,
// or name of the suspected malicious traffic.
type BlocklistTag struct {
	// Source is taken from the second field of a blocklist CSV record.
	Source string

	// Subject is taken from the third field of a blocklist CSV record.
	Subject string
}
// blocklistData is the in-memory form of one loaded blocklist file.
type blocklistData struct {
	// lookup maps a 4-byte IPv4 address to the distinct tags recorded for
	// that address.
	lookup map[[net.IPv4len]byte][]BlocklistTag

	// internedStrings maps a string to the single retained copy of that
	// string; see internString.
	internedStrings map[string]string
}
  53. // NewBlocklist creates a new block list.
  54. //
  55. // The input file must be a 3 field comma-delimited and optional quote-escaped
  56. // CSV. Fields: <IPv4 address>,<source>,<subject>.
  57. //
  58. // IP addresses may appear multiple times in the input file; each distinct
  59. // source/subject is associated with the IP address and returned in the Lookup
  60. // tag list.
  61. func NewBlocklist(filename string) (*Blocklist, error) {
  62. blocklist := &Blocklist{}
  63. blocklist.ReloadableFile = common.NewReloadableFile(
  64. filename,
  65. false,
  66. func(_ []byte) error {
  67. newData, err := loadBlocklistFromFile(filename)
  68. if err != nil {
  69. return common.ContextError(err)
  70. }
  71. blocklist.data.Store(newData)
  72. atomic.StoreInt32(&blocklist.loaded, 1)
  73. return nil
  74. })
  75. _, err := blocklist.Reload()
  76. if err != nil {
  77. return nil, common.ContextError(err)
  78. }
  79. return blocklist, nil
  80. }
  81. // Lookup returns the blocklist tags for any IP address that is on the
  82. // blocklist, or returns nil for any IP address not on the blocklist. Lookup
  83. // may be called oncurrently. The caller must not modify the return value.
  84. func (b *Blocklist) Lookup(IPAddress net.IP) []BlocklistTag {
  85. // When not configured, no blocklist is loaded/initialized.
  86. if atomic.LoadInt32(&b.loaded) != 1 {
  87. return nil
  88. }
  89. var key [net.IPv4len]byte
  90. IPv4Address := IPAddress.To4()
  91. if IPv4Address == nil {
  92. return nil
  93. }
  94. copy(key[:], IPv4Address)
  95. // As data is an atomic.Value, it's not necessary to call
  96. // ReloadableFile.RLock/ReloadableFile.RUnlock in this case.
  97. tags, ok := b.data.Load().(*blocklistData).lookup[key]
  98. if !ok {
  99. return nil
  100. }
  101. return tags
  102. }
  103. func loadBlocklistFromFile(filename string) (*blocklistData, error) {
  104. data := newBlocklistData()
  105. file, err := os.Open(filename)
  106. if err != nil {
  107. return nil, common.ContextError(err)
  108. }
  109. defer file.Close()
  110. reader := csv.NewReader(file)
  111. reader.FieldsPerRecord = 3
  112. reader.Comment = '#'
  113. reader.ReuseRecord = true
  114. for {
  115. record, err := reader.Read()
  116. if err == io.EOF {
  117. break
  118. } else if err != nil {
  119. return nil, common.ContextError(err)
  120. }
  121. IPAddress := net.ParseIP(record[0])
  122. if IPAddress == nil {
  123. return nil, common.ContextError(
  124. fmt.Errorf("invalid IP address: %s", record[0]))
  125. }
  126. IPv4Address := IPAddress.To4()
  127. if IPAddress == nil {
  128. return nil, common.ContextError(
  129. fmt.Errorf("invalid IPv4 address: %s", record[0]))
  130. }
  131. var key [net.IPv4len]byte
  132. copy(key[:], IPv4Address)
  133. // Intern the source and subject strings so we only store one copy of
  134. // each in memory. These values are expected to repeat often.
  135. source := data.internString(record[1])
  136. subject := data.internString(record[2])
  137. tag := BlocklistTag{
  138. Source: source,
  139. Subject: subject,
  140. }
  141. tags := data.lookup[key]
  142. found := false
  143. for _, existingTag := range tags {
  144. if tag == existingTag {
  145. found = true
  146. break
  147. }
  148. }
  149. if !found {
  150. data.lookup[key] = append(tags, tag)
  151. }
  152. }
  153. return data, nil
  154. }
  155. func newBlocklistData() *blocklistData {
  156. return &blocklistData{
  157. lookup: make(map[[net.IPv4len]byte][]BlocklistTag),
  158. internedStrings: make(map[string]string),
  159. }
  160. }
  161. func (data *blocklistData) internString(str string) string {
  162. if internedStr, ok := data.internedStrings[str]; ok {
  163. return internedStr
  164. }
  165. data.internedStrings[str] = str
  166. return str
  167. }