blocklist.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. /*
  2. * Copyright (c) 2019, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package server
  20. import (
  21. "encoding/csv"
  22. "io"
  23. "net"
  24. "os"
  25. "sync/atomic"
  26. "time"
  27. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  28. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  29. "github.com/miekg/dns"
  30. )
  31. // Blocklist provides a fast lookup of IP addresses and domains that are
  32. // candidates for egress blocking. This is intended to be used to block
  33. // malware and other malicious traffic.
  34. //
  35. // The Reload function supports hot reloading of rules data while the server
  36. // is running.
  37. //
  38. // Limitations: the blocklist is implemented with in-memory Go maps, which
  39. // limits the practical size of the blocklist.
  40. type Blocklist struct {
  41. common.ReloadableFile
  42. loaded int32
  43. data atomic.Value
  44. }
  45. // BlocklistTag indicates the source containing an IP address and the subject,
  46. // or name of the suspected malicious traffic.
  47. type BlocklistTag struct {
  48. Source string
  49. Subject string
  50. }
  51. type blocklistData struct {
  52. lookupIP map[[net.IPv6len]byte][]BlocklistTag
  53. lookupDomain map[string][]BlocklistTag
  54. internedStrings map[string]string
  55. }
  56. // NewBlocklist creates a new block list.
  57. //
  58. // The input file must be a 3 field comma-delimited and optional quote-escaped
  59. // CSV. Fields: <IPv4 address>,<source>,<subject>.
  60. //
  61. // IP addresses may appear multiple times in the input file; each distinct
  62. // source/subject is associated with the IP address and returned in the Lookup
  63. // tag list.
  64. func NewBlocklist(filename string) (*Blocklist, error) {
  65. blocklist := &Blocklist{}
  66. blocklist.ReloadableFile = common.NewReloadableFile(
  67. filename,
  68. false,
  69. func(_ []byte, _ time.Time) error {
  70. newData, err := loadBlocklistFromFile(filename)
  71. if err != nil {
  72. return errors.Trace(err)
  73. }
  74. blocklist.data.Store(newData)
  75. atomic.StoreInt32(&blocklist.loaded, 1)
  76. return nil
  77. })
  78. _, err := blocklist.Reload()
  79. if err != nil {
  80. return nil, errors.Trace(err)
  81. }
  82. return blocklist, nil
  83. }
  84. // LookupIP returns the blocklist tags for any IP address that is on the
  85. // blocklist, or returns nil for any IP address not on the blocklist. Lookup
  86. // may be called concurrently. The caller must not modify the return value.
  87. func (b *Blocklist) LookupIP(IPAddress net.IP) []BlocklistTag {
  88. // When not configured, no blocklist is loaded/initialized.
  89. if atomic.LoadInt32(&b.loaded) != 1 {
  90. return nil
  91. }
  92. // IPAddress may be an IPv4 or IPv6 address. To16 will return the 16-byte
  93. // representation of an IPv4 address, with the net.v4InV6Prefix prefix.
  94. var key [net.IPv6len]byte
  95. IPAddress16 := IPAddress.To16()
  96. if IPAddress16 == nil {
  97. return nil
  98. }
  99. copy(key[:], IPAddress16)
  100. // As data is an atomic.Value, it's not necessary to call
  101. // ReloadableFile.RLock/ReloadableFile.RUnlock in this case.
  102. tags, ok := b.data.Load().(*blocklistData).lookupIP[key]
  103. if !ok {
  104. return nil
  105. }
  106. return tags
  107. }
  108. // LookupDomain returns the blocklist tags for any domain that is on the
  109. // blocklist, or returns nil for any domain not on the blocklist. Lookup may
  110. // be called concurrently. The caller must not modify the return value.
  111. func (b *Blocklist) LookupDomain(domain string) []BlocklistTag {
  112. if atomic.LoadInt32(&b.loaded) != 1 {
  113. return nil
  114. }
  115. // Domains parsed out of DNS queries will be fully-qualified domain names,
  116. // while list entries do not end in a dot.
  117. if len(domain) > 0 && domain[len(domain)-1] == '.' {
  118. domain = domain[:len(domain)-1]
  119. }
  120. tags, ok := b.data.Load().(*blocklistData).lookupDomain[domain]
  121. if !ok {
  122. return nil
  123. }
  124. return tags
  125. }
  126. func loadBlocklistFromFile(filename string) (*blocklistData, error) {
  127. data := newBlocklistData()
  128. file, err := os.Open(filename)
  129. if err != nil {
  130. return nil, errors.Trace(err)
  131. }
  132. defer file.Close()
  133. reader := csv.NewReader(file)
  134. reader.FieldsPerRecord = 3
  135. reader.Comment = '#'
  136. reader.ReuseRecord = true
  137. for {
  138. record, err := reader.Read()
  139. if err == io.EOF {
  140. break
  141. } else if err != nil {
  142. return nil, errors.Trace(err)
  143. }
  144. // Intern the source and subject strings so we only store one copy of
  145. // each in memory. These values are expected to repeat often.
  146. source := data.internString(record[1])
  147. subject := data.internString(record[2])
  148. tag := BlocklistTag{
  149. Source: source,
  150. Subject: subject,
  151. }
  152. IPAddress := net.ParseIP(record[0])
  153. if IPAddress != nil {
  154. IPAddress16 := IPAddress.To16()
  155. if IPAddress16 == nil {
  156. return nil, errors.Tracef("invalid IP address: %s", record[0])
  157. }
  158. var key [net.IPv6len]byte
  159. copy(key[:], IPAddress16)
  160. tags := data.lookupIP[key]
  161. found := false
  162. for _, existingTag := range tags {
  163. if tag == existingTag {
  164. found = true
  165. break
  166. }
  167. }
  168. if !found {
  169. data.lookupIP[key] = append(tags, tag)
  170. }
  171. } else {
  172. if _, ok := dns.IsDomainName(record[0]); !ok {
  173. return nil, errors.Tracef("invalid domain name: %s", record[0])
  174. }
  175. key := record[0]
  176. tags := data.lookupDomain[key]
  177. found := false
  178. for _, existingTag := range tags {
  179. if tag == existingTag {
  180. found = true
  181. break
  182. }
  183. }
  184. if !found {
  185. data.lookupDomain[key] = append(tags, tag)
  186. }
  187. }
  188. }
  189. return data, nil
  190. }
  191. func newBlocklistData() *blocklistData {
  192. return &blocklistData{
  193. lookupIP: make(map[[net.IPv6len]byte][]BlocklistTag),
  194. lookupDomain: make(map[string][]BlocklistTag),
  195. internedStrings: make(map[string]string),
  196. }
  197. }
  198. func (data *blocklistData) internString(str string) string {
  199. if internedStr, ok := data.internedStrings[str]; ok {
  200. return internedStr
  201. }
  202. data.internedStrings[str] = str
  203. return str
  204. }