/* * Copyright (c) 2019, Psiphon Inc. * All rights reserved. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ package server import ( "encoding/csv" "io" "net" "os" "sync/atomic" "time" "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common" "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors" "github.com/miekg/dns" ) // Blocklist provides a fast lookup of IP addresses and domains that are // candidates for egress blocking. This is intended to be used to block // malware and other malicious traffic. // // The Reload function supports hot reloading of rules data while the server // is running. // // Limitations: the blocklist is implemented with in-memory Go maps, which // limits the practical size of the blocklist. type Blocklist struct { common.ReloadableFile loaded int32 data atomic.Value } // BlocklistTag indicates the source containing an IP address and the subject, // or name of the suspected malicious traffic. type BlocklistTag struct { Source string Subject string } type blocklistData struct { lookupIP map[[net.IPv6len]byte][]BlocklistTag lookupDomain map[string][]BlocklistTag internedStrings map[string]string } // NewBlocklist creates a new block list. // // The input file must be a 3 field comma-delimited and optional quote-escaped // CSV. Fields: ,,. // // IP addresses may appear multiple times in the input file; each distinct // source/subject is associated with the IP address and returned in the Lookup // tag list. func NewBlocklist(filename string) (*Blocklist, error) { blocklist := &Blocklist{} blocklist.ReloadableFile = common.NewReloadableFile( filename, false, func(_ []byte, _ time.Time) error { newData, err := loadBlocklistFromFile(filename) if err != nil { return errors.Trace(err) } blocklist.data.Store(newData) atomic.StoreInt32(&blocklist.loaded, 1) return nil }) _, err := blocklist.Reload() if err != nil { return nil, errors.Trace(err) } return blocklist, nil } // LookupIP returns the blocklist tags for any IP address that is on the // blocklist, or returns nil for any IP address not on the blocklist. Lookup // may be called concurrently. The caller must not modify the return value. func (b *Blocklist) LookupIP(IPAddress net.IP) []BlocklistTag { // When not configured, no blocklist is loaded/initialized. if atomic.LoadInt32(&b.loaded) != 1 { return nil } // IPAddress may be an IPv4 or IPv6 address. To16 will return the 16-byte // representation of an IPv4 address, with the net.v4InV6Prefix prefix. var key [net.IPv6len]byte IPAddress16 := IPAddress.To16() if IPAddress16 == nil { return nil } copy(key[:], IPAddress16) // As data is an atomic.Value, it's not necessary to call // ReloadableFile.RLock/ReloadableFile.RUnlock in this case. tags, ok := b.data.Load().(*blocklistData).lookupIP[key] if !ok { return nil } return tags } // LookupDomain returns the blocklist tags for any domain that is on the // blocklist, or returns nil for any domain not on the blocklist. Lookup may // be called concurrently. The caller must not modify the return value. func (b *Blocklist) LookupDomain(domain string) []BlocklistTag { if atomic.LoadInt32(&b.loaded) != 1 { return nil } // Domains parsed out of DNS queries will be fully-qualified domain names, // while list entries do not end in a dot. if len(domain) > 0 && domain[len(domain)-1] == '.' { domain = domain[:len(domain)-1] } tags, ok := b.data.Load().(*blocklistData).lookupDomain[domain] if !ok { return nil } return tags } func loadBlocklistFromFile(filename string) (*blocklistData, error) { data := newBlocklistData() file, err := os.Open(filename) if err != nil { return nil, errors.Trace(err) } defer file.Close() reader := csv.NewReader(file) reader.FieldsPerRecord = 3 reader.Comment = '#' reader.ReuseRecord = true for { record, err := reader.Read() if err == io.EOF { break } else if err != nil { return nil, errors.Trace(err) } // Intern the source and subject strings so we only store one copy of // each in memory. These values are expected to repeat often. source := data.internString(record[1]) subject := data.internString(record[2]) tag := BlocklistTag{ Source: source, Subject: subject, } IPAddress := net.ParseIP(record[0]) if IPAddress != nil { IPAddress16 := IPAddress.To16() if IPAddress16 == nil { return nil, errors.Tracef("invalid IP address: %s", record[0]) } var key [net.IPv6len]byte copy(key[:], IPAddress16) tags := data.lookupIP[key] found := false for _, existingTag := range tags { if tag == existingTag { found = true break } } if !found { data.lookupIP[key] = append(tags, tag) } } else { if _, ok := dns.IsDomainName(record[0]); !ok { return nil, errors.Tracef("invalid domain name: %s", record[0]) } key := record[0] tags := data.lookupDomain[key] found := false for _, existingTag := range tags { if tag == existingTag { found = true break } } if !found { data.lookupDomain[key] = append(tags, tag) } } } return data, nil } func newBlocklistData() *blocklistData { return &blocklistData{ lookupIP: make(map[[net.IPv6len]byte][]BlocklistTag), lookupDomain: make(map[string][]BlocklistTag), internedStrings: make(map[string]string), } } func (data *blocklistData) internString(str string) string { if internedStr, ok := data.internedStrings[str]; ok { return internedStr } data.internedStrings[str] = str return str }