geoip.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. /*
  2. * Copyright (c) 2016, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package server
  20. import (
  21. "crypto/hmac"
  22. "crypto/sha256"
  23. "fmt"
  24. "io"
  25. "net"
  26. "os"
  27. "path/filepath"
  28. "strconv"
  29. "strings"
  30. "time"
  31. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  32. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  33. maxminddb "github.com/oschwald/maxminddb-golang"
  34. cache "github.com/patrickmn/go-cache"
  35. )
  const (
  	// GEOIP_SESSION_CACHE_TTL is the default expiration that
  	// NewGeoIPService configures on the session cache.
  	GEOIP_SESSION_CACHE_TTL = time.Hour

  	// GEOIP_UNKNOWN_VALUE is the placeholder NewGeoIPData assigns to
  	// every string field of a GeoIPData.
  	GEOIP_UNKNOWN_VALUE = "None"
  )
  // GeoIPData holds the GeoIP attributes associated with one client session.
  // Client IP addresses themselves are neither logged nor explicitly
  // referenced during a session; instead, the country, city, and ISP that an
  // address resolves to are logged together with usage stats.
  type GeoIPData struct {
  	// Resolved GeoIP attributes. NewGeoIPData sets each of these to
  	// GEOIP_UNKNOWN_VALUE; Lookup overwrites the ones a database supplies
  	// (Country with an ISO code, City with the English city name, ASN with
  	// the decimal autonomous system number, ASO with its organization).
  	Country, City, ISP, ASN, ASO string

  	// DiscoveryValue is derived from the client IP address and is used to
  	// compartmentalize discoverable servers (see calculateDiscoveryValue
  	// for details).
  	DiscoveryValue int
  }
  54. // NewGeoIPData returns a GeoIPData initialized with the expected
  55. // GEOIP_UNKNOWN_VALUE values to be used when GeoIP lookup fails.
  56. func NewGeoIPData() GeoIPData {
  57. return GeoIPData{
  58. Country: GEOIP_UNKNOWN_VALUE,
  59. City: GEOIP_UNKNOWN_VALUE,
  60. ISP: GEOIP_UNKNOWN_VALUE,
  61. ASN: GEOIP_UNKNOWN_VALUE,
  62. ASO: GEOIP_UNKNOWN_VALUE,
  63. }
  64. }
  65. // SetLogFields adds the GeoIPData fields to LogFields, following Psiphon
  66. // metric field name and format conventions.
  67. func (g GeoIPData) SetLogFields(logFields LogFields) {
  68. g.SetLogFieldsWithPrefix("", logFields)
  69. }
  70. func (g GeoIPData) SetLogFieldsWithPrefix(prefix string, logFields LogFields) {
  71. // In psi_web, the space replacement was done to accommodate space
  72. // delimited logging, which is no longer required; we retain the
  73. // transformation so that stats aggregation isn't impacted.
  74. logFields[prefix+"client_region"] = strings.Replace(g.Country, " ", "_", -1)
  75. logFields[prefix+"client_city"] = strings.Replace(g.City, " ", "_", -1)
  76. logFields[prefix+"client_isp"] = strings.Replace(g.ISP, " ", "_", -1)
  77. logFields[prefix+"client_asn"] = strings.Replace(g.ASN, " ", "_", -1)
  78. logFields[prefix+"client_aso"] = strings.Replace(g.ASO, " ", "_", -1)
  79. }
  80. // GeoIPService implements GeoIP lookup and session/GeoIP caching.
  81. // Lookup is via a MaxMind database; the ReloadDatabase function
  82. // supports hot reloading of MaxMind data while the server is
  83. // running.
  84. type GeoIPService struct {
  85. databases []*geoIPDatabase
  86. sessionCache *cache.Cache
  87. discoveryValueHMACKey string
  88. }
  89. type geoIPDatabase struct {
  90. common.ReloadableFile
  91. filename string
  92. tempFilename string
  93. tempFileSuffix int64
  94. maxMindReader *maxminddb.Reader
  95. }
  96. // NewGeoIPService initializes a new GeoIPService.
  97. func NewGeoIPService(
  98. databaseFilenames []string,
  99. discoveryValueHMACKey string) (*GeoIPService, error) {
  100. geoIP := &GeoIPService{
  101. databases: make([]*geoIPDatabase, len(databaseFilenames)),
  102. sessionCache: cache.New(GEOIP_SESSION_CACHE_TTL, 1*time.Minute),
  103. discoveryValueHMACKey: discoveryValueHMACKey,
  104. }
  105. for i, filename := range databaseFilenames {
  106. database := &geoIPDatabase{
  107. filename: filename,
  108. }
  109. database.ReloadableFile = common.NewReloadableFile(
  110. filename,
  111. false,
  112. func(_ []byte, _ time.Time) error {
  113. // In order to safely mmap the database file, a temporary copy
  114. // is made and that copy is mmapped. The original file may be
  115. // repaved without affecting the mmap; upon hot reload, a new
  116. // temporary copy is made and once it is successful, the old
  117. // mmap is closed and previous temporary file deleted.
  118. //
  119. // On any reload error, database state remains the same.
  120. src, err := os.Open(database.filename)
  121. if err != nil {
  122. return errors.Trace(err)
  123. }
  124. tempFileSuffix := database.tempFileSuffix + 1
  125. tempFilename := fmt.Sprintf(
  126. "%s.%d",
  127. filepath.Join(os.TempDir(), filepath.Base(database.filename)),
  128. tempFileSuffix)
  129. dst, err := os.Create(tempFilename)
  130. if err != nil {
  131. src.Close()
  132. return errors.Trace(err)
  133. }
  134. _, err = io.Copy(dst, src)
  135. src.Close()
  136. dst.Close()
  137. if err != nil {
  138. _ = os.Remove(tempFilename)
  139. return errors.Trace(err)
  140. }
  141. maxMindReader, err := maxminddb.Open(tempFilename)
  142. if err != nil {
  143. _ = os.Remove(tempFilename)
  144. return errors.Trace(err)
  145. }
  146. if database.maxMindReader != nil {
  147. database.maxMindReader.Close()
  148. _ = os.Remove(database.tempFilename)
  149. }
  150. database.maxMindReader = maxMindReader
  151. database.tempFilename = tempFilename
  152. database.tempFileSuffix = tempFileSuffix
  153. return nil
  154. })
  155. _, err := database.Reload()
  156. if err != nil {
  157. return nil, errors.Trace(err)
  158. }
  159. geoIP.databases[i] = database
  160. }
  161. return geoIP, nil
  162. }
  163. // Reloaders gets the list of reloadable databases in use
  164. // by the GeoIPService. This list is used to hot reload
  165. // these databases.
  166. func (geoIP *GeoIPService) Reloaders() []common.Reloader {
  167. reloaders := make([]common.Reloader, len(geoIP.databases))
  168. for i, database := range geoIP.databases {
  169. reloaders[i] = database
  170. }
  171. return reloaders
  172. }
  173. // Lookup determines a GeoIPData for a given client IP address.
  174. func (geoIP *GeoIPService) Lookup(ipAddress string) GeoIPData {
  175. result := NewGeoIPData()
  176. ip := net.ParseIP(ipAddress)
  177. if ip == nil || len(geoIP.databases) == 0 {
  178. return result
  179. }
  180. var geoIPFields struct {
  181. Country struct {
  182. ISOCode string `maxminddb:"iso_code"`
  183. } `maxminddb:"country"`
  184. City struct {
  185. Names map[string]string `maxminddb:"names"`
  186. } `maxminddb:"city"`
  187. ISP string `maxminddb:"isp"`
  188. ASN int `maxminddb:"autonomous_system_number"`
  189. ASO string `maxminddb:"autonomous_system_organization"`
  190. }
  191. geoIPFields.ASN = -1
  192. // Each database will populate geoIPFields with the values it contains. In the
  193. // current MaxMind deployment, the City database populates Country and City and
  194. // the separate ISP database populates ISP.
  195. for _, database := range geoIP.databases {
  196. database.ReloadableFile.RLock()
  197. err := database.maxMindReader.Lookup(ip, &geoIPFields)
  198. database.ReloadableFile.RUnlock()
  199. if err != nil {
  200. log.WithTraceFields(LogFields{"error": err}).Warning("GeoIP lookup failed")
  201. }
  202. }
  203. if geoIPFields.Country.ISOCode != "" {
  204. result.Country = geoIPFields.Country.ISOCode
  205. }
  206. name, ok := geoIPFields.City.Names["en"]
  207. if ok && name != "" {
  208. result.City = name
  209. }
  210. if geoIPFields.ISP != "" {
  211. result.ISP = geoIPFields.ISP
  212. }
  213. if geoIPFields.ASN != -1 {
  214. result.ASN = strconv.Itoa(geoIPFields.ASN)
  215. }
  216. if geoIPFields.ASO != "" {
  217. result.ASO = geoIPFields.ASO
  218. }
  219. result.DiscoveryValue = calculateDiscoveryValue(
  220. geoIP.discoveryValueHMACKey, ipAddress)
  221. return result
  222. }
  223. // SetSessionCache adds the sessionID/geoIPData pair to the
  224. // session cache. This value will not expire; the caller must
  225. // call MarkSessionCacheToExpire to initiate expiry.
  226. // Calling SetSessionCache for an existing sessionID will
  227. // replace the previous value and reset any expiry.
  228. func (geoIP *GeoIPService) SetSessionCache(sessionID string, geoIPData GeoIPData) {
  229. geoIP.sessionCache.Set(sessionID, geoIPData, cache.NoExpiration)
  230. }
  231. // MarkSessionCacheToExpire initiates expiry for an existing
  232. // session cache entry, if the session ID is found in the cache.
  233. // Concurrency note: SetSessionCache and MarkSessionCacheToExpire
  234. // should not be called concurrently for a single session ID.
  235. func (geoIP *GeoIPService) MarkSessionCacheToExpire(sessionID string) {
  236. geoIPData, found := geoIP.sessionCache.Get(sessionID)
  237. // Note: potential race condition between Get and Set. In practice,
  238. // the tunnel server won't clobber a SetSessionCache value by calling
  239. // MarkSessionCacheToExpire concurrently.
  240. if found {
  241. geoIP.sessionCache.Set(sessionID, geoIPData, cache.DefaultExpiration)
  242. }
  243. }
  244. // GetSessionCache returns the cached GeoIPData for the
  245. // specified session ID; a blank GeoIPData is returned
  246. // if the session ID is not found in the cache.
  247. func (geoIP *GeoIPService) GetSessionCache(sessionID string) GeoIPData {
  248. geoIPData, found := geoIP.sessionCache.Get(sessionID)
  249. if !found {
  250. return NewGeoIPData()
  251. }
  252. return geoIPData.(GeoIPData)
  253. }
  254. // InSessionCache returns whether the session ID is present
  255. // in the session cache.
  256. func (geoIP *GeoIPService) InSessionCache(sessionID string) bool {
  257. _, found := geoIP.sessionCache.Get(sessionID)
  258. return found
  259. }
  // calculateDiscoveryValue derives, from the client IP address, the value
  // fed to the server discovery algorithm. Because the client IP address is
  // not explicitly stored, the value is derived here and kept for later use
  // by the discovery algorithm. The result is the first byte of the
  // HMAC-SHA256 of ipAddress keyed with discoveryValueHMACKey, so it lies
  // in the range 0-255.
  // See https://bitbucket.org/psiphon/psiphon-circumvention-system/src/tip/Automation/psi_ops_discovery.py
  // for full details.
  func calculateDiscoveryValue(discoveryValueHMACKey, ipAddress string) int {
  	// From: psi_ops_discovery.calculate_ip_address_strategy_value:
  	//     # Mix bits from all octets of the client IP address to determine the
  	//     # bucket. An HMAC is used to prevent pre-calculation of buckets for IPs.
  	//     return ord(hmac.new(HMAC_KEY, ip_address, hashlib.sha256).digest()[0])
  	// TODO: use 3-octet algorithm?
  	mac := hmac.New(sha256.New, []byte(discoveryValueHMACKey))
  	mac.Write([]byte(ipAddress))
  	digest := mac.Sum(nil)
  	return int(digest[0])
  }