geoip.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. /*
  2. * Copyright (c) 2016, Psiphon Inc.
  3. * All rights reserved.
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. *
  18. */
  19. package server
  20. import (
  21. "crypto/hmac"
  22. "crypto/sha256"
  23. "fmt"
  24. "io"
  25. "net"
  26. "os"
  27. "path/filepath"
  28. "time"
  29. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  30. maxminddb "github.com/oschwald/maxminddb-golang"
  31. cache "github.com/patrickmn/go-cache"
  32. )
  33. const (
  34. GEOIP_SESSION_CACHE_TTL = 60 * time.Minute
  35. GEOIP_UNKNOWN_VALUE = "None"
  36. )
// GeoIPData is GeoIP data for a client session. Individual client
// IP addresses are neither logged nor explicitly referenced during a session.
// The GeoIP country, city, and ISP corresponding to a client IP address are
// resolved and then logged along with usage stats. The DiscoveryValue is
// a special value derived from the client IP that's used to compartmentalize
// discoverable servers (see calculateDiscoveryValue for details).
type GeoIPData struct {

	// Country is the ISO country code reported by the GeoIP database, or
	// GEOIP_UNKNOWN_VALUE when the lookup yields no country.
	Country string

	// City is the English ("en") city name reported by the GeoIP database,
	// or GEOIP_UNKNOWN_VALUE when the lookup yields no English name.
	City string

	// ISP is the ISP name reported by the GeoIP database, or
	// GEOIP_UNKNOWN_VALUE when the lookup yields no ISP.
	ISP string

	// DiscoveryValue is the value produced by calculateDiscoveryValue: the
	// first byte of an HMAC over the client IP address string, so it is in
	// the range 0-255. It keeps its zero value when Lookup returns early.
	DiscoveryValue int
}
  49. // NewGeoIPData returns a GeoIPData initialized with the expected
  50. // GEOIP_UNKNOWN_VALUE values to be used when GeoIP lookup fails.
  51. func NewGeoIPData() GeoIPData {
  52. return GeoIPData{
  53. Country: GEOIP_UNKNOWN_VALUE,
  54. City: GEOIP_UNKNOWN_VALUE,
  55. ISP: GEOIP_UNKNOWN_VALUE,
  56. }
  57. }
// GeoIPService implements GeoIP lookup and session/GeoIP caching.
// Lookup is via a MaxMind database; the ReloadDatabase function
// supports hot reloading of MaxMind data while the server is
// running.
type GeoIPService struct {

	// databases holds one entry per database filename passed to
	// NewGeoIPService. Lookup consults every entry, in order.
	databases []*geoIPDatabase

	// sessionCache maps a session ID to the GeoIPData stored for it by
	// SetSessionCache.
	sessionCache *cache.Cache

	// discoveryValueHMACKey is the key Lookup passes to
	// calculateDiscoveryValue.
	discoveryValueHMACKey string
}
// geoIPDatabase is a single MaxMind database file together with the state
// needed to hot reload it. The embedded ReloadableFile supplies Reload and
// the RLock/RUnlock calls that Lookup wraps around its use of maxMindReader.
type geoIPDatabase struct {
	common.ReloadableFile

	// filename is the path of the source database file; it is copied to a
	// temporary file on every (re)load.
	filename string

	// tempFilename is the path of the temporary copy that maxMindReader
	// currently has open. It is removed when a later reload succeeds.
	tempFilename string

	// tempFileSuffix is the numeric suffix of tempFilename. Each reload
	// attempt uses tempFileSuffix + 1 and stores that value only on success.
	tempFileSuffix int64

	// maxMindReader is the reader opened on tempFilename; it is nil until
	// the first successful load.
	maxMindReader *maxminddb.Reader
}
  74. // NewGeoIPService initializes a new GeoIPService.
  75. func NewGeoIPService(
  76. databaseFilenames []string,
  77. discoveryValueHMACKey string) (*GeoIPService, error) {
  78. geoIP := &GeoIPService{
  79. databases: make([]*geoIPDatabase, len(databaseFilenames)),
  80. sessionCache: cache.New(GEOIP_SESSION_CACHE_TTL, 1*time.Minute),
  81. discoveryValueHMACKey: discoveryValueHMACKey,
  82. }
  83. for i, filename := range databaseFilenames {
  84. database := &geoIPDatabase{
  85. filename: filename,
  86. }
  87. database.ReloadableFile = common.NewReloadableFile(
  88. filename,
  89. false,
  90. func(_ []byte, _ time.Time) error {
  91. // In order to safely mmap the database file, a temporary copy
  92. // is made and that copy is mmapped. The original file may be
  93. // repaved without affecting the mmap; upon hot reload, a new
  94. // temporary copy is made and once it is successful, the old
  95. // mmap is closed and previous temporary file deleted.
  96. //
  97. // On any reload error, database state remains the same.
  98. src, err := os.Open(database.filename)
  99. if err != nil {
  100. return common.ContextError(err)
  101. }
  102. tempFileSuffix := database.tempFileSuffix + 1
  103. tempFilename := fmt.Sprintf(
  104. "%s.%d",
  105. filepath.Join(os.TempDir(), filepath.Base(database.filename)),
  106. tempFileSuffix)
  107. dst, err := os.Create(tempFilename)
  108. if err != nil {
  109. src.Close()
  110. return common.ContextError(err)
  111. }
  112. _, err = io.Copy(dst, src)
  113. src.Close()
  114. dst.Close()
  115. if err != nil {
  116. _ = os.Remove(tempFilename)
  117. return common.ContextError(err)
  118. }
  119. maxMindReader, err := maxminddb.Open(tempFilename)
  120. if err != nil {
  121. _ = os.Remove(tempFilename)
  122. return common.ContextError(err)
  123. }
  124. if database.maxMindReader != nil {
  125. database.maxMindReader.Close()
  126. _ = os.Remove(database.tempFilename)
  127. }
  128. database.maxMindReader = maxMindReader
  129. database.tempFilename = tempFilename
  130. database.tempFileSuffix = tempFileSuffix
  131. return nil
  132. })
  133. _, err := database.Reload()
  134. if err != nil {
  135. return nil, common.ContextError(err)
  136. }
  137. geoIP.databases[i] = database
  138. }
  139. return geoIP, nil
  140. }
  141. // Reloaders gets the list of reloadable databases in use
  142. // by the GeoIPService. This list is used to hot reload
  143. // these databases.
  144. func (geoIP *GeoIPService) Reloaders() []common.Reloader {
  145. reloaders := make([]common.Reloader, len(geoIP.databases))
  146. for i, database := range geoIP.databases {
  147. reloaders[i] = database
  148. }
  149. return reloaders
  150. }
  151. // Lookup determines a GeoIPData for a given client IP address.
  152. func (geoIP *GeoIPService) Lookup(ipAddress string) GeoIPData {
  153. result := NewGeoIPData()
  154. ip := net.ParseIP(ipAddress)
  155. if ip == nil || len(geoIP.databases) == 0 {
  156. return result
  157. }
  158. var geoIPFields struct {
  159. Country struct {
  160. ISOCode string `maxminddb:"iso_code"`
  161. } `maxminddb:"country"`
  162. City struct {
  163. Names map[string]string `maxminddb:"names"`
  164. } `maxminddb:"city"`
  165. ISP string `maxminddb:"isp"`
  166. }
  167. // Each database will populate geoIPFields with the values it contains. In the
  168. // current MaxMind deployment, the City database populates Country and City and
  169. // the separate ISP database populates ISP.
  170. for _, database := range geoIP.databases {
  171. database.ReloadableFile.RLock()
  172. err := database.maxMindReader.Lookup(ip, &geoIPFields)
  173. database.ReloadableFile.RUnlock()
  174. if err != nil {
  175. log.WithContextFields(LogFields{"error": err}).Warning("GeoIP lookup failed")
  176. }
  177. }
  178. if geoIPFields.Country.ISOCode != "" {
  179. result.Country = geoIPFields.Country.ISOCode
  180. }
  181. name, ok := geoIPFields.City.Names["en"]
  182. if ok && name != "" {
  183. result.City = name
  184. }
  185. if geoIPFields.ISP != "" {
  186. result.ISP = geoIPFields.ISP
  187. }
  188. result.DiscoveryValue = calculateDiscoveryValue(
  189. geoIP.discoveryValueHMACKey, ipAddress)
  190. return result
  191. }
// SetSessionCache adds the sessionID/geoIPData pair to the
// session cache. This value will not expire; the caller must
// call MarkSessionCacheToExpire to initiate expiry.
// Calling SetSessionCache for an existing sessionID will
// replace the previous value and reset any expiry.
func (geoIP *GeoIPService) SetSessionCache(sessionID string, geoIPData GeoIPData) {
	// cache.NoExpiration is what keeps the entry in the cache until
	// MarkSessionCacheToExpire stores it again with an expiration.
	geoIP.sessionCache.Set(sessionID, geoIPData, cache.NoExpiration)
}
  200. // MarkSessionCacheToExpire initiates expiry for an existing
  201. // session cache entry, if the session ID is found in the cache.
  202. // Concurrency note: SetSessionCache and MarkSessionCacheToExpire
  203. // should not be called concurrently for a single session ID.
  204. func (geoIP *GeoIPService) MarkSessionCacheToExpire(sessionID string) {
  205. geoIPData, found := geoIP.sessionCache.Get(sessionID)
  206. // Note: potential race condition between Get and Set. In practice,
  207. // the tunnel server won't clobber a SetSessionCache value by calling
  208. // MarkSessionCacheToExpire concurrently.
  209. if found {
  210. geoIP.sessionCache.Set(sessionID, geoIPData, cache.DefaultExpiration)
  211. }
  212. }
  213. // GetSessionCache returns the cached GeoIPData for the
  214. // specified session ID; a blank GeoIPData is returned
  215. // if the session ID is not found in the cache.
  216. func (geoIP *GeoIPService) GetSessionCache(sessionID string) GeoIPData {
  217. geoIPData, found := geoIP.sessionCache.Get(sessionID)
  218. if !found {
  219. return NewGeoIPData()
  220. }
  221. return geoIPData.(GeoIPData)
  222. }
// InSessionCache returns whether the session ID is present
// in the session cache. The cached value itself is discarded;
// only the found flag reported by the cache is returned.
func (geoIP *GeoIPService) InSessionCache(sessionID string) bool {
	_, found := geoIP.sessionCache.Get(sessionID)
	return found
}
  229. // calculateDiscoveryValue derives a value from the client IP address to be
  230. // used as input in the server discovery algorithm. Since we do not explicitly
  231. // store the client IP address, we must derive the value here and store it for
  232. // later use by the discovery algorithm.
  233. // See https://bitbucket.org/psiphon/psiphon-circumvention-system/src/tip/Automation/psi_ops_discovery.py
  234. // for full details.
  235. func calculateDiscoveryValue(discoveryValueHMACKey, ipAddress string) int {
  236. // From: psi_ops_discovery.calculate_ip_address_strategy_value:
  237. // # Mix bits from all octets of the client IP address to determine the
  238. // # bucket. An HMAC is used to prevent pre-calculation of buckets for IPs.
  239. // return ord(hmac.new(HMAC_KEY, ip_address, hashlib.sha256).digest()[0])
  240. // TODO: use 3-octet algorithm?
  241. hash := hmac.New(sha256.New, []byte(discoveryValueHMACKey))
  242. hash.Write([]byte(ipAddress))
  243. return int(hash.Sum(nil)[0])
  244. }