Просмотр исходного кода

Merge pull request #337 from rod-hynes/master

Update discovery server partitioning
Rod Hynes 9 лет назад
Родитель
Сommit
ddd113ce92
3 измененных файлов с 161 добавлено и 9 удалено
  1. 2 0
      .travis.yml
  2. 24 9
      psiphon/server/psinet/psinet.go
  3. 135 0
      psiphon/server/psinet/psinet_test.go

+ 2 - 0
.travis.yml

@@ -14,12 +14,14 @@ script:
 - go test -race -v ./common/protocol
 - go test -race -v ./transferstats
 - go test -race -v ./server
+- go test -race -v ./server/psinet
 - go test -race -v
 - go test -v -covermode=count -coverprofile=common.coverprofile ./common
 - go test -v -covermode=count -coverprofile=osl.coverprofile ./common/osl
 - go test -v -covermode=count -coverprofile=protocol.coverprofile ./common/protocol
 - go test -v -covermode=count -coverprofile=transferstats.coverprofile ./transferstats
 - go test -v -covermode=count -coverprofile=server.coverprofile ./server
+- go test -v -covermode=count -coverprofile=psinet.coverprofile ./server/psinet
 - go test -v -covermode=count -coverprofile=psiphon.coverprofile
 - $HOME/gopath/bin/gover
 - $HOME/gopath/bin/goveralls -coverprofile=gover.coverprofile -service=travis-ci -repotoken $COVERALLS_TOKEN

+ 24 - 9
psiphon/server/psinet/psinet.go

@@ -274,7 +274,8 @@ func (db *Database) GetHttpsRequestRegexes(sponsorID string) []map[string]string
 }
 
 // DiscoverServers selects new encoded server entries to be "discovered" by
-// the client, using the discoveryValue as the input into the discovery algorithm.
+// the client, using the discoveryValue -- a function of the client's IP
+// address -- as the input into the discovery algorithm.
 // The server list (db.Servers) loaded from JSON is stored as an array instead of
 // a map to ensure servers are discovered deterministically. Each iteration over a
 // map in go is seeded with a random value which causes non-deterministic ordering.
@@ -307,7 +308,9 @@ func (db *Database) DiscoverServers(discoveryValue int) []string {
 			}
 		}
 	}
-	servers = selectServers(candidateServers, discoveryValue)
+
+	timeInSeconds := int(discoveryDate.Unix())
+	servers = selectServers(candidateServers, timeInSeconds, discoveryValue)
 
 	encodedServerEntries := make([]string, 0)
 
@@ -333,15 +336,14 @@ func (db *Database) DiscoverServers(discoveryValue int) []string {
 // both aspects determine which server is selected. IP address is given the
 // priority: if there are only a couple of servers, for example, IP address alone
 // determines the outcome.
-func selectServers(servers []Server, discoveryValue int) []Server {
+func selectServers(servers []Server, timeInSeconds, discoveryValue int) []Server {
 	TIME_GRANULARITY := 3600
 
 	if len(servers) == 0 {
 		return nil
 	}
 
-	// Current time truncated to an hour
-	timeInSeconds := int(time.Now().Unix())
+	// Time truncated to an hour
 	timeStrategyValue := timeInSeconds / TIME_GRANULARITY
 
 	// Divide servers into buckets. The bucket count is chosen such that the number
@@ -350,6 +352,7 @@ func selectServers(servers []Server, discoveryValue int) []Server {
 
 	// NOTE: this code assumes that the range of possible timeStrategyValues
 	// and discoveryValues are sufficient to index to all bucket items.
+
 	bucketCount := calculateBucketCount(len(servers))
 
 	buckets := bucketizeServerList(servers, bucketCount)
@@ -378,14 +381,26 @@ func calculateBucketCount(length int) int {
 	return int(math.Ceil(math.Sqrt(float64(length))))
 }
 
-// Create bucketCount nearly equal sized buckets.
+// bucketizeServerList creates nearly equal sized slices of the input list.
 func bucketizeServerList(servers []Server, bucketCount int) [][]Server {
 
+	// This code creates the same partitions as legacy servers:
+	// https://bitbucket.org/psiphon/psiphon-circumvention-system/src/03bc1a7e51e7c85a816e370bb3a6c755fd9c6fee/Automation/psi_ops_discovery.py
+	//
+	// Both use the same algorithm from:
+	// http://stackoverflow.com/questions/2659900/python-slicing-a-list-into-n-nearly-equal-length-partitions
+
+	// TODO: this partition is constant for fixed Database content, so it could
+	// be done once and cached in the Database ReloadableFile reloadAction.
+
 	buckets := make([][]Server, bucketCount)
 
-	for index, server := range servers {
-		bucketIndex := index % bucketCount
-		buckets[bucketIndex] = append(buckets[bucketIndex], server)
+	division := float64(len(servers)) / float64(bucketCount)
+
+	for i := 0; i < bucketCount; i++ {
+		start := int((division * float64(i)) + 0.5)
+		end := int((division * (float64(i) + 1)) + 0.5)
+		buckets[i] = servers[start:end]
 	}
 
 	return buckets

+ 135 - 0
psiphon/server/psinet/psinet_test.go

@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2017, Psiphon Inc.
+ * All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+package psinet
+
+import (
+	"fmt"
+	"testing"
+	"time"
+)
+
+func TestDiscoveryBuckets(t *testing.T) {
+
+	checkBuckets := func(buckets [][]Server, expectedIDs [][]int) {
+		if len(buckets) != len(expectedIDs) {
+			t.Errorf(
+				"unexpected bucket count: got %d expected %d",
+				len(buckets), len(expectedIDs))
+			return
+		}
+		for i := 0; i < len(buckets); i++ {
+			if len(buckets[i]) != len(expectedIDs[i]) {
+				t.Errorf(
+					"unexpected bucket %d size: got %d expected %d",
+					i, len(buckets[i]), len(expectedIDs[i]))
+				return
+			}
+			for j := 0; j < len(buckets[i]); j++ {
+				expectedID := fmt.Sprintf("%d", expectedIDs[i][j])
+				if buckets[i][j].Id != expectedID {
+					t.Errorf(
+						"unexpected bucket %d item %d: got %s expected %s",
+						i, j, buckets[i][j].Id, expectedID)
+					return
+				}
+			}
+		}
+	}
+
+	// Partition test cases from:
+	// http://stackoverflow.com/questions/2659900/python-slicing-a-list-into-n-nearly-equal-length-partitions
+
+	servers := make([]Server, 0)
+	for i := 0; i < 105; i++ {
+		servers = append(servers, Server{Id: fmt.Sprintf("%d", i)})
+	}
+
+	t.Run("5 servers, 5 buckets", func(t *testing.T) {
+		checkBuckets(
+			bucketizeServerList(servers[0:5], 5),
+			[][]int{{0}, {1}, {2}, {3}, {4}})
+	})
+
+	t.Run("5 servers, 2 buckets", func(t *testing.T) {
+		checkBuckets(
+			bucketizeServerList(servers[0:5], 2),
+			[][]int{{0, 1, 2}, {3, 4}})
+	})
+
+	t.Run("5 servers, 3 buckets", func(t *testing.T) {
+		checkBuckets(
+			bucketizeServerList(servers[0:5], 3),
+			[][]int{{0, 1}, {2}, {3, 4}})
+	})
+
+	t.Run("105 servers, 10 buckets", func(t *testing.T) {
+		checkBuckets(
+			bucketizeServerList(servers, 10),
+			[][]int{
+				{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+				{11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
+				{21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
+				{32, 33, 34, 35, 36, 37, 38, 39, 40, 41},
+				{42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52},
+				{53, 54, 55, 56, 57, 58, 59, 60, 61, 62},
+				{63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73},
+				{74, 75, 76, 77, 78, 79, 80, 81, 82, 83},
+				{84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94},
+				{95, 96, 97, 98, 99, 100, 101, 102, 103, 104},
+			})
+	})
+
+	t.Run("repeatedly discover with fixed IP address", func(t *testing.T) {
+
+		// For a IP address values, only one bucket should be used; with enough
+		// iterations, all and only the items in a single bucket should be discovered.
+
+		discoveredServers := make(map[string]bool)
+
+		// discoveryValue is derived from the client's IP address and indexes the bucket;
+		// a value of 0 always maps to the first bucket.
+		discoveryValue := 0
+
+		for i := 0; i < 1000; i++ {
+			for _, server := range selectServers(servers, i*int(time.Hour/time.Second), discoveryValue) {
+				discoveredServers[server.Id] = true
+			}
+		}
+
+		bucketCount := calculateBucketCount(len(servers))
+
+		buckets := bucketizeServerList(servers, bucketCount)
+
+		if len(buckets[0]) != len(discoveredServers) {
+			t.Errorf(
+				"unexpected discovered server count: got %d expected %d",
+				len(discoveredServers), len(buckets[0]))
+			return
+		}
+
+		for _, bucketServer := range buckets[0] {
+			if _, ok := discoveredServers[bucketServer.Id]; !ok {
+				t.Errorf("unexpected missing discovery server: %s", bucketServer.Id)
+				return
+			}
+		}
+	})
+
+}