dataStore_bolt.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. //go:build !PSIPHON_USE_BADGER_DB && !PSIPHON_USE_FILES_DB
  2. // +build !PSIPHON_USE_BADGER_DB,!PSIPHON_USE_FILES_DB
  3. /*
  4. * Copyright (c) 2018, Psiphon Inc.
  5. * All rights reserved.
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. */
  21. package psiphon
  22. import (
  23. std_errors "errors"
  24. "fmt"
  25. "os"
  26. "path/filepath"
  27. "runtime/debug"
  28. "sync/atomic"
  29. "time"
  30. "github.com/Psiphon-Labs/bolt"
  31. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  32. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  33. )
const (
	// OPEN_DB_RETRIES is the number of additional open attempts, beyond the
	// first, that datastoreOpenDB makes when it is called with retryAndReset
	// set.
	OPEN_DB_RETRIES = 2
)
// datastoreDB wraps an open bolt database and tracks whether a recovered
// panic has put this instance into the failed state.
type datastoreDB struct {
	// boltDB is the underlying open bolt database.
	boltDB *bolt.DB
	// filename is the path of the datastore file passed to bolt.Open.
	filename string
	// isFailed is read and written with sync/atomic; it is set to 1 by
	// setDatastoreFailed and checked by isDatastoreFailed.
	isFailed int32
}
// datastoreTx pairs a bolt transaction with the datastoreDB that owns it, so
// that transaction-level calls can check and set the failed state.
type datastoreTx struct {
	// db is the owning datastore.
	db *datastoreDB
	// boltTx is the underlying bolt transaction.
	boltTx *bolt.Tx
}
// datastoreBucket pairs a bolt bucket with the datastoreDB that owns it.
type datastoreBucket struct {
	// db is the owning datastore.
	db *datastoreDB
	// boltBucket is the underlying bolt bucket. It is nil when the bucket was
	// obtained from a datastore that is in the failed state.
	boltBucket *bolt.Bucket
}
// datastoreCursor pairs a bolt cursor with the datastoreDB that owns it.
type datastoreCursor struct {
	// db is the owning datastore.
	db *datastoreDB
	// boltCursor is the underlying bolt cursor. It is nil when the cursor was
	// obtained from a datastore that is in the failed state.
	boltCursor *bolt.Cursor
}
  54. func datastoreOpenDB(
  55. rootDataDirectory string, retryAndReset bool) (*datastoreDB, error) {
  56. var db *datastoreDB
  57. var err error
  58. attempts := 1
  59. if retryAndReset {
  60. attempts += OPEN_DB_RETRIES
  61. }
  62. reset := false
  63. for attempt := 0; attempt < attempts; attempt++ {
  64. db, err = tryDatastoreOpenDB(rootDataDirectory, reset)
  65. if err == nil {
  66. break
  67. }
  68. NoticeWarning("tryDatastoreOpenDB failed: %s", err)
  69. // The datastore file may be corrupt, so, in subsequent iterations,
  70. // set the "reset" flag and attempt to delete the file and try again.
  71. //
  72. // Don't reset the datastore when open failed due to timeout obtaining
  73. // the file lock, as the datastore is simply locked by another
  74. // process and not corrupt. As the file lock is advisory, deleting
  75. // the file would succeed despite the lock. In this case, still retry
  76. // in case the the lock is released.
  77. reset = !std_errors.Is(err, bolt.ErrTimeout)
  78. }
  79. return db, err
  80. }
  81. func tryDatastoreOpenDB(
  82. rootDataDirectory string, reset bool) (retdb *datastoreDB, reterr error) {
  83. // Testing indicates that the bolt Check function can raise SIGSEGV due to
  84. // invalid mmap buffer accesses in cases such as opening a valid but
  85. // truncated datastore file.
  86. //
  87. // To handle this, we temporarily set SetPanicOnFault in order to treat the
  88. // fault as a panic, recover any panic, and return an error which will result
  89. // in a retry with reset.
  90. // Begin recovery preamble
  91. panicOnFault := debug.SetPanicOnFault(true)
  92. defer debug.SetPanicOnFault(panicOnFault)
  93. defer func() {
  94. if r := recover(); r != nil {
  95. retdb = nil
  96. reterr = errors.Tracef("panic: %v", r)
  97. }
  98. }()
  99. // End recovery preamble
  100. filename := filepath.Join(rootDataDirectory, "psiphon.boltdb")
  101. if reset {
  102. NoticeWarning("tryDatastoreOpenDB: reset")
  103. os.Remove(filename)
  104. }
  105. // A typical Psiphon datastore will not have a large, fragmented freelist.
  106. // For this reason, we're not setting FreelistType to FreelistMapType or
  107. // enabling NoFreelistSync. The latter option has a trade-off of slower
  108. // start up time.
  109. //
  110. // Monitor freelist stats in DataStoreMetrics in diagnostics and consider
  111. // setting these options if necessary.
  112. newDB, err := bolt.Open(filename, 0600, &bolt.Options{Timeout: 1 * time.Second})
  113. if err != nil {
  114. return nil, errors.Trace(err)
  115. }
  116. // Run consistency checks on datastore and emit errors for diagnostics
  117. // purposes. We assume this will complete quickly for typical size Psiphon
  118. // datastores and wait for the check to complete before proceeding.
  119. err = newDB.View(func(tx *bolt.Tx) error {
  120. return tx.SynchronousCheck()
  121. })
  122. if err != nil {
  123. return nil, errors.Trace(err)
  124. }
  125. err = newDB.Update(func(tx *bolt.Tx) error {
  126. requiredBuckets := [][]byte{
  127. datastoreServerEntriesBucket,
  128. datastoreServerEntryTagsBucket,
  129. datastoreServerEntryTombstoneTagsBucket,
  130. datastoreUrlETagsBucket,
  131. datastoreKeyValueBucket,
  132. datastoreRemoteServerListStatsBucket,
  133. datastoreFailedTunnelStatsBucket,
  134. datastoreSLOKsBucket,
  135. datastoreTacticsBucket,
  136. datastoreSpeedTestSamplesBucket,
  137. datastoreDialParametersBucket,
  138. }
  139. for _, bucket := range requiredBuckets {
  140. _, err := tx.CreateBucketIfNotExists(bucket)
  141. if err != nil {
  142. return err
  143. }
  144. }
  145. return nil
  146. })
  147. if err != nil {
  148. return nil, errors.Trace(err)
  149. }
  150. // Cleanup obsolete buckets
  151. err = newDB.Update(func(tx *bolt.Tx) error {
  152. obsoleteBuckets := [][]byte{
  153. []byte("tunnelStats"),
  154. []byte("rankedServerEntries"),
  155. []byte("splitTunnelRouteETags"),
  156. []byte("splitTunnelRouteData"),
  157. }
  158. for _, obsoleteBucket := range obsoleteBuckets {
  159. if tx.Bucket(obsoleteBucket) != nil {
  160. err := tx.DeleteBucket(obsoleteBucket)
  161. if err != nil {
  162. NoticeWarning("DeleteBucket %s error: %s", obsoleteBucket, err)
  163. // Continue, since this is not fatal
  164. }
  165. }
  166. }
  167. return nil
  168. })
  169. if err != nil {
  170. return nil, errors.Trace(err)
  171. }
  172. return &datastoreDB{
  173. boltDB: newDB,
  174. filename: filename,
  175. }, nil
  176. }
// errDatastoreFailed is returned by datastore operations once the datastoreDB
// instance has been put into the failed state by a recovered panic.
var errDatastoreFailed = std_errors.New("datastore has failed")
  178. func (db *datastoreDB) isDatastoreFailed() bool {
  179. return atomic.LoadInt32(&db.isFailed) == 1
  180. }
  181. func (db *datastoreDB) setDatastoreFailed(r interface{}) {
  182. atomic.StoreInt32(&db.isFailed, 1)
  183. NoticeWarning("Datastore failed: %s", errors.Tracef("panic: %v", r))
  184. }
  185. func (db *datastoreDB) close() error {
  186. // Limitation: there is no panic recover in this case. We assume boltDB.Close
  187. // does not make mmap accesses and prefer to not continue with the datastore
  188. // file in a locked or open state. We also assume that any locks aquired by
  189. // boltDB.Close, held by transactions, will be released even if the
  190. // transaction panics and the database is in the failed state.
  191. return db.boltDB.Close()
  192. }
  193. func (db *datastoreDB) getDataStoreMetrics() string {
  194. fileSize := int64(0)
  195. fileInfo, err := os.Stat(db.filename)
  196. if err == nil {
  197. fileSize = fileInfo.Size()
  198. }
  199. stats := db.boltDB.Stats()
  200. return fmt.Sprintf("filesize %s | freepages %d | freealloc %s | txcount %d | writes %d | writetime %s",
  201. common.FormatByteCount(uint64(fileSize)),
  202. stats.FreePageN,
  203. common.FormatByteCount(uint64(stats.FreeAlloc)),
  204. stats.TxN,
  205. stats.TxStats.Write,
  206. stats.TxStats.WriteTime)
  207. }
  208. func (db *datastoreDB) view(fn func(tx *datastoreTx) error) (reterr error) {
  209. // Any bolt function that performs mmap buffer accesses can raise SIGBUS due
  210. // to underlying storage changes, such as a truncation of the datastore file
  211. // or removal or network attached storage, etc.
  212. //
  213. // To handle this, we temporarily set SetPanicOnFault in order to treat the
  214. // fault as a panic, recover any panic to avoid crashing the process, and
  215. // putting this datastoreDB instance into a failed state. All subsequent
  216. // calls to this datastoreDBinstance or its related datastoreTx and
  217. // datastoreBucket instances will fail.
  218. // Begin recovery preamble
  219. if db.isDatastoreFailed() {
  220. return errDatastoreFailed
  221. }
  222. panicOnFault := debug.SetPanicOnFault(true)
  223. defer debug.SetPanicOnFault(panicOnFault)
  224. defer func() {
  225. if r := recover(); r != nil {
  226. db.setDatastoreFailed(r)
  227. reterr = errDatastoreFailed
  228. }
  229. }()
  230. // End recovery preamble
  231. return db.boltDB.View(
  232. func(tx *bolt.Tx) error {
  233. err := fn(&datastoreTx{db: db, boltTx: tx})
  234. if err != nil {
  235. return errors.Trace(err)
  236. }
  237. return nil
  238. })
  239. }
  240. func (db *datastoreDB) update(fn func(tx *datastoreTx) error) (reterr error) {
  241. // Begin recovery preamble
  242. if db.isDatastoreFailed() {
  243. return errDatastoreFailed
  244. }
  245. panicOnFault := debug.SetPanicOnFault(true)
  246. defer debug.SetPanicOnFault(panicOnFault)
  247. defer func() {
  248. if r := recover(); r != nil {
  249. db.setDatastoreFailed(r)
  250. reterr = errDatastoreFailed
  251. }
  252. }()
  253. // End recovery preamble
  254. return db.boltDB.Update(
  255. func(tx *bolt.Tx) error {
  256. err := fn(&datastoreTx{db: db, boltTx: tx})
  257. if err != nil {
  258. return errors.Trace(err)
  259. }
  260. return nil
  261. })
  262. }
  263. func (tx *datastoreTx) bucket(name []byte) (retbucket *datastoreBucket) {
  264. // Begin recovery preamble
  265. if tx.db.isDatastoreFailed() {
  266. return &datastoreBucket{db: tx.db, boltBucket: nil}
  267. }
  268. panicOnFault := debug.SetPanicOnFault(true)
  269. defer debug.SetPanicOnFault(panicOnFault)
  270. defer func() {
  271. if r := recover(); r != nil {
  272. tx.db.setDatastoreFailed(r)
  273. retbucket = &datastoreBucket{db: tx.db, boltBucket: nil}
  274. }
  275. }()
  276. // End recovery preamble
  277. return &datastoreBucket{db: tx.db, boltBucket: tx.boltTx.Bucket(name)}
  278. }
  279. func (tx *datastoreTx) clearBucket(name []byte) (reterr error) {
  280. // Begin recovery preamble
  281. if tx.db.isDatastoreFailed() {
  282. return errDatastoreFailed
  283. }
  284. panicOnFault := debug.SetPanicOnFault(true)
  285. defer debug.SetPanicOnFault(panicOnFault)
  286. defer func() {
  287. if r := recover(); r != nil {
  288. tx.db.setDatastoreFailed(r)
  289. reterr = errDatastoreFailed
  290. }
  291. }()
  292. // End recovery preamble
  293. err := tx.boltTx.DeleteBucket(name)
  294. if err != nil {
  295. return errors.Trace(err)
  296. }
  297. _, err = tx.boltTx.CreateBucket(name)
  298. if err != nil {
  299. return errors.Trace(err)
  300. }
  301. return nil
  302. }
  303. func (b *datastoreBucket) get(key []byte) (retvalue []byte) {
  304. // Begin recovery preamble
  305. if b.db.isDatastoreFailed() {
  306. return nil
  307. }
  308. panicOnFault := debug.SetPanicOnFault(true)
  309. defer debug.SetPanicOnFault(panicOnFault)
  310. defer func() {
  311. if r := recover(); r != nil {
  312. b.db.setDatastoreFailed(r)
  313. retvalue = nil
  314. }
  315. }()
  316. // End recovery preamble
  317. return b.boltBucket.Get(key)
  318. }
  319. func (b *datastoreBucket) put(key, value []byte) (reterr error) {
  320. // Begin recovery preamble
  321. if b.db.isDatastoreFailed() {
  322. return errDatastoreFailed
  323. }
  324. panicOnFault := debug.SetPanicOnFault(true)
  325. defer debug.SetPanicOnFault(panicOnFault)
  326. defer func() {
  327. if r := recover(); r != nil {
  328. b.db.setDatastoreFailed(r)
  329. reterr = errDatastoreFailed
  330. }
  331. }()
  332. // End recovery preamble
  333. err := b.boltBucket.Put(key, value)
  334. if err != nil {
  335. return errors.Trace(err)
  336. }
  337. return nil
  338. }
  339. func (b *datastoreBucket) delete(key []byte) (reterr error) {
  340. // Begin recovery preamble
  341. if b.db.isDatastoreFailed() {
  342. return errDatastoreFailed
  343. }
  344. panicOnFault := debug.SetPanicOnFault(true)
  345. defer debug.SetPanicOnFault(panicOnFault)
  346. defer func() {
  347. if r := recover(); r != nil {
  348. b.db.setDatastoreFailed(r)
  349. reterr = errDatastoreFailed
  350. }
  351. }()
  352. // End recovery preamble
  353. err := b.boltBucket.Delete(key)
  354. if err != nil {
  355. return errors.Trace(err)
  356. }
  357. return nil
  358. }
  359. func (b *datastoreBucket) cursor() (retcursor datastoreCursor) {
  360. // Begin recovery preamble
  361. if b.db.isDatastoreFailed() {
  362. return datastoreCursor{db: b.db, boltCursor: nil}
  363. }
  364. panicOnFault := debug.SetPanicOnFault(true)
  365. defer debug.SetPanicOnFault(panicOnFault)
  366. defer func() {
  367. if r := recover(); r != nil {
  368. b.db.setDatastoreFailed(r)
  369. retcursor = datastoreCursor{db: b.db, boltCursor: nil}
  370. }
  371. }()
  372. // End recovery preamble
  373. return datastoreCursor{db: b.db, boltCursor: b.boltBucket.Cursor()}
  374. }
  375. func (c *datastoreCursor) firstKey() (retkey []byte) {
  376. // Begin recovery preamble
  377. if c.db.isDatastoreFailed() {
  378. return nil
  379. }
  380. panicOnFault := debug.SetPanicOnFault(true)
  381. defer debug.SetPanicOnFault(panicOnFault)
  382. defer func() {
  383. if r := recover(); r != nil {
  384. c.db.setDatastoreFailed(r)
  385. retkey = nil
  386. }
  387. }()
  388. // End recovery preamble
  389. key, _ := c.boltCursor.First()
  390. return key
  391. }
  392. func (c *datastoreCursor) nextKey() (retkey []byte) {
  393. // Begin recovery preamble
  394. if c.db.isDatastoreFailed() {
  395. return nil
  396. }
  397. panicOnFault := debug.SetPanicOnFault(true)
  398. defer debug.SetPanicOnFault(panicOnFault)
  399. defer func() {
  400. if r := recover(); r != nil {
  401. c.db.setDatastoreFailed(r)
  402. retkey = nil
  403. }
  404. }()
  405. // End recovery preamble
  406. key, _ := c.boltCursor.Next()
  407. return key
  408. }
  409. func (c *datastoreCursor) first() (retkey, retvalue []byte) {
  410. // Begin recovery preamble
  411. if c.db.isDatastoreFailed() {
  412. return nil, nil
  413. }
  414. panicOnFault := debug.SetPanicOnFault(true)
  415. defer debug.SetPanicOnFault(panicOnFault)
  416. defer func() {
  417. if r := recover(); r != nil {
  418. c.db.setDatastoreFailed(r)
  419. retkey = nil
  420. retvalue = nil
  421. }
  422. }()
  423. // End recovery preamble
  424. return c.boltCursor.First()
  425. }
  426. func (c *datastoreCursor) next() (retkey, retvalue []byte) {
  427. // Begin recovery preamble
  428. if c.db.isDatastoreFailed() {
  429. return nil, nil
  430. }
  431. panicOnFault := debug.SetPanicOnFault(true)
  432. defer debug.SetPanicOnFault(panicOnFault)
  433. defer func() {
  434. if r := recover(); r != nil {
  435. c.db.setDatastoreFailed(r)
  436. retkey = nil
  437. retvalue = nil
  438. }
  439. }()
  440. // End recovery preamble
  441. return c.boltCursor.Next()
  442. }
// close is a no-op for the bolt implementation; it performs no work and
// releases nothing.
func (c *datastoreCursor) close() {
	// BoltDB doesn't close cursors.
}