dataStore_bolt.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. //go:build !PSIPHON_USE_BADGER_DB && !PSIPHON_USE_FILES_DB
  2. // +build !PSIPHON_USE_BADGER_DB,!PSIPHON_USE_FILES_DB
  3. /*
  4. * Copyright (c) 2018, Psiphon Inc.
  5. * All rights reserved.
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. */
  21. package psiphon
  22. import (
  23. std_errors "errors"
  24. "fmt"
  25. "os"
  26. "path/filepath"
  27. "runtime/debug"
  28. "sync/atomic"
  29. "time"
  30. "github.com/Psiphon-Labs/bolt"
  31. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
  32. "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
  33. )
  // OPEN_DB_RETRIES is the number of additional open attempts that
  // datastoreOpenDB makes, after the first one, when it is called with
  // retryAndReset set.
  const OPEN_DB_RETRIES = 2
  37. type datastoreDB struct {
  38. boltDB *bolt.DB
  39. filename string
  40. isFailed int32
  41. }
  42. type datastoreTx struct {
  43. db *datastoreDB
  44. boltTx *bolt.Tx
  45. }
  46. type datastoreBucket struct {
  47. db *datastoreDB
  48. boltBucket *bolt.Bucket
  49. }
  50. type datastoreCursor struct {
  51. db *datastoreDB
  52. boltCursor *bolt.Cursor
  53. }
  54. func datastoreOpenDB(
  55. rootDataDirectory string, retryAndReset bool) (*datastoreDB, error) {
  56. var db *datastoreDB
  57. var err error
  58. attempts := 1
  59. if retryAndReset {
  60. attempts += OPEN_DB_RETRIES
  61. }
  62. reset := false
  63. for attempt := 0; attempt < attempts; attempt++ {
  64. db, err = tryDatastoreOpenDB(rootDataDirectory, reset)
  65. if err == nil {
  66. break
  67. }
  68. NoticeWarning("tryDatastoreOpenDB failed: %s", err)
  69. // The datastore file may be corrupt, so, in subsequent iterations,
  70. // set the "reset" flag and attempt to delete the file and try again.
  71. //
  72. // Don't reset the datastore when open failed due to timeout obtaining
  73. // the file lock, as the datastore is simply locked by another
  74. // process and not corrupt. As the file lock is advisory, deleting
  75. // the file would succeed despite the lock. In this case, still retry
  76. // in case the the lock is released.
  77. reset = !std_errors.Is(err, bolt.ErrTimeout)
  78. }
  79. return db, err
  80. }
  81. func tryDatastoreOpenDB(
  82. rootDataDirectory string, reset bool) (retdb *datastoreDB, reterr error) {
  83. // Testing indicates that the bolt Check function can raise SIGSEGV due to
  84. // invalid mmap buffer accesses in cases such as opening a valid but
  85. // truncated datastore file.
  86. //
  87. // To handle this, we temporarily set SetPanicOnFault in order to treat the
  88. // fault as a panic, recover any panic, and return an error which will result
  89. // in a retry with reset.
  90. //
  91. // Limitation: another potential crash case is "fatal error: out of
  92. // memory" due to bolt.freelist.read attempting to allocate a slice using
  93. // a corrupted size value on disk. There is no way to recover from this
  94. // fatal.
  95. // Begin recovery preamble
  96. panicOnFault := debug.SetPanicOnFault(true)
  97. defer debug.SetPanicOnFault(panicOnFault)
  98. defer func() {
  99. if r := recover(); r != nil {
  100. retdb = nil
  101. reterr = errors.Tracef("panic: %v", r)
  102. }
  103. }()
  104. // End recovery preamble
  105. filename := filepath.Join(rootDataDirectory, "psiphon.boltdb")
  106. if reset {
  107. NoticeWarning("tryDatastoreOpenDB: reset")
  108. os.Remove(filename)
  109. }
  110. // A typical Psiphon datastore will not have a large, fragmented freelist.
  111. // For this reason, we're not setting FreelistType to FreelistMapType or
  112. // enabling NoFreelistSync. The latter option has a trade-off of slower
  113. // start up time.
  114. //
  115. // Monitor freelist stats in DataStoreMetrics in diagnostics and consider
  116. // setting these options if necessary.
  117. newDB, err := bolt.Open(filename, 0600, &bolt.Options{Timeout: 1 * time.Second})
  118. if err != nil {
  119. return nil, errors.Trace(err)
  120. }
  121. // Run consistency checks on datastore and emit errors for diagnostics
  122. // purposes. We assume this will complete quickly for typical size Psiphon
  123. // datastores and wait for the check to complete before proceeding.
  124. err = newDB.View(func(tx *bolt.Tx) error {
  125. return tx.SynchronousCheck()
  126. })
  127. if err != nil {
  128. return nil, errors.Trace(err)
  129. }
  130. err = newDB.Update(func(tx *bolt.Tx) error {
  131. requiredBuckets := [][]byte{
  132. datastoreServerEntriesBucket,
  133. datastoreServerEntryTagsBucket,
  134. datastoreServerEntryTombstoneTagsBucket,
  135. datastoreUrlETagsBucket,
  136. datastoreKeyValueBucket,
  137. datastoreRemoteServerListStatsBucket,
  138. datastoreFailedTunnelStatsBucket,
  139. datastoreSLOKsBucket,
  140. datastoreTacticsBucket,
  141. datastoreSpeedTestSamplesBucket,
  142. datastoreDialParametersBucket,
  143. }
  144. for _, bucket := range requiredBuckets {
  145. _, err := tx.CreateBucketIfNotExists(bucket)
  146. if err != nil {
  147. return err
  148. }
  149. }
  150. return nil
  151. })
  152. if err != nil {
  153. return nil, errors.Trace(err)
  154. }
  155. // Cleanup obsolete buckets
  156. err = newDB.Update(func(tx *bolt.Tx) error {
  157. obsoleteBuckets := [][]byte{
  158. []byte("tunnelStats"),
  159. []byte("rankedServerEntries"),
  160. []byte("splitTunnelRouteETags"),
  161. []byte("splitTunnelRouteData"),
  162. }
  163. for _, obsoleteBucket := range obsoleteBuckets {
  164. if tx.Bucket(obsoleteBucket) != nil {
  165. err := tx.DeleteBucket(obsoleteBucket)
  166. if err != nil {
  167. NoticeWarning("DeleteBucket %s error: %s", obsoleteBucket, err)
  168. // Continue, since this is not fatal
  169. }
  170. }
  171. }
  172. return nil
  173. })
  174. if err != nil {
  175. return nil, errors.Trace(err)
  176. }
  177. return &datastoreDB{
  178. boltDB: newDB,
  179. filename: filename,
  180. }, nil
  181. }
  // errDatastoreFailed is returned by datastore operations once the
  // datastoreDB has been put into the failed state, and by the operation
  // whose recovered panic caused that state.
  var errDatastoreFailed error = std_errors.New("datastore has failed")
  183. func (db *datastoreDB) isDatastoreFailed() bool {
  184. return atomic.LoadInt32(&db.isFailed) == 1
  185. }
  186. func (db *datastoreDB) setDatastoreFailed(r interface{}) {
  187. atomic.StoreInt32(&db.isFailed, 1)
  188. NoticeWarning("Datastore failed: %s", errors.Tracef("panic: %v", r))
  189. }
  190. func (db *datastoreDB) close() error {
  191. // Limitation: there is no panic recover in this case. We assume boltDB.Close
  192. // does not make mmap accesses and prefer to not continue with the datastore
  193. // file in a locked or open state. We also assume that any locks aquired by
  194. // boltDB.Close, held by transactions, will be released even if the
  195. // transaction panics and the database is in the failed state.
  196. return db.boltDB.Close()
  197. }
  198. func (db *datastoreDB) getDataStoreMetrics() string {
  199. fileSize := int64(0)
  200. fileInfo, err := os.Stat(db.filename)
  201. if err == nil {
  202. fileSize = fileInfo.Size()
  203. }
  204. stats := db.boltDB.Stats()
  205. return fmt.Sprintf("filesize %s | freepages %d | freealloc %s | txcount %d | writes %d | writetime %s",
  206. common.FormatByteCount(uint64(fileSize)),
  207. stats.FreePageN,
  208. common.FormatByteCount(uint64(stats.FreeAlloc)),
  209. stats.TxN,
  210. stats.TxStats.Write,
  211. stats.TxStats.WriteTime)
  212. }
  213. func (db *datastoreDB) view(fn func(tx *datastoreTx) error) (reterr error) {
  214. // Any bolt function that performs mmap buffer accesses can raise SIGBUS due
  215. // to underlying storage changes, such as a truncation of the datastore file
  216. // or removal or network attached storage, etc.
  217. //
  218. // To handle this, we temporarily set SetPanicOnFault in order to treat the
  219. // fault as a panic, recover any panic to avoid crashing the process, and
  220. // putting this datastoreDB instance into a failed state. All subsequent
  221. // calls to this datastoreDBinstance or its related datastoreTx and
  222. // datastoreBucket instances will fail.
  223. // Begin recovery preamble
  224. if db.isDatastoreFailed() {
  225. return errDatastoreFailed
  226. }
  227. panicOnFault := debug.SetPanicOnFault(true)
  228. defer debug.SetPanicOnFault(panicOnFault)
  229. defer func() {
  230. if r := recover(); r != nil {
  231. db.setDatastoreFailed(r)
  232. reterr = errDatastoreFailed
  233. }
  234. }()
  235. // End recovery preamble
  236. return db.boltDB.View(
  237. func(tx *bolt.Tx) error {
  238. err := fn(&datastoreTx{db: db, boltTx: tx})
  239. if err != nil {
  240. return errors.Trace(err)
  241. }
  242. return nil
  243. })
  244. }
  245. func (db *datastoreDB) update(fn func(tx *datastoreTx) error) (reterr error) {
  246. // Begin recovery preamble
  247. if db.isDatastoreFailed() {
  248. return errDatastoreFailed
  249. }
  250. panicOnFault := debug.SetPanicOnFault(true)
  251. defer debug.SetPanicOnFault(panicOnFault)
  252. defer func() {
  253. if r := recover(); r != nil {
  254. db.setDatastoreFailed(r)
  255. reterr = errDatastoreFailed
  256. }
  257. }()
  258. // End recovery preamble
  259. return db.boltDB.Update(
  260. func(tx *bolt.Tx) error {
  261. err := fn(&datastoreTx{db: db, boltTx: tx})
  262. if err != nil {
  263. return errors.Trace(err)
  264. }
  265. return nil
  266. })
  267. }
  268. func (tx *datastoreTx) bucket(name []byte) (retbucket *datastoreBucket) {
  269. // Begin recovery preamble
  270. if tx.db.isDatastoreFailed() {
  271. return &datastoreBucket{db: tx.db, boltBucket: nil}
  272. }
  273. panicOnFault := debug.SetPanicOnFault(true)
  274. defer debug.SetPanicOnFault(panicOnFault)
  275. defer func() {
  276. if r := recover(); r != nil {
  277. tx.db.setDatastoreFailed(r)
  278. retbucket = &datastoreBucket{db: tx.db, boltBucket: nil}
  279. }
  280. }()
  281. // End recovery preamble
  282. return &datastoreBucket{db: tx.db, boltBucket: tx.boltTx.Bucket(name)}
  283. }
  284. func (tx *datastoreTx) clearBucket(name []byte) (reterr error) {
  285. // Begin recovery preamble
  286. if tx.db.isDatastoreFailed() {
  287. return errDatastoreFailed
  288. }
  289. panicOnFault := debug.SetPanicOnFault(true)
  290. defer debug.SetPanicOnFault(panicOnFault)
  291. defer func() {
  292. if r := recover(); r != nil {
  293. tx.db.setDatastoreFailed(r)
  294. reterr = errDatastoreFailed
  295. }
  296. }()
  297. // End recovery preamble
  298. err := tx.boltTx.DeleteBucket(name)
  299. if err != nil {
  300. return errors.Trace(err)
  301. }
  302. _, err = tx.boltTx.CreateBucket(name)
  303. if err != nil {
  304. return errors.Trace(err)
  305. }
  306. return nil
  307. }
  308. func (b *datastoreBucket) get(key []byte) (retvalue []byte) {
  309. // Begin recovery preamble
  310. if b.db.isDatastoreFailed() {
  311. return nil
  312. }
  313. panicOnFault := debug.SetPanicOnFault(true)
  314. defer debug.SetPanicOnFault(panicOnFault)
  315. defer func() {
  316. if r := recover(); r != nil {
  317. b.db.setDatastoreFailed(r)
  318. retvalue = nil
  319. }
  320. }()
  321. // End recovery preamble
  322. return b.boltBucket.Get(key)
  323. }
  324. func (b *datastoreBucket) put(key, value []byte) (reterr error) {
  325. // Begin recovery preamble
  326. if b.db.isDatastoreFailed() {
  327. return errDatastoreFailed
  328. }
  329. panicOnFault := debug.SetPanicOnFault(true)
  330. defer debug.SetPanicOnFault(panicOnFault)
  331. defer func() {
  332. if r := recover(); r != nil {
  333. b.db.setDatastoreFailed(r)
  334. reterr = errDatastoreFailed
  335. }
  336. }()
  337. // End recovery preamble
  338. err := b.boltBucket.Put(key, value)
  339. if err != nil {
  340. return errors.Trace(err)
  341. }
  342. return nil
  343. }
  344. func (b *datastoreBucket) delete(key []byte) (reterr error) {
  345. // Begin recovery preamble
  346. if b.db.isDatastoreFailed() {
  347. return errDatastoreFailed
  348. }
  349. panicOnFault := debug.SetPanicOnFault(true)
  350. defer debug.SetPanicOnFault(panicOnFault)
  351. defer func() {
  352. if r := recover(); r != nil {
  353. b.db.setDatastoreFailed(r)
  354. reterr = errDatastoreFailed
  355. }
  356. }()
  357. // End recovery preamble
  358. err := b.boltBucket.Delete(key)
  359. if err != nil {
  360. return errors.Trace(err)
  361. }
  362. return nil
  363. }
  364. func (b *datastoreBucket) cursor() (retcursor datastoreCursor) {
  365. // Begin recovery preamble
  366. if b.db.isDatastoreFailed() {
  367. return datastoreCursor{db: b.db, boltCursor: nil}
  368. }
  369. panicOnFault := debug.SetPanicOnFault(true)
  370. defer debug.SetPanicOnFault(panicOnFault)
  371. defer func() {
  372. if r := recover(); r != nil {
  373. b.db.setDatastoreFailed(r)
  374. retcursor = datastoreCursor{db: b.db, boltCursor: nil}
  375. }
  376. }()
  377. // End recovery preamble
  378. return datastoreCursor{db: b.db, boltCursor: b.boltBucket.Cursor()}
  379. }
  380. func (c *datastoreCursor) firstKey() (retkey []byte) {
  381. // Begin recovery preamble
  382. if c.db.isDatastoreFailed() {
  383. return nil
  384. }
  385. panicOnFault := debug.SetPanicOnFault(true)
  386. defer debug.SetPanicOnFault(panicOnFault)
  387. defer func() {
  388. if r := recover(); r != nil {
  389. c.db.setDatastoreFailed(r)
  390. retkey = nil
  391. }
  392. }()
  393. // End recovery preamble
  394. key, _ := c.boltCursor.First()
  395. return key
  396. }
  397. func (c *datastoreCursor) nextKey() (retkey []byte) {
  398. // Begin recovery preamble
  399. if c.db.isDatastoreFailed() {
  400. return nil
  401. }
  402. panicOnFault := debug.SetPanicOnFault(true)
  403. defer debug.SetPanicOnFault(panicOnFault)
  404. defer func() {
  405. if r := recover(); r != nil {
  406. c.db.setDatastoreFailed(r)
  407. retkey = nil
  408. }
  409. }()
  410. // End recovery preamble
  411. key, _ := c.boltCursor.Next()
  412. return key
  413. }
  414. func (c *datastoreCursor) first() (retkey, retvalue []byte) {
  415. // Begin recovery preamble
  416. if c.db.isDatastoreFailed() {
  417. return nil, nil
  418. }
  419. panicOnFault := debug.SetPanicOnFault(true)
  420. defer debug.SetPanicOnFault(panicOnFault)
  421. defer func() {
  422. if r := recover(); r != nil {
  423. c.db.setDatastoreFailed(r)
  424. retkey = nil
  425. retvalue = nil
  426. }
  427. }()
  428. // End recovery preamble
  429. return c.boltCursor.First()
  430. }
  431. func (c *datastoreCursor) next() (retkey, retvalue []byte) {
  432. // Begin recovery preamble
  433. if c.db.isDatastoreFailed() {
  434. return nil, nil
  435. }
  436. panicOnFault := debug.SetPanicOnFault(true)
  437. defer debug.SetPanicOnFault(panicOnFault)
  438. defer func() {
  439. if r := recover(); r != nil {
  440. c.db.setDatastoreFailed(r)
  441. retkey = nil
  442. retvalue = nil
  443. }
  444. }()
  445. // End recovery preamble
  446. return c.boltCursor.Next()
  447. }
  448. func (c *datastoreCursor) close() {
  449. // BoltDB doesn't close cursors.
  450. }