Commit 50dd615b authored by Luke Champine, committed by GitHub

Merge pull request #1582 from NebulousLabs/hostdb-scanner

Hostdb scanner
parents 4cd7ba36 9322f6b1
......@@ -79,12 +79,20 @@ type FileInfo struct {
// aggregates the host's external settings and metrics with its public key.
type HostDBEntry struct {
HostExternalSettings
PublicKey types.SiaPublicKey `json:"publickey"`
// ScanHistory is the set of scans performed on the host. It should always
// be ordered according to the scan's Timestamp, oldest to newest.
ScanHistory HostDBScans `json:"scanhistory"`
// FirstSeen is the block height at which this host was first announced.
FirstSeen types.BlockHeight `json:"firstseen"`
// Measurements that have been taken on the host. The most recent
// measurements are kept in full detail, historic ones are compressed into
// the historic values.
HistoricDowntime time.Duration `json:"historicdowntime"`
HistoricUptime time.Duration `json:"historicuptime"`
ScanHistory HostDBScans `json:"scanhistory"`
// The public key of the host, stored separately to minimize the risk of
// certain MitM-based vulnerabilities.
PublicKey types.SiaPublicKey `json:"publickey"`
}
// HostDBScan represents a single scan event.
......
......@@ -11,10 +11,18 @@ const (
// cannot successfully get a random number.
defaultScanSleep = 1*time.Hour + 37*time.Minute
// maxHostDowntime specifies the maximum amount of time that a host is
// allowed to be offline while still being in the hostdb.
maxHostDowntime = 30 * 24 * time.Hour
// maxScanSleep is the maximum amount of time that the hostdb will sleep
// between performing scans of the hosts.
maxScanSleep = 4 * time.Hour
// minScans specifies the number of scans that a host should have before the
// scans start getting compressed.
minScans = 20
// minScanSleep is the minimum amount of time that the hostdb will sleep
// between performing scans of the hosts.
minScanSleep = 1*time.Hour + 20*time.Minute
......@@ -39,7 +47,7 @@ var (
// hostCheckupQuantity specifies the number of hosts that get scanned every
// time there is a regular scanning operation.
hostCheckupQuantity = build.Select(build.Var{
Standard: int(250),
Standard: int(200),
Dev: int(6),
Testing: int(5),
}).(int)
......
......@@ -4,18 +4,6 @@
// set of hosts it has found and updates who is online.
package hostdb
// TODO: Not sure what happens with hosts that fail their first scan. Is it
// possible for them to get scored inappropriately? If they start behind, can
// they scan back into the set of good hosts?
// TODO: Scan history should be truncated.
// TODO: Investigate why hosts that seem to be online can fail scans, and figure
// out a more robust way to not miss hosts.
// TODO: Refine the method by which the hostdb selects which hosts to scan
// during its regular scanning period.
import (
"errors"
"fmt"
......
......@@ -3,7 +3,6 @@ package hostdb
import (
"math"
"math/big"
"time"
"github.com/NebulousLabs/Sia/build"
"github.com/NebulousLabs/Sia/modules"
......@@ -256,8 +255,8 @@ func (hdb *HostDB) uptimeAdjustments(entry modules.HostDBEntry) float64 {
// Compute the total measured uptime and total measured downtime for this
// host.
var uptime time.Duration
var downtime time.Duration
downtime := entry.HistoricDowntime
uptime := entry.HistoricUptime
recentTime := entry.ScanHistory[0].Timestamp
recentSuccess := entry.ScanHistory[0].Success
for _, scan := range entry.ScanHistory[1:] {
......
......@@ -126,6 +126,39 @@ func (hdb *HostDB) updateEntry(entry modules.HostDBEntry, netErr error) {
newEntry.ScanHistory = append(newEntry.ScanHistory, modules.HostDBScan{Timestamp: time.Now(), Success: netErr == nil})
}
// Determine whether any scan in the updated history succeeded. The check is
// made against newEntry (not entry) so that it covers the same history that
// the age and length checks below use, including the scan recorded above.
var recentUptime bool
for _, scan := range newEntry.ScanHistory {
	if scan.Success {
		recentUptime = true
		break
	}
}

// If the host has been offline for too long, delete the host from the
// hostdb. All three conditions must hold: the oldest retained scan is more
// than maxHostDowntime old, no retained scan succeeded, and at least
// minScans scans have been collected.
// NOTE(review): indexing ScanHistory[0] assumes the code above always leaves
// at least one scan in newEntry.ScanHistory - confirm.
if time.Since(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime && !recentUptime && len(newEntry.ScanHistory) >= minScans {
	err := hdb.hostTree.Remove(newEntry.PublicKey)
	if err != nil {
		hdb.log.Println("ERROR: unable to remove host newEntry which has had a ton of downtime:", err)
	}

	// The function should terminate here as no more interaction is needed
	// with this host.
	return
}

// Compress any old scans into the historic values. While more than minScans
// scans are retained and the oldest one is older than maxHostDowntime, the
// interval between the two oldest scans is credited to the historic uptime
// or downtime (according to the outcome of the later scan) and the oldest
// scan is dropped.
for len(newEntry.ScanHistory) > minScans && time.Since(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime {
	timePassed := newEntry.ScanHistory[1].Timestamp.Sub(newEntry.ScanHistory[0].Timestamp)
	if newEntry.ScanHistory[1].Success {
		newEntry.HistoricUptime += timePassed
	} else {
		newEntry.HistoricDowntime += timePassed
	}
	newEntry.ScanHistory = newEntry.ScanHistory[1:]
}
// Add the updated entry
if !exists {
err := hdb.hostTree.Insert(newEntry)
......@@ -248,18 +281,31 @@ func (hdb *HostDB) threadedScan() {
// pushing them further back in the hierarchy, ensuring that for the
// most part only online hosts are getting scanned unless there are
// fewer than hostCheckupQuantity of them.
//
// TODO: Cannot use SelectRandom (despite it being faster) because
// SelectRandom only returns active/online hosts. Need to be scanning
// the offline ones as well, otherwise a little bit of downtime is a
// death sentence for a host.
// Grab a set of hosts to scan. Online and offline hosts are collected into
// separate lists, each capped at hostCheckupQuantity, so that the scan covers
// a diverse set of hosts rather than only the online ones.
var onlineHosts, offlineHosts []modules.HostDBEntry
for _, host := range hdb.hostTree.All() {
	// Stop walking the hosts once both lists are full.
	if len(onlineHosts) >= hostCheckupQuantity && len(offlineHosts) >= hostCheckupQuantity {
		break
	}

	// A host counts as online only if its most recent scan (the last entry
	// of ScanHistory) succeeded; a host with no scan history counts as
	// offline.
	online := len(host.ScanHistory) > 0 && host.ScanHistory[len(host.ScanHistory)-1].Success
	if online && len(onlineHosts) < hostCheckupQuantity {
		onlineHosts = append(onlineHosts, host)
	} else if !online && len(offlineHosts) < hostCheckupQuantity {
		// Append to offlineHosts itself. Appending to onlineHosts here would
		// discard the offline hosts gathered so far and could share (and
		// later overwrite) the backing array of onlineHosts.
		offlineHosts = append(offlineHosts, host)
	}
}
// Queue the scans for each host.
hdb.log.Println("Performing scan on", len(onlineHosts), "online hosts and", len(offlineHosts), "offline hosts.")
hdb.mu.Lock()
checkups := hdb.hostTree.All()
if len(checkups) > hostCheckupQuantity {
checkups = checkups[len(checkups)-hostCheckupQuantity:]
for _, host := range onlineHosts {
hdb.queueScan(host)
}
hdb.log.Println("Performing scan on", len(checkups), "hosts")
for _, host := range checkups {
for _, host := range offlineHosts {
hdb.queueScan(host)
}
hdb.mu.Unlock()
......
......@@ -226,8 +226,8 @@ func hostdbviewcmd(pubkey string) {
// host.
uptimeRatio := float64(0)
if len(info.Entry.ScanHistory) > 1 {
var uptime time.Duration
var downtime time.Duration
downtime := info.Entry.HistoricDowntime
uptime := info.Entry.HistoricUptime
recentTime := info.Entry.ScanHistory[0].Timestamp
recentSuccess := info.Entry.ScanHistory[0].Success
for _, scan := range info.Entry.ScanHistory[1:] {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment