Compare commits

...

4 Commits

3 changed files with 118 additions and 84 deletions

View File

@ -205,10 +205,10 @@ command="systemctl restart foo.service",no-port-forwarding,no-x11-forwarding,no-
<details>
<summary>{{ .Name }} and other template variables</summary>
`{{ .Name }}` is the name of your site.
`{{ .Message }}` is either `went down` or `came back up`.
`{{ .Status }}` is either `up` or `down`.
`{{ .Watchdog }}` is the name of your watchdog (useful if you have multiple).
- `{{ .Name }}` is the name of your site.
- `{{ .Message }}` is one of `went down`, `came back up`, or `hiccupped`.
- `{{ .Status }}` is either `up` or `down`.
- `{{ .Watchdog }}` is the name of your watchdog (useful if you have multiple).
It refers to the name of the watch, which is "Example Site" in the sample config below.

View File

@ -5,41 +5,44 @@
export CGO_ENABLED=0
exe=watchdog
distpre=../..
gocmd=.
echo ""
go generate -mod=vendor ./...
pushd cmd/${exe}
echo ""
echo "Windows amd64"
#GOOS=windows GOARCH=amd64 go build -mod=vendor -o dist/windows/amd64/${exe}.exe -ldflags "-H=windowsgui" $gocmd
#GOOS=windows GOARCH=amd64 go build -mod=vendor -o dist/windows/amd64/${exe}.debug.exe
GOOS=windows GOARCH=amd64 go build -mod=vendor -o dist/windows/amd64/${exe}.exe
#GOOS=windows GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/windows/amd64/${exe}.exe -ldflags "-H=windowsgui" $gocmd
#GOOS=windows GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/windows/amd64/${exe}.debug.exe
GOOS=windows GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/windows/amd64/${exe}.exe
echo "Windows 386"
#GOOS=windows GOARCH=386 go build -mod=vendor -o dist/windows/386/${exe}.exe -ldflags "-H=windowsgui" $gocmd
#GOOS=windows GOARCH=386 go build -mod=vendor -o dist/windows/386/${exe}.debug.exe
GOOS=windows GOARCH=386 go build -mod=vendor -o dist/windows/386/${exe}.exe
#GOOS=windows GOARCH=386 go build -mod=vendor -o ${distpre}/dist/windows/386/${exe}.exe -ldflags "-H=windowsgui" $gocmd
#GOOS=windows GOARCH=386 go build -mod=vendor -o ${distpre}/dist/windows/386/${exe}.debug.exe
GOOS=windows GOARCH=386 go build -mod=vendor -o ${distpre}/dist/windows/386/${exe}.exe
echo ""
echo "Darwin (macOS) amd64"
GOOS=darwin GOARCH=amd64 go build -mod=vendor -o dist/darwin/amd64/${exe} $gocmd
GOOS=darwin GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/darwin/amd64/${exe} $gocmd
echo ""
echo "Linux amd64"
GOOS=linux GOARCH=amd64 go build -mod=vendor -o dist/linux/amd64/${exe} $gocmd
GOOS=linux GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/linux/amd64/${exe} $gocmd
echo "Linux 386"
GOOS=linux GOARCH=386 go build -mod=vendor -o dist/linux/386/${exe} $gocmd
GOOS=linux GOARCH=386 go build -mod=vendor -o ${distpre}/dist/linux/386/${exe} $gocmd
echo ""
echo "RPi 4 (64-bit) ARMv8"
GOOS=linux GOARCH=arm64 go build -mod=vendor -o dist/linux/armv8/${exe} $gocmd
GOOS=linux GOARCH=arm64 go build -mod=vendor -o ${distpre}/dist/linux/armv8/${exe} $gocmd
echo "RPi 3 B+ ARMv7"
GOOS=linux GOARCH=arm GOARM=7 go build -mod=vendor -o dist/linux/armv7/${exe} $gocmd
GOOS=linux GOARCH=arm GOARM=7 go build -mod=vendor -o ${distpre}/dist/linux/armv7/${exe} $gocmd
echo "ARMv6"
GOOS=linux GOARCH=arm GOARM=6 go build -mod=vendor -o dist/linux/armv6/${exe} $gocmd
GOOS=linux GOARCH=arm GOARM=6 go build -mod=vendor -o ${distpre}/dist/linux/armv6/${exe} $gocmd
echo "RPi Zero ARMv5"
GOOS=linux GOARCH=arm GOARM=5 go build -mod=vendor -o dist/linux/armv5/${exe} $gocmd
GOOS=linux GOARCH=arm GOARM=5 go build -mod=vendor -o ${distpre}/dist/linux/armv5/${exe} $gocmd
echo ""
popd
rsync -av ./dist/ ubuntu@rootprojects.org:/srv/www/rootprojects.org/$exe/dist/
# https://rootprojects.org/serviceman/dist/windows/amd64/serviceman.exe

View File

@ -33,6 +33,12 @@ func (s Status) String() string {
}
}
// Notification messages handed to (*Dog).notify when a watched site
// changes state.
const (
	// MessageDown is sent once a site is confirmed to be down.
	MessageDown = "went down"
	// MessageUp is sent when a site that was down is reachable again.
	MessageUp = "came back up"
	// MessageHiccup is sent when a failure cleared itself on a re-check.
	MessageHiccup = "hiccupped"
)
type Dog struct {
Watchdog string
Name string
@ -47,15 +53,15 @@ type Dog struct {
status Status
changed bool
error error
failures int
passes int
lastFailed time.Time
lastPassed time.Time
lastNotified time.Time
//failures int
//passes int
//lastFailed time.Time
//lastPassed time.Time
//lastNotified time.Time
}
func New(d *Dog) *Dog {
d.lastPassed = time.Now().Add(-5 * time.Minute)
//d.lastPassed = time.Now().Add(-5 * time.Minute)
d.status = StatusUp
d.changed = false
return d
@ -70,88 +76,87 @@ func (d *Dog) Watch() {
}
}
// Now that I've added the ability to notify when a server is back up
// this definitely needs some refactoring. It's bad now.
func (d *Dog) watch() {
d.Logger <- fmt.Sprintf("Check: '%s'", d.Name)
err := d.check()
// This may be up or down
err := d.hardcheck()
if nil == err {
d.Logger <- fmt.Sprintf("Up: '%s'", d.Name)
// if it's down, coming up, notify
if d.changed {
d.notify("came back up")
d.notify(MessageUp)
}
return
}
time.Sleep(time.Duration(5) * time.Second)
err2 := d.check()
if nil != err2 {
d.Logger <- fmt.Sprintf("Down: '%s': %s", d.Name, err2)
} else {
d.Logger <- fmt.Sprintf("Hiccup: '%s': %s", d.Name, err)
return
// If being down is a change, check to see if it's just a hiccup
if d.changed {
time.Sleep(time.Duration(5) * time.Second)
err2 := d.softcheck()
if nil != err2 {
// it's really down
d.Logger <- fmt.Sprintf("Down: '%s': %s", d.Name, err2)
} else {
// it's not really down, so reset the change info
d.changed = false
d.status = StatusUp
// and notify of the hiccup
d.Logger <- fmt.Sprintf("Hiccup: '%s': %s", d.Name, err)
d.notify(MessageHiccup)
return
}
}
// TODO what if the server is flip-flopping rapidly?
// how to rate limit?
// "{{ .Server }} is on cooldown for 30 minutes"
// * We've had success since the last notification
// * It's been at least 5 minutes since the last notification
//fiveMinutesAgo := time.Now().Add(-5 * time.Minute)
//if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) {
//}
t := 10
for {
// try to recover, then backoff exponentially
d.recover()
time.Sleep(time.Duration(t) * time.Second)
// backoff
t *= 2
err := d.check()
if nil != err {
d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err)
if t > 120 {
t = 120
}
// We should notify if
// * The status has changed
//
// TODO what if the server is flip-flopping rapidly?
// how to rate limit?
// "{{ .Server }} is on cooldown for 30 minutes"
if d.changed {
d.notify("went down")
if StatusUp == d.status {
break
err := d.softcheck()
if nil != err {
// this is down, and we know it's down
d.status = StatusDown
d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err)
if d.changed {
d.changed = false
d.notify(MessageDown)
}
// * We've had success since the last notification
// * It's been at least 5 minutes since the last notification
//fiveMinutesAgo := time.Now().Add(-5 * time.Minute)
//if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) {
//}
//if !failure || d.failures >= 5 {
// go back to the main 5-minute loop
// break
//}
} else {
// it came back up
d.status = StatusUp
d.Logger <- fmt.Sprintf("Up: '%s'", d.Name)
if d.changed {
// and the downtime was short - just a recovery
d.notify(MessageHiccup)
} else {
// and the downtime was some time
d.notify(MessageUp)
}
d.changed = false
break
}
}
}
func (d *Dog) check() error {
previousStatus := d.status
var err error
defer func() {
// Are we up, or down?
if nil != err {
d.status = StatusDown
d.failures += 1
d.lastFailed = time.Now()
} else {
d.status = StatusUp
d.lastPassed = time.Now()
d.passes += 1
d.Logger <- fmt.Sprintf("Up: '%s'", d.Name)
}
// Has that changed?
if previousStatus != d.status {
d.changed = true
} else {
d.changed = false
}
}()
func (d *Dog) softcheck() error {
client := NewHTTPClient()
response, err := client.Get(d.CheckURL)
if nil != err {
@ -174,7 +179,7 @@ func (d *Dog) check() error {
}
if "" != d.Badwords {
if !bytes.Contains(b, []byte(d.Badwords)) {
if bytes.Contains(b, []byte(d.Badwords)) {
err = fmt.Errorf("Down: '%s' Found for '%s'", d.Badwords, d.Name)
d.Logger <- fmt.Sprintf("%s", err)
d.error = err
@ -185,6 +190,32 @@ func (d *Dog) check() error {
return nil
}
// hardcheck runs softcheck and commits the outcome to the Dog's state.
//
// The status becomes StatusDown when softcheck returns an error and
// StatusUp otherwise; changed is set to whether that differs from the
// status held before the check. The error from softcheck is returned
// unmodified.
func (d *Dog) hardcheck() error {
	// Remember where we started so the transition can be detected below.
	before := d.status

	err := d.softcheck()

	// Translate the check result into a status. The pass/fail counters
	// and timestamps that used to be updated here are currently disabled.
	after := StatusUp
	if err != nil {
		after = StatusDown
	}
	d.status = after

	// A change is simply "the status is not what it was before".
	d.changed = before != after

	return err
}
func (d *Dog) recover() {
if "" == d.Recover {
return
@ -214,7 +245,7 @@ func (d *Dog) recover() {
func (d *Dog) notify(msg string) {
d.Logger <- fmt.Sprintf("Notifying the authorities of %s's status change", d.Name)
d.lastNotified = time.Now()
//d.lastNotified = time.Now()
for i := range d.Webhooks {
name := d.Webhooks[i]