Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5a0382e8a3 | |||
| e3de4a2ef6 | |||
| 80ad9d9dc3 | |||
| a644752133 |
@ -205,10 +205,10 @@ command="systemctl restart foo.service",no-port-forwarding,no-x11-forwarding,no-
|
|||||||
<details>
|
<details>
|
||||||
<summary>{{ .Name }} and other template variables</summary>
|
<summary>{{ .Name }} and other template variables</summary>
|
||||||
|
|
||||||
`{{ .Name }}` is the name of your site.
|
- `{{ .Name }}` is the name of your site.
|
||||||
`{{ .Message }}` is either `went down` or `came back up`.
|
- `{{ .Message }}` is either `went down` or `came back up`.
|
||||||
`{{ .Status }}` is either `up` or `down`.
|
- `{{ .Status }}` is either `up` or `down`.
|
||||||
`{{ .Watchdog }}` is the name of your watchdog (useful if you have multiple).
|
- `{{ .Watchdog }}` is the name of your watchdog (useful if you have multiple).
|
||||||
|
|
||||||
It refers to the name of the watch, which is "Example Site" in the sample config below.
|
It refers to the name of the watch, which is "Example Site" in the sample config below.
|
||||||
|
|
||||||
|
|||||||
29
build-all.sh
29
build-all.sh
@ -5,41 +5,44 @@
|
|||||||
|
|
||||||
export CGO_ENABLED=0
|
export CGO_ENABLED=0
|
||||||
exe=watchdog
|
exe=watchdog
|
||||||
|
distpre=../..
|
||||||
gocmd=.
|
gocmd=.
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
go generate -mod=vendor ./...
|
go generate -mod=vendor ./...
|
||||||
|
|
||||||
|
pushd cmd/${exe}
|
||||||
echo ""
|
echo ""
|
||||||
echo "Windows amd64"
|
echo "Windows amd64"
|
||||||
#GOOS=windows GOARCH=amd64 go build -mod=vendor -o dist/windows/amd64/${exe}.exe -ldflags "-H=windowsgui" $gocmd
|
#GOOS=windows GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/windows/amd64/${exe}.exe -ldflags "-H=windowsgui" $gocmd
|
||||||
#GOOS=windows GOARCH=amd64 go build -mod=vendor -o dist/windows/amd64/${exe}.debug.exe
|
#GOOS=windows GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/windows/amd64/${exe}.debug.exe
|
||||||
GOOS=windows GOARCH=amd64 go build -mod=vendor -o dist/windows/amd64/${exe}.exe
|
GOOS=windows GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/windows/amd64/${exe}.exe
|
||||||
echo "Windows 386"
|
echo "Windows 386"
|
||||||
#GOOS=windows GOARCH=386 go build -mod=vendor -o dist/windows/386/${exe}.exe -ldflags "-H=windowsgui" $gocmd
|
#GOOS=windows GOARCH=386 go build -mod=vendor -o ${distpre}/dist/windows/386/${exe}.exe -ldflags "-H=windowsgui" $gocmd
|
||||||
#GOOS=windows GOARCH=386 go build -mod=vendor -o dist/windows/386/${exe}.debug.exe
|
#GOOS=windows GOARCH=386 go build -mod=vendor -o ${distpre}/dist/windows/386/${exe}.debug.exe
|
||||||
GOOS=windows GOARCH=386 go build -mod=vendor -o dist/windows/386/${exe}.exe
|
GOOS=windows GOARCH=386 go build -mod=vendor -o ${distpre}/dist/windows/386/${exe}.exe
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "Darwin (macOS) amd64"
|
echo "Darwin (macOS) amd64"
|
||||||
GOOS=darwin GOARCH=amd64 go build -mod=vendor -o dist/darwin/amd64/${exe} $gocmd
|
GOOS=darwin GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/darwin/amd64/${exe} $gocmd
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "Linux amd64"
|
echo "Linux amd64"
|
||||||
GOOS=linux GOARCH=amd64 go build -mod=vendor -o dist/linux/amd64/${exe} $gocmd
|
GOOS=linux GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/linux/amd64/${exe} $gocmd
|
||||||
echo "Linux 386"
|
echo "Linux 386"
|
||||||
GOOS=linux GOARCH=386 go build -mod=vendor -o dist/linux/386/${exe} $gocmd
|
GOOS=linux GOARCH=386 go build -mod=vendor -o ${distpre}/dist/linux/386/${exe} $gocmd
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "RPi 4 (64-bit) ARMv8"
|
echo "RPi 4 (64-bit) ARMv8"
|
||||||
GOOS=linux GOARCH=arm64 go build -mod=vendor -o dist/linux/armv8/${exe} $gocmd
|
GOOS=linux GOARCH=arm64 go build -mod=vendor -o ${distpre}/dist/linux/armv8/${exe} $gocmd
|
||||||
echo "RPi 3 B+ ARMv7"
|
echo "RPi 3 B+ ARMv7"
|
||||||
GOOS=linux GOARCH=arm GOARM=7 go build -mod=vendor -o dist/linux/armv7/${exe} $gocmd
|
GOOS=linux GOARCH=arm GOARM=7 go build -mod=vendor -o ${distpre}/dist/linux/armv7/${exe} $gocmd
|
||||||
echo "ARMv6"
|
echo "ARMv6"
|
||||||
GOOS=linux GOARCH=arm GOARM=6 go build -mod=vendor -o dist/linux/armv6/${exe} $gocmd
|
GOOS=linux GOARCH=arm GOARM=6 go build -mod=vendor -o ${distpre}/dist/linux/armv6/${exe} $gocmd
|
||||||
echo "RPi Zero ARMv5"
|
echo "RPi Zero ARMv5"
|
||||||
GOOS=linux GOARCH=arm GOARM=5 go build -mod=vendor -o dist/linux/armv5/${exe} $gocmd
|
GOOS=linux GOARCH=arm GOARM=5 go build -mod=vendor -o ${distpre}/dist/linux/armv5/${exe} $gocmd
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
|
popd
|
||||||
rsync -av ./dist/ ubuntu@rootprojects.org:/srv/www/rootprojects.org/$exe/dist/
|
rsync -av ./dist/ ubuntu@rootprojects.org:/srv/www/rootprojects.org/$exe/dist/
|
||||||
# https://rootprojects.org/serviceman/dist/windows/amd64/serviceman.exe
|
# https://rootprojects.org/serviceman/dist/windows/amd64/serviceman.exe
|
||||||
|
|||||||
165
watchdog.go
165
watchdog.go
@ -33,6 +33,12 @@ func (s Status) String() string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
MessageDown = "went down"
|
||||||
|
MessageUp = "came back up"
|
||||||
|
MessageHiccup = "hiccupped"
|
||||||
|
)
|
||||||
|
|
||||||
type Dog struct {
|
type Dog struct {
|
||||||
Watchdog string
|
Watchdog string
|
||||||
Name string
|
Name string
|
||||||
@ -47,15 +53,15 @@ type Dog struct {
|
|||||||
status Status
|
status Status
|
||||||
changed bool
|
changed bool
|
||||||
error error
|
error error
|
||||||
failures int
|
//failures int
|
||||||
passes int
|
//passes int
|
||||||
lastFailed time.Time
|
//lastFailed time.Time
|
||||||
lastPassed time.Time
|
//lastPassed time.Time
|
||||||
lastNotified time.Time
|
//lastNotified time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(d *Dog) *Dog {
|
func New(d *Dog) *Dog {
|
||||||
d.lastPassed = time.Now().Add(-5 * time.Minute)
|
//d.lastPassed = time.Now().Add(-5 * time.Minute)
|
||||||
d.status = StatusUp
|
d.status = StatusUp
|
||||||
d.changed = false
|
d.changed = false
|
||||||
return d
|
return d
|
||||||
@ -70,88 +76,87 @@ func (d *Dog) Watch() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Now that I've added the ability to notify when a server is back up
|
||||||
|
// this definitely needs some refactoring. It's bad now.
|
||||||
func (d *Dog) watch() {
|
func (d *Dog) watch() {
|
||||||
d.Logger <- fmt.Sprintf("Check: '%s'", d.Name)
|
d.Logger <- fmt.Sprintf("Check: '%s'", d.Name)
|
||||||
|
|
||||||
err := d.check()
|
// This may be up or down
|
||||||
|
err := d.hardcheck()
|
||||||
if nil == err {
|
if nil == err {
|
||||||
|
d.Logger <- fmt.Sprintf("Up: '%s'", d.Name)
|
||||||
|
// if it's down, coming up, notify
|
||||||
if d.changed {
|
if d.changed {
|
||||||
d.notify("came back up")
|
d.notify(MessageUp)
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(time.Duration(5) * time.Second)
|
// If being down is a change, check to see if it's just a hiccup
|
||||||
|
if d.changed {
|
||||||
err2 := d.check()
|
time.Sleep(time.Duration(5) * time.Second)
|
||||||
if nil != err2 {
|
err2 := d.softcheck()
|
||||||
d.Logger <- fmt.Sprintf("Down: '%s': %s", d.Name, err2)
|
if nil != err2 {
|
||||||
} else {
|
// it's really down
|
||||||
d.Logger <- fmt.Sprintf("Hiccup: '%s': %s", d.Name, err)
|
d.Logger <- fmt.Sprintf("Down: '%s': %s", d.Name, err2)
|
||||||
return
|
} else {
|
||||||
|
// it's not really down, so reset the change info
|
||||||
|
d.changed = false
|
||||||
|
d.status = StatusUp
|
||||||
|
// and notify of the hiccup
|
||||||
|
d.Logger <- fmt.Sprintf("Hiccup: '%s': %s", d.Name, err)
|
||||||
|
d.notify(MessageHiccup)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO what if the server is flip-flopping rapidly?
|
||||||
|
// how to rate limit?
|
||||||
|
// "{{ .Server }} is on cooldown for 30 minutes"
|
||||||
|
|
||||||
|
// * We've had success since the last notification
|
||||||
|
// * It's been at least 5 minutes since the last notification
|
||||||
|
//fiveMinutesAgo := time.Now().Add(-5 * time.Minute)
|
||||||
|
//if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) {
|
||||||
|
//}
|
||||||
|
|
||||||
t := 10
|
t := 10
|
||||||
for {
|
for {
|
||||||
|
// try to recover, then backoff exponentially
|
||||||
d.recover()
|
d.recover()
|
||||||
time.Sleep(time.Duration(t) * time.Second)
|
time.Sleep(time.Duration(t) * time.Second)
|
||||||
// backoff
|
|
||||||
t *= 2
|
t *= 2
|
||||||
err := d.check()
|
if t > 120 {
|
||||||
if nil != err {
|
t = 120
|
||||||
d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// We should notify if
|
err := d.softcheck()
|
||||||
// * The status has changed
|
if nil != err {
|
||||||
//
|
// this is down, and we know it's down
|
||||||
// TODO what if the server is flip-flopping rapidly?
|
d.status = StatusDown
|
||||||
// how to rate limit?
|
d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err)
|
||||||
// "{{ .Server }} is on cooldown for 30 minutes"
|
if d.changed {
|
||||||
if d.changed {
|
d.changed = false
|
||||||
d.notify("went down")
|
d.notify(MessageDown)
|
||||||
if StatusUp == d.status {
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
// * We've had success since the last notification
|
// it came back up
|
||||||
// * It's been at least 5 minutes since the last notification
|
d.status = StatusUp
|
||||||
//fiveMinutesAgo := time.Now().Add(-5 * time.Minute)
|
d.Logger <- fmt.Sprintf("Up: '%s'", d.Name)
|
||||||
//if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) {
|
if d.changed {
|
||||||
//}
|
// and the downtime was short - just a recovery
|
||||||
//if !failure || d.failures >= 5 {
|
d.notify(MessageHiccup)
|
||||||
// go back to the main 5-minute loop
|
} else {
|
||||||
// break
|
// and the downtime was some time
|
||||||
//}
|
d.notify(MessageUp)
|
||||||
|
}
|
||||||
|
d.changed = false
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *Dog) check() error {
|
func (d *Dog) softcheck() error {
|
||||||
previousStatus := d.status
|
|
||||||
|
|
||||||
var err error
|
|
||||||
defer func() {
|
|
||||||
// Are we up, or down?
|
|
||||||
if nil != err {
|
|
||||||
d.status = StatusDown
|
|
||||||
d.failures += 1
|
|
||||||
d.lastFailed = time.Now()
|
|
||||||
} else {
|
|
||||||
d.status = StatusUp
|
|
||||||
d.lastPassed = time.Now()
|
|
||||||
d.passes += 1
|
|
||||||
d.Logger <- fmt.Sprintf("Up: '%s'", d.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Has that changed?
|
|
||||||
if previousStatus != d.status {
|
|
||||||
d.changed = true
|
|
||||||
} else {
|
|
||||||
d.changed = false
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
client := NewHTTPClient()
|
client := NewHTTPClient()
|
||||||
response, err := client.Get(d.CheckURL)
|
response, err := client.Get(d.CheckURL)
|
||||||
if nil != err {
|
if nil != err {
|
||||||
@ -174,7 +179,7 @@ func (d *Dog) check() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if "" != d.Badwords {
|
if "" != d.Badwords {
|
||||||
if !bytes.Contains(b, []byte(d.Badwords)) {
|
if bytes.Contains(b, []byte(d.Badwords)) {
|
||||||
err = fmt.Errorf("Down: '%s' Found for '%s'", d.Badwords, d.Name)
|
err = fmt.Errorf("Down: '%s' Found for '%s'", d.Badwords, d.Name)
|
||||||
d.Logger <- fmt.Sprintf("%s", err)
|
d.Logger <- fmt.Sprintf("%s", err)
|
||||||
d.error = err
|
d.error = err
|
||||||
@ -185,6 +190,32 @@ func (d *Dog) check() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (d *Dog) hardcheck() error {
|
||||||
|
previousStatus := d.status
|
||||||
|
|
||||||
|
err := d.softcheck()
|
||||||
|
|
||||||
|
// Are we up, or down?
|
||||||
|
if nil != err {
|
||||||
|
d.status = StatusDown
|
||||||
|
//d.failures += 1
|
||||||
|
//d.lastFailed = time.Now()
|
||||||
|
} else {
|
||||||
|
d.status = StatusUp
|
||||||
|
//d.lastPassed = time.Now()
|
||||||
|
//d.passes += 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Has that changed?
|
||||||
|
if previousStatus != d.status {
|
||||||
|
d.changed = true
|
||||||
|
} else {
|
||||||
|
d.changed = false
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
func (d *Dog) recover() {
|
func (d *Dog) recover() {
|
||||||
if "" == d.Recover {
|
if "" == d.Recover {
|
||||||
return
|
return
|
||||||
@ -214,7 +245,7 @@ func (d *Dog) recover() {
|
|||||||
|
|
||||||
func (d *Dog) notify(msg string) {
|
func (d *Dog) notify(msg string) {
|
||||||
d.Logger <- fmt.Sprintf("Notifying the authorities of %s's status change", d.Name)
|
d.Logger <- fmt.Sprintf("Notifying the authorities of %s's status change", d.Name)
|
||||||
d.lastNotified = time.Now()
|
//d.lastNotified = time.Now()
|
||||||
|
|
||||||
for i := range d.Webhooks {
|
for i := range d.Webhooks {
|
||||||
name := d.Webhooks[i]
|
name := d.Webhooks[i]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user