Compare commits
	
		
			4 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 5a0382e8a3 | |||
| e3de4a2ef6 | |||
| 80ad9d9dc3 | |||
| a644752133 | 
| @ -205,10 +205,10 @@ command="systemctl restart foo.service",no-port-forwarding,no-x11-forwarding,no- | ||||
| <details> | ||||
|   <summary>{{ .Name }} and other template variables</summary> | ||||
| 
 | ||||
| `{{ .Name }}` is the name of your site. | ||||
| `{{ .Message }}` is either `went down` or `came back up`. | ||||
| `{{ .Status }}` is either `up` or `down`. | ||||
| `{{ .Watchdog }}` is the name of your watchdog (useful if you have multiple). | ||||
| - `{{ .Name }}` is the name of your site. | ||||
| - `{{ .Message }}` is either `went down` or `came back up`. | ||||
| - `{{ .Status }}` is either `up` or `down`. | ||||
| - `{{ .Watchdog }}` is the name of your watchdog (useful if you have multiple). | ||||
| 
 | ||||
| `{{ .Name }}` refers to the name of the watch, which is "Example Site" in the sample config below. | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										29
									
								
								build-all.sh
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								build-all.sh
									
									
									
									
									
								
							| @ -5,41 +5,44 @@ | ||||
| 
 | ||||
| export CGO_ENABLED=0 | ||||
| exe=watchdog | ||||
| distpre=../.. | ||||
| gocmd=. | ||||
| 
 | ||||
| echo "" | ||||
| go generate -mod=vendor ./... | ||||
| 
 | ||||
| pushd cmd/${exe} | ||||
| echo "" | ||||
| echo "Windows amd64" | ||||
| #GOOS=windows GOARCH=amd64 go build -mod=vendor -o dist/windows/amd64/${exe}.exe -ldflags "-H=windowsgui" $gocmd | ||||
| #GOOS=windows GOARCH=amd64 go build -mod=vendor -o dist/windows/amd64/${exe}.debug.exe | ||||
| GOOS=windows GOARCH=amd64 go build -mod=vendor -o dist/windows/amd64/${exe}.exe | ||||
| #GOOS=windows GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/windows/amd64/${exe}.exe -ldflags "-H=windowsgui" $gocmd | ||||
| #GOOS=windows GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/windows/amd64/${exe}.debug.exe | ||||
| GOOS=windows GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/windows/amd64/${exe}.exe | ||||
| echo "Windows 386" | ||||
| #GOOS=windows GOARCH=386 go build -mod=vendor -o dist/windows/386/${exe}.exe -ldflags "-H=windowsgui" $gocmd | ||||
| #GOOS=windows GOARCH=386 go build -mod=vendor -o dist/windows/386/${exe}.debug.exe | ||||
| GOOS=windows GOARCH=386 go build -mod=vendor -o dist/windows/386/${exe}.exe | ||||
| #GOOS=windows GOARCH=386 go build -mod=vendor -o ${distpre}/dist/windows/386/${exe}.exe -ldflags "-H=windowsgui" $gocmd | ||||
| #GOOS=windows GOARCH=386 go build -mod=vendor -o ${distpre}/dist/windows/386/${exe}.debug.exe | ||||
| GOOS=windows GOARCH=386 go build -mod=vendor -o ${distpre}/dist/windows/386/${exe}.exe | ||||
| 
 | ||||
| echo "" | ||||
| echo "Darwin (macOS) amd64" | ||||
| GOOS=darwin GOARCH=amd64 go build -mod=vendor -o dist/darwin/amd64/${exe} $gocmd | ||||
| GOOS=darwin GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/darwin/amd64/${exe} $gocmd | ||||
| 
 | ||||
| echo "" | ||||
| echo "Linux amd64" | ||||
| GOOS=linux GOARCH=amd64 go build -mod=vendor -o dist/linux/amd64/${exe} $gocmd | ||||
| GOOS=linux GOARCH=amd64 go build -mod=vendor -o ${distpre}/dist/linux/amd64/${exe} $gocmd | ||||
| echo "Linux 386" | ||||
| GOOS=linux GOARCH=386 go build -mod=vendor -o dist/linux/386/${exe} $gocmd | ||||
| GOOS=linux GOARCH=386 go build -mod=vendor -o ${distpre}/dist/linux/386/${exe} $gocmd | ||||
| 
 | ||||
| echo "" | ||||
| echo "RPi 4 (64-bit) ARMv8" | ||||
| GOOS=linux GOARCH=arm64 go build -mod=vendor -o dist/linux/armv8/${exe} $gocmd | ||||
| GOOS=linux GOARCH=arm64 go build -mod=vendor -o ${distpre}/dist/linux/armv8/${exe} $gocmd | ||||
| echo "RPi 3 B+ ARMv7" | ||||
| GOOS=linux GOARCH=arm GOARM=7 go build -mod=vendor -o dist/linux/armv7/${exe} $gocmd | ||||
| GOOS=linux GOARCH=arm GOARM=7 go build -mod=vendor -o ${distpre}/dist/linux/armv7/${exe} $gocmd | ||||
| echo "ARMv6" | ||||
| GOOS=linux GOARCH=arm GOARM=6 go build -mod=vendor -o dist/linux/armv6/${exe} $gocmd | ||||
| GOOS=linux GOARCH=arm GOARM=6 go build -mod=vendor -o ${distpre}/dist/linux/armv6/${exe} $gocmd | ||||
| echo "RPi Zero ARMv5" | ||||
| GOOS=linux GOARCH=arm GOARM=5 go build -mod=vendor -o dist/linux/armv5/${exe} $gocmd | ||||
| GOOS=linux GOARCH=arm GOARM=5 go build -mod=vendor -o ${distpre}/dist/linux/armv5/${exe} $gocmd | ||||
| 
 | ||||
| echo "" | ||||
| popd | ||||
| rsync -av ./dist/ ubuntu@rootprojects.org:/srv/www/rootprojects.org/$exe/dist/ | ||||
| # e.g. https://rootprojects.org/watchdog/dist/windows/amd64/watchdog.exe | ||||
|  | ||||
							
								
								
									
										143
									
								
								watchdog.go
									
									
									
									
									
								
							
							
						
						
									
										143
									
								
								watchdog.go
									
									
									
									
									
								
							| @ -33,6 +33,12 @@ func (s Status) String() string { | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| const ( | ||||
| 	MessageDown   = "went down" | ||||
| 	MessageUp     = "came back up" | ||||
| 	MessageHiccup = "hiccupped" | ||||
| ) | ||||
| 
 | ||||
| type Dog struct { | ||||
| 	Watchdog      string | ||||
| 	Name          string | ||||
| @ -47,15 +53,15 @@ type Dog struct { | ||||
| 	status        Status | ||||
| 	changed       bool | ||||
| 	error         error | ||||
| 	failures      int | ||||
| 	passes        int | ||||
| 	lastFailed    time.Time | ||||
| 	lastPassed    time.Time | ||||
| 	lastNotified  time.Time | ||||
| 	//failures      int | ||||
| 	//passes        int | ||||
| 	//lastFailed    time.Time | ||||
| 	//lastPassed    time.Time | ||||
| 	//lastNotified time.Time | ||||
| } | ||||
| 
 | ||||
| func New(d *Dog) *Dog { | ||||
| 	d.lastPassed = time.Now().Add(-5 * time.Minute) | ||||
| 	//d.lastPassed = time.Now().Add(-5 * time.Minute) | ||||
| 	d.status = StatusUp | ||||
| 	d.changed = false | ||||
| 	return d | ||||
| @ -70,88 +76,87 @@ func (d *Dog) Watch() { | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // Now that I've added the ability to notify when a server is back up | ||||
| // this definitely needs some refactoring. It's bad now. | ||||
| func (d *Dog) watch() { | ||||
| 	d.Logger <- fmt.Sprintf("Check: '%s'", d.Name) | ||||
| 
 | ||||
| 	err := d.check() | ||||
| 	// This may be up or down | ||||
| 	err := d.hardcheck() | ||||
| 	if nil == err { | ||||
| 		d.Logger <- fmt.Sprintf("Up: '%s'", d.Name) | ||||
| 		// if it's down, coming up, notify | ||||
| 		if d.changed { | ||||
| 			d.notify("came back up") | ||||
| 			d.notify(MessageUp) | ||||
| 		} | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	// If being down is a change, check to see if it's just a hiccup | ||||
| 	if d.changed { | ||||
| 		time.Sleep(time.Duration(5) * time.Second) | ||||
| 
 | ||||
| 	err2 := d.check() | ||||
| 		err2 := d.softcheck() | ||||
| 		if nil != err2 { | ||||
| 			// it's really down | ||||
| 			d.Logger <- fmt.Sprintf("Down: '%s': %s", d.Name, err2) | ||||
| 		} else { | ||||
| 			// it's not really down, so reset the change info | ||||
| 			d.changed = false | ||||
| 			d.status = StatusUp | ||||
| 			// and notify of the hiccup | ||||
| 			d.Logger <- fmt.Sprintf("Hiccup: '%s': %s", d.Name, err) | ||||
| 			d.notify(MessageHiccup) | ||||
| 			return | ||||
| 		} | ||||
| 
 | ||||
| 	t := 10 | ||||
| 	for { | ||||
| 		d.recover() | ||||
| 		time.Sleep(time.Duration(t) * time.Second) | ||||
| 		// backoff | ||||
| 		t *= 2 | ||||
| 		err := d.check() | ||||
| 		if nil != err { | ||||
| 			d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err) | ||||
| 	} | ||||
| 
 | ||||
| 		// We should notify if | ||||
| 		// * The status has changed | ||||
| 		// | ||||
| 	// TODO what if the server is flip-flopping rapidly? | ||||
| 	// how to rate limit? | ||||
| 	// "{{ .Server }} is on cooldown for 30 minutes" | ||||
| 		if d.changed { | ||||
| 			d.notify("went down") | ||||
| 			if StatusUp == d.status { | ||||
| 				break | ||||
| 			} | ||||
| 
 | ||||
| 	// * We've had success since the last notification | ||||
| 	// * It's been at least 5 minutes since the last notification | ||||
| 	//fiveMinutesAgo := time.Now().Add(-5 * time.Minute) | ||||
| 	//if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) { | ||||
| 	//} | ||||
| 			//if !failure || d.failures >= 5 { | ||||
| 			// go back to the main 5-minute loop | ||||
| 			//	break | ||||
| 			//} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	t := 10 | ||||
| 	for { | ||||
| 		// try to recover, then backoff exponentially | ||||
| 		d.recover() | ||||
| 		time.Sleep(time.Duration(t) * time.Second) | ||||
| 		t *= 2 | ||||
| 		if t > 120 { | ||||
| 			t = 120 | ||||
| 		} | ||||
| 
 | ||||
| func (d *Dog) check() error { | ||||
| 	previousStatus := d.status | ||||
| 
 | ||||
| 	var err error | ||||
| 	defer func() { | ||||
| 		// Are we up, or down? | ||||
| 		err := d.softcheck() | ||||
| 		if nil != err { | ||||
| 			// this is down, and we know it's down | ||||
| 			d.status = StatusDown | ||||
| 			d.failures += 1 | ||||
| 			d.lastFailed = time.Now() | ||||
| 		} else { | ||||
| 			d.status = StatusUp | ||||
| 			d.lastPassed = time.Now() | ||||
| 			d.passes += 1 | ||||
| 			d.Logger <- fmt.Sprintf("Up: '%s'", d.Name) | ||||
| 		} | ||||
| 
 | ||||
| 		// Has that changed? | ||||
| 		if previousStatus != d.status { | ||||
| 			d.changed = true | ||||
| 		} else { | ||||
| 			d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err) | ||||
| 			if d.changed { | ||||
| 				d.changed = false | ||||
| 				d.notify(MessageDown) | ||||
| 			} | ||||
| 		} else { | ||||
| 			// it came back up | ||||
| 			d.status = StatusUp | ||||
| 			d.Logger <- fmt.Sprintf("Up: '%s'", d.Name) | ||||
| 			if d.changed { | ||||
| 				// and the downtime was short - just a recovery | ||||
| 				d.notify(MessageHiccup) | ||||
| 			} else { | ||||
| 				// and the downtime lasted long enough that "went down" was already reported | ||||
| 				d.notify(MessageUp) | ||||
| 			} | ||||
| 			d.changed = false | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 	}() | ||||
| 
 | ||||
| func (d *Dog) softcheck() error { | ||||
| 	client := NewHTTPClient() | ||||
| 	response, err := client.Get(d.CheckURL) | ||||
| 	if nil != err { | ||||
| @ -174,7 +179,7 @@ func (d *Dog) check() error { | ||||
| 	} | ||||
| 
 | ||||
| 	if "" != d.Badwords { | ||||
| 		if !bytes.Contains(b, []byte(d.Badwords)) { | ||||
| 		if bytes.Contains(b, []byte(d.Badwords)) { | ||||
| 			err = fmt.Errorf("Down: '%s' Found for '%s'", d.Badwords, d.Name) | ||||
| 			d.Logger <- fmt.Sprintf("%s", err) | ||||
| 			d.error = err | ||||
| @ -185,6 +190,32 @@ func (d *Dog) check() error { | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (d *Dog) hardcheck() error { | ||||
| 	previousStatus := d.status | ||||
| 
 | ||||
| 	err := d.softcheck() | ||||
| 
 | ||||
| 	// Are we up, or down? | ||||
| 	if nil != err { | ||||
| 		d.status = StatusDown | ||||
| 		//d.failures += 1 | ||||
| 		//d.lastFailed = time.Now() | ||||
| 	} else { | ||||
| 		d.status = StatusUp | ||||
| 		//d.lastPassed = time.Now() | ||||
| 		//d.passes += 1 | ||||
| 	} | ||||
| 
 | ||||
| 	// Has that changed? | ||||
| 	if previousStatus != d.status { | ||||
| 		d.changed = true | ||||
| 	} else { | ||||
| 		d.changed = false | ||||
| 	} | ||||
| 
 | ||||
| 	return err | ||||
| } | ||||
| 
 | ||||
| func (d *Dog) recover() { | ||||
| 	if "" == d.Recover { | ||||
| 		return | ||||
| @ -214,7 +245,7 @@ func (d *Dog) recover() { | ||||
| 
 | ||||
| func (d *Dog) notify(msg string) { | ||||
| 	d.Logger <- fmt.Sprintf("Notifying the authorities of %s's status change", d.Name) | ||||
| 	d.lastNotified = time.Now() | ||||
| 	//d.lastNotified = time.Now() | ||||
| 
 | ||||
| 	for i := range d.Webhooks { | ||||
| 		name := d.Webhooks[i] | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user