303 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			303 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package watchdog
 | |
| 
 | |
| import (
 | |
| 	"bytes"
 | |
| 	"context"
 | |
| 	"encoding/json"
 | |
| 	"fmt"
 | |
| 	"io/ioutil"
 | |
| 	"net"
 | |
| 	"net/http"
 | |
| 	"net/url"
 | |
| 	"os/exec"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| )
 | |
| 
 | |
| type Dog struct {
 | |
| 	Name         string
 | |
| 	CheckURL     string
 | |
| 	Keywords     string
 | |
| 	Recover      string
 | |
| 	Webhooks     []string
 | |
| 	AllWebhooks  map[string]Webhook
 | |
| 	Logger       chan string
 | |
| 	error        error
 | |
| 	failures     int
 | |
| 	passes       int
 | |
| 	lastFailed   time.Time
 | |
| 	lastPassed   time.Time
 | |
| 	lastNotified time.Time
 | |
| }
 | |
| 
 | |
| func New(d *Dog) *Dog {
 | |
| 	d.lastPassed = time.Now().Add(-5 * time.Minute)
 | |
| 	return d
 | |
| }
 | |
| 
 | |
| func (d *Dog) Watch() {
 | |
| 	d.watch()
 | |
| 	for {
 | |
| 		// TODO set cancellable callback ?
 | |
| 		time.Sleep(5 * time.Minute)
 | |
| 		d.watch()
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (d *Dog) watch() {
 | |
| 	d.Logger <- fmt.Sprintf("Check: '%s'", d.Name)
 | |
| 
 | |
| 	err := d.check()
 | |
| 	if nil == err {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	time.Sleep(time.Duration(2) * time.Second)
 | |
| 	err2 := d.check()
 | |
| 	if nil != err2 {
 | |
| 		d.Logger <- fmt.Sprintf("Down: '%s': %s", d.Name, err2)
 | |
| 	} else {
 | |
| 		d.Logger <- fmt.Sprintf("Hiccup: '%s': %s", d.Name, err)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	failure := false
 | |
| 	t := 10
 | |
| 	for {
 | |
| 		d.recover()
 | |
| 		time.Sleep(time.Duration(t) * time.Second)
 | |
| 		// backoff
 | |
| 		t *= 2
 | |
| 		err := d.check()
 | |
| 		if nil != err {
 | |
| 			d.Logger <- fmt.Sprintf("Unrecoverable: '%s': %s", d.Name, err)
 | |
| 			failure = true
 | |
| 		} else {
 | |
| 			failure = false
 | |
| 		}
 | |
| 
 | |
| 		// We should notify if
 | |
| 		// * We've had success since the last notification
 | |
| 		// * It's been at least 5 minutes since the last notification
 | |
| 		fiveMinutesAgo := time.Now().Add(-5 * time.Minute)
 | |
| 		if d.lastPassed.After(d.lastNotified) && d.lastNotified.Before(fiveMinutesAgo) {
 | |
| 			d.notify(failure)
 | |
| 		}
 | |
| 		if !failure || d.failures >= 5 {
 | |
| 			// go back to the main 5-minute loop
 | |
| 			break
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (d *Dog) check() error {
 | |
| 	var err error
 | |
| 	defer func() {
 | |
| 		if nil != err {
 | |
| 			d.failures += 1
 | |
| 			d.lastFailed = time.Now()
 | |
| 		} else {
 | |
| 			d.lastPassed = time.Now()
 | |
| 			d.passes += 1
 | |
| 		}
 | |
| 	}()
 | |
| 
 | |
| 	client := NewHTTPClient()
 | |
| 	response, err := client.Get(d.CheckURL)
 | |
| 	if nil != err {
 | |
| 		d.error = fmt.Errorf("Connection Failure: " + err.Error())
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	b, err := ioutil.ReadAll(response.Body)
 | |
| 	if nil != err {
 | |
| 		d.error = fmt.Errorf("Network Failure: " + err.Error())
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	if !bytes.Contains(b, []byte(d.Keywords)) {
 | |
| 		err = fmt.Errorf("Down: '%s' Not Found for '%s'", d.Keywords, d.Name)
 | |
| 		d.Logger <- fmt.Sprintf("%s", err)
 | |
| 		d.error = err
 | |
| 		return err
 | |
| 	} else {
 | |
| 		d.Logger <- fmt.Sprintf("Up: '%s'", d.Name)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (d *Dog) recover() {
 | |
| 	if "" == d.Recover {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 | |
| 	cmd := exec.CommandContext(ctx, "bash")
 | |
| 	pipe, err := cmd.StdinPipe()
 | |
| 	pipe.Write([]byte(d.Recover))
 | |
| 	if nil != err {
 | |
| 		d.Logger <- fmt.Sprintf("[Recover] Could not write to bash '%s': %s", d.Recover, err)
 | |
| 	}
 | |
| 	err = cmd.Start()
 | |
| 	if nil != err {
 | |
| 		d.Logger <- fmt.Sprintf("[Recover] Could not start '%s': %s", d.Recover, err)
 | |
| 	}
 | |
| 	err = pipe.Close()
 | |
| 	if nil != err {
 | |
| 		d.Logger <- fmt.Sprintf("[Recover] Could not close '%s': %s", d.Recover, err)
 | |
| 	}
 | |
| 	err = cmd.Wait()
 | |
| 	cancel()
 | |
| 	if nil != err {
 | |
| 		d.Logger <- fmt.Sprintf("[Recover] '%s' failed for '%s': %s", d.Recover, d.Name, err)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (d *Dog) notify(hardFail bool) {
 | |
| 	d.Logger <- fmt.Sprintf("Notifying the authorities of %s's failure", d.Name)
 | |
| 	d.lastNotified = time.Now()
 | |
| 
 | |
| 	for i := range d.Webhooks {
 | |
| 		name := d.Webhooks[i]
 | |
| 		if "" == name {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		h, ok := d.AllWebhooks[name]
 | |
| 		if !ok {
 | |
| 			// TODO check in main when config is read
 | |
| 			d.Webhooks[i] = ""
 | |
| 			d.Logger <- fmt.Sprintf("[Warning] Could not find webhook '%s' for '%s'", name, h.Name)
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		d.notifyOne(h, hardFail)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (d *Dog) notifyOne(h Webhook, hardFail bool) {
 | |
| 	// TODO do this in main on config init
 | |
| 	if "" == h.Method {
 | |
| 		h.Method = "POST"
 | |
| 	}
 | |
| 
 | |
| 	var body *strings.Reader
 | |
| 	var err error
 | |
| 	// TODO real templates
 | |
| 	if 0 != len(h.Form) {
 | |
| 		form := url.Values{}
 | |
| 		for k := range h.Form {
 | |
| 			v := h.Form[k]
 | |
| 			// because `{{` gets urlencoded
 | |
| 			//k = strings.Replace(k, "{{ .Name }}", d.Name, -1)
 | |
| 			v = strings.Replace(v, "{{ .Name }}", d.Name, -1)
 | |
| 			d.Logger <- fmt.Sprintf("[HEADER] %s: %s", k, v)
 | |
| 			form.Set(k, v)
 | |
| 		}
 | |
| 		body = strings.NewReader(form.Encode())
 | |
| 	} else if 0 != len(h.JSON) {
 | |
| 		bodyBuf, err := json.Marshal(h.JSON)
 | |
| 		if nil != err {
 | |
| 			d.Logger <- fmt.Sprintf("[Notify] JSON Marshal Error for '%s': %s", h.Name, err)
 | |
| 			return
 | |
| 		}
 | |
| 		// `{{` should be left alone
 | |
| 		body = strings.NewReader(strings.Replace(string(bodyBuf), "{{ .Name }}", d.Name, -1))
 | |
| 	}
 | |
| 
 | |
| 	client := NewHTTPClient()
 | |
| 	req, err := http.NewRequest(h.Method, h.URL, body)
 | |
| 	if nil != err {
 | |
| 		d.Logger <- fmt.Sprintf("[Notify] HTTP Client Network Error for '%s': %s", h.Name, err)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if 0 != len(h.Form) {
 | |
| 		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
 | |
| 	} else if 0 != len(h.JSON) {
 | |
| 		req.Header.Set("Content-Type", "application/json")
 | |
| 	}
 | |
| 
 | |
| 	if 0 != len(h.Auth) {
 | |
| 		user := h.Auth["user"]
 | |
| 		if "" == user {
 | |
| 			user = h.Auth["username"]
 | |
| 		}
 | |
| 		pass := h.Auth["pass"]
 | |
| 		if "" == user {
 | |
| 			pass = h.Auth["password"]
 | |
| 		}
 | |
| 		req.SetBasicAuth(user, pass)
 | |
| 	}
 | |
| 
 | |
| 	req.Header.Set("User-Agent", "Watchdog/1.0")
 | |
| 	for k := range h.Headers {
 | |
| 		req.Header.Set(k, h.Headers[k])
 | |
| 	}
 | |
| 
 | |
| 	resp, err := client.Do(req)
 | |
| 	if nil != err {
 | |
| 		d.Logger <- fmt.Sprintf("[Notify] HTTP Client Error for '%s': %s", h.Name, err)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if !(resp.StatusCode >= 200 && resp.StatusCode < 300) {
 | |
| 		d.Logger <- fmt.Sprintf("[Notify] Response Error for '%s': %s", h.Name, resp.Status)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	// TODO json vs xml vs txt
 | |
| 	var data map[string]interface{}
 | |
| 	req.Header.Add("Accept", "application/json")
 | |
| 	decoder := json.NewDecoder(resp.Body)
 | |
| 	err = decoder.Decode(&data)
 | |
| 	if err != nil {
 | |
| 		d.Logger <- fmt.Sprintf("[Notify] Response Body Error for '%s': %s", h.Name, resp.Status)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	// TODO some sort of way to determine if data is successful (keywords)
 | |
| 	d.Logger <- fmt.Sprintf("[Notify] Success? %#v", data)
 | |
| }
 | |
| 
 | |
// Config is the top-level configuration document: the list of watches and the
// webhook definitions, under the JSON keys "watches" and "webhooks".
// NOTE(review): the code that loads this is not in this file — presumably it
// is decoded from a JSON config file; confirm against the caller.
type Config struct {
	Watches  []ConfigWatch `json:"watches"`
	Webhooks []Webhook     `json:"webhooks"`
}
 | |
| 
 | |
// ConfigWatch is the JSON form of a single watch entry.
// NOTE(review): the fields appear to correspond to Dog's Name, CheckURL,
// Keywords, Webhooks and Recover, but the conversion is not in this file —
// confirm against the caller.
type ConfigWatch struct {
	Name          string   `json:"name"`
	URL           string   `json:"url"`
	Keywords      string   `json:"keywords"`
	Webhooks      []string `json:"webhooks"`
	RecoverScript string   `json:"recover_script"`
}
 | |
| 
 | |
// Webhook describes one HTTP request that notifyOne sends as a notification.
type Webhook struct {
	// Name appears in the log lines written about this webhook.
	Name    string              `json:"name"`
	// Method is the HTTP method; notifyOne uses "POST" when it is empty.
	Method  string              `json:"method"`
	// URL is the request target.
	URL     string              `json:"url"`
	// Auth supplies basic-auth credentials; notifyOne looks up the keys
	// "user" or "username" and "pass" or "password".
	Auth    map[string]string   `json:"auth"`
	// Headers are set on the request after the default headers.
	Headers map[string]string   `json:"headers"`
	// Form, when non-empty, is sent as a URL-encoded body with "{{ .Name }}"
	// replaced in each value. It takes precedence over JSON.
	Form    map[string]string   `json:"form"`
	// JSON, when Form is empty and this is non-empty, is marshaled and sent
	// as the body with "{{ .Name }}" replaced.
	JSON    map[string]string   `json:"json"`
	// Config and Configs are not read anywhere in this file.
	// NOTE(review): purpose unknown from here — confirm against other users.
	Config  map[string]string   `json:"config"`
	Configs []map[string]string `json:"configs"`
}
 | |
| 
 | |
// NewHTTPClient returns a new *http.Client with explicit timeouts, because
// the default http client uses unsafe defaults: a 10-second dial timeout, a
// 5-second TLS handshake timeout and a 5-second limit on the whole request.
//
// Every call builds a separate client and transport.
func NewHTTPClient() *http.Client {
	dialer := &net.Dialer{
		Timeout: 10 * time.Second,
	}
	transport := &http.Transport{
		// DialContext replaces the deprecated Dial field and lets the
		// transport abandon a dial once the request is cancelled.
		DialContext:         dialer.DialContext,
		TLSHandshakeTimeout: 5 * time.Second,
	}
	return &http.Client{
		Timeout:   5 * time.Second,
		Transport: transport,
	}
}
 |