#!../tclnm
#
# tcl_snmp: demo_00 (1.01 27jan93 phk@data.fls.dk)
# 
# Watch a number of hosts, report when they disappear, reappear and reboot
# ------------------------------------------------------------------------
#
# This is the 1st thing I wanted my Sun Net Manager to do;  it couldn't.
#
# This program watches system.sysUpTime to see if the host reboots. 
# In addition it will complain if no response is received.
#
# The hosts to watch are listed in the file $CF, one per line.  When
# a change in modification time of the file $CF is detected, it is read again.
# This feature takes twice as many lines as the actual monitoring...
# 
# You can modify the 4 routines at the top to suit your needs for action.
# at present it will print a log on stdout, and send events to syslogd(8)
# together with a status every 15 minutes if something is wrong.
#
# CF 	is the name of the configuration file. It should contain hostnames
#	of the targets one per line, #-comments are allowed.
# 
# DF	is the name of the DBM-database.  Using a DBM base ensures no loss
#	of state just because the monitor (this program) reboots.
#
# DT	number of seconds between status messages
#
# SL	is the tag to use for syslog(1) (local.* is hardcoded...)
#
# PAUSE	is the number of seconds between scans..

# Tunables used by the rest of the script.
set CF    "demo_00.conf"   ;# configuration file listing the hosts
set DF    "demo_00.dbm"    ;# DBM database file
set DT    900              ;# seconds between status messages
set SL    "DEMO_00"        ;# tag handed to logger; empty disables logging
set PAUSE 15               ;# seconds to sleep between scans

##############################################################################

# Number of the last DT-second interval in which a status line was emitted;
# -1 means none has been emitted yet.
set last_stat -1

# status args --
#   Emit a status line containing $args, at most once per DT-second interval.
#   The line is printed on stdout and, when SL is non-empty, also handed to
#   logger with priority local0.crit.
proc status args {
    global last_stat DT SL

    set now [getclock]
    set slot [expr {$now / $DT}]

    # Already reported during this interval: stay quiet.
    if {$slot <= $last_stat} {
        return
    }

    set stamp [fmtclock $now {%Y%m%d %H%M%S}]
    puts stdout "STAT:  $stamp $args"
    flush stdout
    if {$SL != ""} {
        exec logger -t $SL -p local0.crit not OK: $args
    }
    set last_stat $slot
}

# event args --
#   Report an event: print a time-stamped line with $args on stdout and,
#   when SL is non-empty, pass $args to logger with priority local0.crit.
proc event args {
    global SL

    set stamp [fmtclock [getclock] {%Y%m%d %H%M%S}]
    puts stdout "EVENT: $stamp $args"
    flush stdout

    if {$SL == ""} {
        return
    }
    exec logger -t $SL -p local0.crit $args
}

# log args --
#   Write an informational line with $args on stdout and, when SL is
#   non-empty, pass $args to logger with priority local0.notice.
proc log args {
    global SL

    puts stdout "LOG:   $args"
    flush stdout

    if {$SL == ""} {
        return
    }
    exec logger -t $SL -p local0.notice $args
}

# notice args --
#   Debug hook.  Currently does nothing; enable the puts below to get a
#   trace line on stdout for every call.
proc notice args {
    #puts stdout "NOTICE:   $args"
    return
}

# Modification time of $CF recorded at the last (re)read.  The main loop
# rereads the file whenever the current mtime differs from this value.
set modtime 0

# Hosts currently being watched.
set host_list {}

# The very same request is sent on every scan, so build the PDU only once.
set pdu [snmp mkpdu system.sysUpTime.0]

# Main loop: runs forever.  Each pass
#   1. rereads the config file if its modification time changed,
#   2. polls system.sysUpTime.0 on every watched host,
#   3. stores "<uptime> <state>" per host in the DBM file,
#   4. reports the hosts that did not answer via `status', then sleeps.
#
# Per-host arrays, indexed by host name:
#   sd  handle returned by `snmp open'
#   st  last known state: Unknown, Up or Down
#   ut  last sysUpTime value seen
while {1} {
    # The DBM file is opened for every pass and closed before sleeping.
    set d [dbm open $DF wc]
    set downs ""

    # The stat result goes into its own array.  It must not share the `st'
    # array holding the host states: a host named e.g. "dev", "uid" or
    # "size" would get its state overwritten by the stat field on every pass.
    file stat $CF cfstat

    if {$modtime != $cfstat(mtime)} {

        # Config-file changed, (re)read it.
        log read config file "<$CF>"

        # Remember who we knew before: k(host) is 0 for every host watched
        # so far and is raised to 1 below for each host still listed.
        # NOTE(review): `unset k()' addresses the element with the empty
        # name, not the whole array, so entries of k survive between
        # rereads.  A host that is removed and later re-added therefore
        # takes the "was there before" branch and reuses its old sd/st/ut
        # entries.  Confirm that this is intended before changing it.
        catch {unset k()}
        foreach i $host_list { set k($i) 0 }
        set o $host_list
        set host_list ""

        set f [open $CF r]
        while {-1 != [gets $f ss]} {

            # Get rid of comments, then of surrounding white space, so that
            # "host   # remark" yields "host" and blank lines are skipped.
            set s $ss
            regsub {#.*} $ss {} s
            set s [string trim $s]
            if { $s == "" } continue

            if {[catch {set k($s)}]} {

                # A new one, open snmp ...
                # The script is braced so the host name is handed over as a
                # single literal argument instead of being re-parsed as Tcl.
                set z [catch {snmp open $s} y]
                if {$z == 0} {
                    set k($s) 1
                    set sd($s) $y
                    set st($s) Unknown
                    set ut($s) 0
                    lappend host_list $s
                    log now watching $s too
                } else {
                    log new host $s: couldn't open
                }
            } else {

                # was there before too
                lappend host_list $s
                set k($s) 1
            }
        }
        # Nothing below reads the file, so release the handle right away.
        close $f

        foreach i $o {
            if {$k($i) == 0} {

                # this one went away
                log leaving $i alone
                #TODO snmp close $sd($i)
                dbm delete $d $i
            }
        }

        # If a host is removed from the config-file while we're not running
        # we must flush it from the dbm-file...
        # The keys are collected first and deleted in a second loop.
        set o ""
        dbm forall $d i {
            if {[catch {set k($i)}] } {
                lappend o $i
            } else {
                if {$k($i) != 1} {
                    lappend o $i
                }
            }
        }
        foreach i $o {
            notice cleaning $i
            dbm delete $d $i
        }
        catch {unset k()}
        catch {unset o}

        set modtime $cfstat(mtime)
    }

    # Now do the real job...
    foreach i $host_list {
        set x ""
        set s [snmp get $sd($i) -v -P $pdu x]
        notice H:$i S:$s U:$x O:$ut($i)

        if { $s } {
            # No response.  Report only the transition into Down; the host
            # is still added to the list handed to `status' on every pass.
            if { $st($i) != "Down" } {
                event $i went away...
            }
            lappend downs $i
            set st($i) Down
            dbm store $d $i "$ut($i) $st($i)"
            continue
        }

        if { $ut($i) > $x } {
            # The uptime went backwards: the host must have restarted.
            event $i rebooted
            set st($i) Up
            set ut($i) $x
            dbm store $d $i "$ut($i) $st($i)"
            continue
        }

        set ut($i) $x
        if {$st($i) == "Down"} {
            event $i came back again...
            set st($i) Up
            dbm store $d $i "$ut($i) $st($i)"
            continue
        }

        set st($i) Up
        dbm store $d $i "$ut($i) $st($i)"
    }
    dbm close $d

    if {$downs != ""} {
        status $downs
    }
    sleep $PAUSE
}

exit 0
