#! /usr/local/bin/scotty -inf
##
## Simple IP Monitor Tools for [TK]INED.
##
## Copyright (c) 1993, 1994
##                    J. Schoenwaelder
##                    TU Braunschweig, Germany
##                    Institute for Operating Systems and Computer Networks
##
## Permission to use, copy, modify, and distribute this
## software and its documentation for any purpose and without
## fee is hereby granted, provided that this copyright
## notice appears in all copies.  The University of Braunschweig
## makes no representations about the suitability of this
## software for any purpose.  It is provided "as is" without
## express or implied warranty.
##

set intervall 10
set clone true
set clone_ids ""

##
## Delete an element from a list.
##

proc ldelete {list element} {
    upvar $list mylist
    set result ""
    foreach e $mylist {
	if {$e != $element} { lappend result $e }
    }
    return $result
}


## Get the IP Address of a node. Query the name server, if the
## address attribute is not set to something that looks like a
## valid IP address.

proc Get_IP_Address {node} {
    if {[ined_type $node]=="NODE"} {
        set host [ined_name $node]
        set ip [ined_address $node]
        if {[regexp "^\[0-9\]+\.\[0-9\]+\.\[0-9\]+\.\[0-9\]+$" $ip]>0} {
            return $ip
        }
        if {[catch {nslook $host} ip]==0} {
            return [lindex $ip 0]
        }
    }
    return ""
}

##
## Clone a node object. Return the new id.
##

proc clone {id {offset_x 20} {offset_y 20}} {

    global clone
    global clone_ids

    if {$clone != "true"} { return $id }

    set new [ined -noupdate create NODE]
    ined -noupdate name  $new   [ined -noupdate name $id]
    ined -noupdate color $new   [ined -noupdate color $id]
    ined -noupdate font  $new   [ined -noupdate font $id]
    ined -noupdate label $new   [ined -noupdate label $id]
    ined -noupdate icon  $new   [ined -noupdate icon $id]
    set xy [ined -noupdate move $id]
    set x [expr {[lindex $xy 0]+$offset_x}]
    set y [expr {[lindex $xy 1]+$offset_y}]
    ined -noupdate move $new $x $y

    lappend clone_ids $new

    return $new
}

##
## Check if all ined objects given by ids are still there.
## Modify the current job if any objects have neeb removed and
## kill the job, if no ids have been left.
##

proc check {cmd ids} {

    global ined_curjob_id
    global ined_curjob_itv

    set new_ids ""
    foreach id $ids {
        if {[ined retrieve $id] != ""} {
            lappend new_ids $id
        }
    }

    if {$new_ids != $ids} {
        if {$new_ids == ""} {
            ined_kill_job $ined_curjob_id
	    return 
        } else {
            ined_modify_job $ined_curjob_id [list $cmd $new_ids] \
		$ined_curjob_itv
        }
    }

    return $new_ids
}

##
## Extract all nodes of the given object list with a valid IP
## address that are reachable. Return a list of id/ip pairs.
##

proc extract {list} {

    set id_ip_list ""

    foreach comp $list {
        if {[ined_type $comp] == "NODE"} {
            set id [ined_id $comp]
            set host [lindex [ined_name $comp] 0]
            set ip [Get_IP_Address $comp]
            if {$ip == ""} {
                ined acknowledge "Can not lookup IP Address for $host."
                continue
            }
	    catch {
		if {[lindex [join [icmp $ip]] 1] < 0} {
		    ined acknowledge "$host \[$ip\] unreachable."
		    continue
		}
            }
	    lappend id_ip_list [list $id $ip]
        }
    }

    return $id_ip_list
}

##
## Compute the diff between two rstat calls.
##

proc rstat_diff {l1 l2 period} {
    set len [llength $l1]
    set res ""
    for {set i "0"} {$i < $len} {incr i} {
        set el1 [lindex $l1 $i]
        set el2 [lindex $l2 $i]
        set tmp [lindex $el1 2]
        if {[lindex $el1 1] == "Counter"} {
            set tmp [expr {[lindex $el1 2]-[lindex $el2 2]}]
	    if {$period <= 0} {
		set tmp 0
	    } else {
		set tmp [expr {"$tmp.0" / $period}]
	    }
	}
        if {[lindex $el1 1] == "Gauge"} {
            set tmp [expr {[lindex $el1 2]/256.0}]
        }
        lappend res [format "%-12s %-12s %16.3f" \
		     [lindex $el1 0] [lindex $el1 1] $tmp]
    }
    return $res
}

##
## Flash the color of the object given by id having the default color
## color. Put the text txt in the label and clear it the next time.
## This makes flashing possible even on monochrome displays.
##

proc flash {id color txt} {
    if {[ined color $id] == $color} {
	if {$color != "Red"} {
	    ined -noupdate color $id Red
	} else {
	    ined -noupdate color $id Yellow
	}
	ined label $id text $txt
    } else {
	ined -noupdate color $id $color
	ined label $id clear
    }
}

##
## Send an ICMP request periodically to test if the selected nodes
## are reachable. This routine makes use of the multi threaded ping 
## mechanism.
##

proc reachability {ids} {

    global reachability_ip
    global reachability_stat

    set ids [check reachability $ids]
    if {$ids == ""} return

    set id_list ""
    set ip_list ""

    foreach id $ids {
	set ip $reachability_ip($id)
	lappend id_list $id
	lappend ip_list $ip
    }
	
    if {$ip_list != ""} {
	set icmp_result [icmp -delay 100 -size 0 -timeout 5 $ip_list]
	set i 0
	foreach id $id_list {
	    set rtt [lindex [lindex $icmp_result $i] 1]
	    set host [lindex [ined name $id] 0]
	    set color [lindex $reachability_stat($id) 1]
	    if {$rtt < 0} {
		if {[lindex $reachability_stat($id) 0] == "ok"} {
		    if {$color != "Red"} {
			ined -noupdate color $id Red
		    } else {
			ined -noupdate color $id Yellow
		    }
		    ined label $id text "$host unreachable"
		    set reachability_stat($id) [list notok $color]
		}
	    } else {
		if {[lindex $reachability_stat($id) 0] == "notok"} {
		    ined -noupdate color $id $color
		    ined label $id name
		    set reachability_stat($id) [list ok $color]
		}
	    }
	    incr i
	}
    }
}

##
## Send an ICMP request periodically to test if the selected nodes
## are reachable. This s the actual proc that is invoked from the 
## user interface.
##

proc "check reachability" {list} {

    global intervall
    global reachability_ip
    global reachability_stat

    foreach id_ip [extract $list] {
	set id [lindex $id_ip 0]
	set ip [lindex $id_ip 1]

	set reachability_ip($id)   $ip
	set reachability_stat($id) [list ok [ined color $id]]
	lappend ids $id
    }

    if {[info exists ids]} {
	ined_create_job [list reachability $ids] $intervall
    }
}

##
## Display the round trip time in a strip chart.
##

proc pingtime {ids} {

    global pingtime_ip

    set ids [check pingtime $ids]
    if {$ids == ""} return

    set id_list ""
    set ip_list ""

    foreach id $ids {
	set ip $pingtime_ip($id)
	lappend id_list $id
	lappend ip_list $ip
    }
	
    if {$ip_list != ""} {
	set icmp_result [icmp -delay 100 -size 0 -timeout 10 $ip_list]
	set i 0
	foreach id $id_list {
	    set rtt [lindex [lindex $icmp_result $i] 1]
	    set host [lindex [ined name $id] 0]
	    if {$rtt < 0} {
		ined stripchart $id 0
		ined label $id text "$host unreachable"
	    } else {
		ined stripchart $id [expr {$rtt/10}]
		ined label $id text "rtt $rtt ms"
	    }
	    incr i
	}
    }
}

##
## Display the round trip time in a strip chart. This 
## is the actual proc that is invoked from the user interface.
##

proc "round trip time" {list} {

    global intervall
    global pingtime_ip

    foreach id_ip [extract $list] {
	set id [lindex $id_ip 0]
	set ip [lindex $id_ip 1]

	set id [clone $id 35 25]
	ined stripchart $id create

	set pingtime_ip($id) $ip
	lappend ids $id
    }

    if {[info exists ids]} {
	ined_create_job [list pingtime $ids] $intervall
    }
}

##
## Show the load given by the rstat RPC in a stripchart.
##

proc sysload {ids} {

    global clone
    global sysload_ip
    global sysload_time
    global sysload_stat

    set ids [check sysload $ids]
    if {$ids == ""} return

    foreach id $ids {
	set ip $sysload_ip($id)

	if {[catch {rpc stat $ip} res]} {
	    set ids [ldelete ids $id]
	    ined stripchart $id delete
	    if {$clone == "true"} {
		ined delete $id
	    }
	    continue
	}
	
	set now [getclock]
	set time_diff [expr {$now - $sysload_time($id)}]
	set rstat [rstat_diff $res $sysload_stat($id) $time_diff]

	set sysload_stat($id) $res
	set sysload_time($id) $now
	set load [lindex [lindex $rstat 18] 2]
	set load [expr ($load)*100]
	ined stripchart $id $load
    }
}

##
## Show the load given by the rstat RPC in a stripchart. This 
## is the actual proc that is invoked from the user interface.
##

proc "system load" {list} {

    global intervall
    global sysload_ip
    global sysload_time
    global sysload_stat

    foreach id_ip [extract $list] {
	set id [lindex $id_ip 0]
	set ip [lindex $id_ip 1]

	if {[catch {rpc stat $ip} res]} continue
	set id [clone $id 35 25]
	ined stripchart $id create
	ined label $id text "load"

	set sysload_ip($id)   $ip
	set sysload_stat($id) $res
	set sysload_time($id) [expr {[getclock]-$intervall}]

	lappend ids $id
    }

    if {[info exists ids]} {
	ined_create_job [list sysload $ids] $intervall
    }
}

##
## Show the cpu time split given by the rstat RPC in a barchart.
##

proc cpusplit {ids} {

    global clone
    global cpusplit_ip
    global cpusplit_time
    global cpusplit_stat

    set ids [check cpusplit $ids]
    if {$ids == ""} return

    foreach id $ids {
	set ip $cpusplit_ip($id)

	if {[catch {rpc stat $ip} res]} {
	    set ids [ldelete ids $id]
	    ined stripchart $id delete
	    if {$clone == "true"} {
		ined delete $id
	    }
	    continue
	}
	
	set now [getclock]
	set time_diff [expr {$now - $cpusplit_time($id)}]
	set rstat [rstat_diff $res $cpusplit_stat($id) $time_diff]

	set cpusplit_stat($id) $res
	set cpusplit_time($id) $now

	set load ""
	foreach idx "0 1 2 3" {
	    lappend load [lindex [lindex $rstat $idx] 2]
	}
	ined barchart $id $load
    }
}

##
## Show the cpu time split given by the rstat RPC in a barchart. This
## is the actual proc that is invoked from the user interface.
##

proc "cpu activity" {list} {

    global intervall
    global cpusplit_ip
    global cpusplit_time
    global cpusplit_stat

    foreach id_ip [extract $list] {
	set id [lindex $id_ip 0]
	set ip [lindex $id_ip 1]

	if {[catch {rpc stat $ip} res]} continue
	set id [clone $id 35 -35]
	ined barchart $id create
	ined label $id text "user nice system idle"

	set cpusplit_ip($id)   $ip
	set cpusplit_stat($id) $res
	set cpusplit_time($id) [expr {[getclock]-$intervall}]

	lappend ids $id
    }

    if {[info exists ids]} {
	ined_create_job [list cpusplit $ids] $intervall
    }
}

##
## Show the disk activity given by the rstat RPC in a barchart.
##

proc diskload {ids} {

    global clone
    global diskload_ip
    global diskload_time
    global diskload_stat

    set ids [check diskload $ids]
    if {$ids == ""} return

    foreach id $ids {
	set ip $diskload_ip($id)

	if {[catch {rpc stat $ip} res]} {
	    set ids [ldelete ids $id]
	    ined stripchart $id delete
	    if {$clone == "true"} {
		ined delete $id
	    }
	    continue
	}
	
	set now [getclock]
	set time_diff [expr {$now - $diskload_time($id)}]
	set rstat [rstat_diff $res $diskload_stat($id) $time_diff]

	set diskload_stat($id) $res
	set diskload_time($id) $now

	set load ""
	foreach idx "4 5 6 7 8 9 10 11" {
	    lappend load [expr {2*[lindex [lindex $rstat $idx] 2]}]
	}
	ined barchart $id $load
    }
}

##
## Show the disk activity given by the rstat RPC in a barchart. This
## is the actual proc that is invoked from the user interface.
##

proc "disk activity" {list} {

    global intervall
    global diskload_ip
    global diskload_time
    global diskload_stat

    foreach id_ip [extract $list] {
	set id [lindex $id_ip 0]
	set ip [lindex $id_ip 1]

	if {[catch {rpc stat $ip} res]} continue
	set id [clone $id -35 40]
	ined barchart $id create
	ined label $id text "d0 d1 d2 d3 pi po si so"

	set diskload_ip($id)   $ip
	set diskload_stat($id) $res
	set diskload_time($id) [expr {[getclock]-$intervall}]

	lappend ids $id
    }

    if {[info exists ids]} {
	ined_create_job [list diskload $ids] $intervall
    }
}

##
## Show the disk activity given by the rstat RPC in a barchart.
##

proc ifload {ids} {

    global clone
    global ifload_ip
    global ifload_time
    global ifload_stat

    set ids [check ifload $ids]
    if {$ids == ""} return

    foreach id $ids {
	set ip $ifload_ip($id)

	if {[catch {rpc stat $ip} res]} {
	    set ids [ldelete ids $id]
	    ined stripchart $id delete
	    if {$clone == "true"} {
		ined delete $id
	    }
	    continue
	}
	
	set now [getclock]
	set time_diff [expr {$now - $ifload_time($id)}]
	set rstat [rstat_diff $res $ifload_stat($id) $time_diff]

	set ifload_stat($id) $res
	set ifload_time($id) $now

	set load ""
	foreach idx "14 15 16 17" {
	    lappend load [expr {[lindex [lindex $rstat $idx] 2]}]
	}
	ined barchart $id $load
    }
}

##
## Show the interface activity given by the rstat RPC in a barchart. This
## is the actual proc that is invoked from the user interface.
##

proc "interface activity" {list} {

    global intervall
    global ifload_ip
    global ifload_time
    global ifload_stat

    foreach id_ip [extract $list] {
	set id [lindex $id_ip 0]
	set ip [lindex $id_ip 1]

	if {[catch {rpc stat $ip} res]} continue
	set id [clone $id -35 -40]
	ined barchart $id create
	ined label $id text "in ierr out oerr"

	set ifload_ip($id)   $ip
	set ifload_stat($id) $res
	set ifload_time($id) [expr {[getclock]-$intervall}]

	lappend ids $id
    }

    if {[info exists ids]} {
	ined_create_job [list ifload $ids] $intervall
    }
}

##
## Show the ethernet load as reported by an etherstatd.
##

proc etherload {ids} {

    global clone
    global etherload_ip

    set ids [check etherload $ids]
    if {$ids == ""} return

    foreach id $ids {
	set ip $etherload_ip($id)

	if {[catch {rpc ether $ip} load]} {
	    set ids [ldelete ids $id]
	    ined barchart $id delete
	    if {$clone == "true"} {
                ined delete $id
            }
	    continue
	}

	set time_ms [lindex [lindex $load 0] 2]
	set bytes   [lindex [lindex $load 1] 2]
	set packets [lindex [lindex $load 2] 2]
	set mbits   [expr {$bytes*8/1000000.0}]
	set time_s  [expr {$time_ms/1000.0}]
	set mbits_per_s   [expr {$mbits/$time_s}]
	set packets_per_s [expr {$packets/$time_s}]
	set bytes_per_s   [expr {$bytes*1.0/$packets}]
	ined barchart $id \
	    [expr {$mbits_per_s*12.5}] \
	    [expr {$packets_per_s/20}] \
	    [expr {$bytes_per_s/10}]
	ined label $id text \
	    [format "%3.2f MBit/s  %4.1f Packet/s %4.1f Bytes/Packet" \
	     $mbits_per_s $packets_per_s $bytes_per_s]
    }
}

##
## Show the ethernet load as reported by an etherstatd. This
## is the actual proc that is invoked from the user interface.
##

proc "ethernet load" {list} {

    global intervall
    global etherload_ip

    foreach id_ip [extract $list] {
	set id [lindex $id_ip 0]
	set ip [lindex $id_ip 1]

	if {![catch {rpc ether open $ip}]} {
	    set id [clone $id -40 -25]
	    ined barchart $id create
	    set etherload_ip($id)   $ip
	    lappend ids $id
	}
    }

    if {[info exists ids]} {
	ined_create_job [list etherload $ids] $intervall
    }
}

##
## Display the jobs currently running.
##

proc "monitor job info" {list} {

    set jobs [ined_list_jobs]

    if {$jobs == ""} {
	ined acknowledge "Sorry, no jobs available."
	return
    }
    
    set result ""
    set len 0
    foreach job $jobs {

	set jobcmd [lindex $job 1]

	# Convert the id's to hostnames for readability.
	
	set hosts ""
	foreach id [lindex $jobcmd 1] {
	    lappend hosts [lindex [ined name $id] 0]
	}
	if {[llength $hosts] > 3} {
	    set hosts "[lrange $hosts 0 2] ..."
	}
	
	set line \
	     [format "%3d %6.1f %6.1f %3d %8s %s %s" \
	     [lindex $job 0] [lindex $job 2] [lindex $job 3] \
	     [lindex $job 4] [lindex $job 5] [lindex $jobcmd 0] $hosts ]

	if {[string length $line] > $len} {
	    set len [string length $line]
	}

	lappend result $line
    }

    set header " ID   INTV   REM  CNT  STATUS    CMD"

    for {set i [string length $header]} {$i < $len} {incr i} {
	append header " "
    }

    ined browse $header $result

}

##
## Modify the state or the intervall of a running job.
##

proc "modify monitor job" {list} {

    # Ask for the job to modify.

    for {
	set jobnr [ined_select_job modify]
    } {
	$jobnr != ""
    } {
	set jobnr [ined_select_job modify]
    } {

	# Get the details about the selected job.

	set jobcmd ""
	set jobitv ""
	foreach job [ined_list_jobs] {
	    if {$jobnr == [lindex $job 0]} {
		set jobcmd [lindex $job 1]
		set jobitv [lindex $job 2]
		set jobstat [lindex $job 5]
	    }
	}
	
	# Convert the id's to hostnames for readability.
	
	set hosts ""
	foreach id [lindex $jobcmd 1] {
	    lappend hosts [lindex [ined -noupdate name $id] 0]
	}
	if {[llength $hosts] > 3} {
	    set hosts "[lrange $hosts 0 2] ..."
	}
	
	# Request for changes.
	
	set res [ined request "Modify job 0 ([lindex $jobcmd 0] on $hosts)" \
		 "{{Intervaltime \[s\]:} $jobitv scale 1 120} \
                  {{Job Status:} $jobstat radio waiting suspend} "]
	if {$res == ""} continue

	set jobitv  [lindex $res 0]
	set jobstat [lindex $res 1]
	
	if {$jobstat == "waiting"} { ined_resume_job  $jobnr }
	if {$jobstat == "suspend"} { ined_suspend_job $jobnr }
	
	ined_modify_job $jobnr $jobcmd $jobitv
    }
}

##
## This simple dialog allows us to modify the monitor parameters.
##

proc "set parameter" {list} {
    global intervall clone

    set result [ined request "Monitor Parameter" \
	" { {Intervall \[s\]:} $intervall scale 1 120} \
	  { {Clone nodes:}   $clone radio true false} "]

    if {$result==""} return

    set intervall [lindex $result 0]
    set clone     [lindex $result 1]

    if {$intervall<1} { set intervall 1 }
}

##
## Display some help about this tool.
##

proc "help IP Monitor" {list} {
    ined browse "Help about IP Monitor" {
	"check reachability:" 
	"    This command will periodically send an echo ICMP request to the" 
	"    selected hosts and takes action when a host gets unreachable." 
	"" 
	"round trip time:" 
	"    Send an ICMP echo request to the selected hosts and display the" 
	"    round trip time in a stripchart." 
	"" 
	"system load:" 
	"    Query the rstat daemon of the selected hosts and display the" 
	"    system load in stripchart." 
	"" 
	"cpu split:" 
	"    Query the rstat daemon of the selected hosts and display the" 
	"    the cpu split (user nice system idle) in a bar chart." 
	"" 
	"disk activity:" 
	"    Query the rstat daemon of the selected hosts and display the" 
        "    the disk activity in a bar chart. The first four bars show the" 
	"    activity of disk 0 to 3. The fifth and sixth bar display the " 
	"    paging activity and the last two bars show the swapping load." 
	"" 
	"ethernet load:" 
	"    Query the etherstat daemon of the selected hosts and display" 
	"    the ethernet load, the number of packets and the average packet" 
	"    size over the selected time intervall." 
	"" 
	"monitor job info:" 
	"    This command display information about all monitoring jobs" 
	"    started by this monitor script." 
	"" 
	"modify monitor job:" 
	"    Select one of the monitoring jobs and modify it. You can change" 
	"    the sampling interval and switch the state from waiting (which" 
	"    means active) to suspended." 
	"" 
	"set parameter:" 
	"    This dialog allows you to set the sampling interval and " 
	"    whether the monitoring commands should clone the selected " 
	"    nodes to display the status in a graphical chart." 
    }
}

##
## Delete the tools created by this interpreter.
##

proc "delete IP Monitor" {list} {

    global tools
    global clone_ids

    if {[ined_list_jobs] != ""} {
	if {[ined confirm "Kill running monitoring jobs?"] != "yes"} return
    }

    foreach id $tools { ined delete $id }    
    foreach id $clone_ids { ined delete $id }
    exit
}

set tools [ ined create TOOL "IP Monitor" \
	    "check reachability" "round trip time" "" \
	    "system load" "cpu activity" \
	    "disk activity" "interface activity" "" \
	    "ethernet load" "" \
            "monitor job info" "modify monitor job" "" \
	    "set parameter" "" \
	    "help IP Monitor" "delete IP Monitor" ]
