[dhcpmon] Monitor Mgmt Interface For DHCP Packets (#5317)

When BGP routes are missing, DHCP packets get relayed over mgmt
interface. This results in dhcpmon alerting that DHCP packets are
not being relayed. This is PR include mgmt interface as uplink
device, and so, if DHCP packet gets relayed over mgmt interface,
regular dhcpmon alert will not be issues. Instead, dhcpmon will
check the mgmt interface counts and issue a separate alert regarding
packets travelling through mgmt network.

In addition, this PR includes the following enhancements:
1. Add SIGUSR1 handler that prints out current packet counts
2. Increase alert grace window to 3 minutes from currently 2 minutes
3. Time is now computed more accurately
4. Print vlan name before counters

signed-off-by: Tamer Ahmed <tamer.ahmed@microsoft.com>
This commit is contained in:
Tamer Ahmed 2020-09-09 18:37:01 -07:00 committed by Ying Xie
parent 083607f4d1
commit 44d6e03df3
8 changed files with 463 additions and 174 deletions

View File

@ -105,6 +105,11 @@ command=/usr/sbin/dhcpmon -id {{ vlan_name }}
{% for (name, prefix) in PORTCHANNEL_INTERFACE %}
{% if prefix | ipv4 %} -iu {{ name }}{% endif -%}
{% endfor %}
{% if MGMT_INTERFACE %}
{% for (name, prefix) in MGMT_INTERFACE|pfx_filter %}
{% if prefix | ipv4 %} -im {{ name }}{% endif %}
{% endfor %}
{% endif %}
priority=4
autostart=false

View File

@ -8,6 +8,7 @@
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
@ -22,6 +23,9 @@
#include "dhcp_device.h"
/** Counter print width */
#define DHCP_COUNTER_WIDTH 10
/** Start of Ether header of a captured frame */
#define ETHER_START_OFFSET 0
/** Start of IP header of a captured frame */
@ -35,21 +39,6 @@
/** Offset of DHCP GIADDR */
#define DHCP_GIADDR_OFFSET 24
/**
* DHCP message types
**/
typedef enum
{
DHCP_MESSAGE_TYPE_DISCOVER = 1,
DHCP_MESSAGE_TYPE_OFFER = 2,
DHCP_MESSAGE_TYPE_REQUEST = 3,
DHCP_MESSAGE_TYPE_DECLINE = 4,
DHCP_MESSAGE_TYPE_ACK = 5,
DHCP_MESSAGE_TYPE_NAK = 6,
DHCP_MESSAGE_TYPE_RELEASE = 7,
DHCP_MESSAGE_TYPE_INFORM = 8
} dhcp_message_type;
#define OP_LDHA (BPF_LD | BPF_H | BPF_ABS) /** bpf ldh Abs */
#define OP_LDHI (BPF_LD | BPF_H | BPF_IND) /** bpf ldh Ind */
#define OP_LDB (BPF_LD | BPF_B | BPF_ABS) /** bpf ldb Abs*/
@ -94,17 +83,21 @@ static struct sock_fprog dhcp_sock_bfp = {
.len = sizeof(dhcp_bpf_code) / sizeof(*dhcp_bpf_code), .filter = dhcp_bpf_code
};
/** global aggregate counter for DHCP interfaces */
static dhcp_device_counters_t glob_counters[DHCP_DIR_COUNT] = {
[DHCP_RX] = {.discover = 0, .offer = 0, .request = 0, .ack = 0},
[DHCP_TX] = {.discover = 0, .offer = 0, .request = 0, .ack = 0},
/** Aggregate device of DHCP interfaces. It contains aggregate counters from
all interfaces
*/
static dhcp_device_context_t aggregate_dev = {0};
/** Monitored DHCP message type */
static dhcp_message_type_t monitored_msgs[] = {
DHCP_MESSAGE_TYPE_DISCOVER,
DHCP_MESSAGE_TYPE_OFFER,
DHCP_MESSAGE_TYPE_REQUEST,
DHCP_MESSAGE_TYPE_ACK
};
/** global aggregate counter snapshot for DHCP interfaces */
static dhcp_device_counters_t glob_counters_snapshot[DHCP_DIR_COUNT] = {
[DHCP_RX] = {.discover = 0, .offer = 0, .request = 0, .ack = 0},
[DHCP_TX] = {.discover = 0, .offer = 0, .request = 0, .ack = 0},
};
/** Number of monitored DHCP message type */
static uint8_t monitored_msg_sz = sizeof(monitored_msgs) / sizeof(*monitored_msgs);
/**
* @code handle_dhcp_option_53(context, dhcp_option, dir, iphdr, dhcphdr);
@ -128,42 +121,29 @@ static void handle_dhcp_option_53(dhcp_device_context_t *context,
in_addr_t giaddr;
switch (dhcp_option[2])
{
// DHCP messages send by client
case DHCP_MESSAGE_TYPE_DISCOVER:
giaddr = ntohl(dhcphdr[DHCP_GIADDR_OFFSET] << 24 | dhcphdr[DHCP_GIADDR_OFFSET + 1] << 16 |
dhcphdr[DHCP_GIADDR_OFFSET + 2] << 8 | dhcphdr[DHCP_GIADDR_OFFSET + 3]);
context->counters[dir].discover++;
if ((context->vlan_ip == giaddr && context->is_uplink && dir == DHCP_TX) ||
(!context->is_uplink && dir == DHCP_RX && iphdr->ip_dst.s_addr == INADDR_BROADCAST)) {
glob_counters[dir].discover++;
}
break;
case DHCP_MESSAGE_TYPE_OFFER:
context->counters[dir].offer++;
if ((context->vlan_ip == iphdr->ip_dst.s_addr && context->is_uplink && dir == DHCP_RX) ||
(!context->is_uplink && dir == DHCP_TX)) {
glob_counters[dir].offer++;
}
break;
case DHCP_MESSAGE_TYPE_REQUEST:
giaddr = ntohl(dhcphdr[DHCP_GIADDR_OFFSET] << 24 | dhcphdr[DHCP_GIADDR_OFFSET + 1] << 16 |
dhcphdr[DHCP_GIADDR_OFFSET + 2] << 8 | dhcphdr[DHCP_GIADDR_OFFSET + 3]);
context->counters[dir].request++;
if ((context->vlan_ip == giaddr && context->is_uplink && dir == DHCP_TX) ||
(!context->is_uplink && dir == DHCP_RX && iphdr->ip_dst.s_addr == INADDR_BROADCAST)) {
glob_counters[dir].request++;
}
break;
case DHCP_MESSAGE_TYPE_ACK:
context->counters[dir].ack++;
if ((context->vlan_ip == iphdr->ip_dst.s_addr && context->is_uplink && dir == DHCP_RX) ||
(!context->is_uplink && dir == DHCP_TX)) {
glob_counters[dir].ack++;
}
break;
case DHCP_MESSAGE_TYPE_DECLINE:
case DHCP_MESSAGE_TYPE_NAK:
case DHCP_MESSAGE_TYPE_RELEASE:
case DHCP_MESSAGE_TYPE_INFORM:
giaddr = ntohl(dhcphdr[DHCP_GIADDR_OFFSET] << 24 | dhcphdr[DHCP_GIADDR_OFFSET + 1] << 16 |
dhcphdr[DHCP_GIADDR_OFFSET + 2] << 8 | dhcphdr[DHCP_GIADDR_OFFSET + 3]);
if ((context->vlan_ip == giaddr && context->is_uplink && dir == DHCP_TX) ||
(!context->is_uplink && dir == DHCP_RX && iphdr->ip_dst.s_addr == INADDR_BROADCAST)) {
context->counters[DHCP_COUNTERS_CURRENT][dir][dhcp_option[2]]++;
aggregate_dev.counters[DHCP_COUNTERS_CURRENT][dir][dhcp_option[2]]++;
}
break;
// DHCP messages send by server
case DHCP_MESSAGE_TYPE_OFFER:
case DHCP_MESSAGE_TYPE_ACK:
case DHCP_MESSAGE_TYPE_NAK:
if ((context->vlan_ip == iphdr->ip_dst.s_addr && context->is_uplink && dir == DHCP_RX) ||
(!context->is_uplink && dir == DHCP_TX)) {
context->counters[DHCP_COUNTERS_CURRENT][dir][dhcp_option[2]]++;
aggregate_dev.counters[DHCP_COUNTERS_CURRENT][dir][dhcp_option[2]]++;
}
break;
default:
syslog(LOG_WARNING, "handle_dhcp_option_53(%s): Unknown DHCP option 53 type %d", context->intf, dhcp_option[2]);
@ -241,62 +221,166 @@ static void read_callback(int fd, short event, void *arg)
}
/**
* @code dhcp_device_validate(counters, counters_snapshot);
* @code dhcp_device_is_dhcp_inactive(counters);
*
* @brief validate current interface counters by comparing aggregate counter with snapshot counters.
* @brief Check if there were no DHCP activity
*
* @param counters recent interface counter
* @param counters_snapshot snapshot counters
* @param counters current/snapshot counter
*
* @return DHCP_MON_STATUS_HEALTHY, DHCP_MON_STATUS_UNHEALTHY, or DHCP_MON_STATUS_INDETERMINATE
* @return true if there were no DHCP activity, false otherwise
*/
static dhcp_mon_status_t dhcp_device_validate(dhcp_device_counters_t *counters,
dhcp_device_counters_t *counters_snapshot)
static bool dhcp_device_is_dhcp_inactive(uint64_t counters[][DHCP_DIR_COUNT][DHCP_MESSAGE_TYPE_COUNT])
{
dhcp_mon_status_t rv = DHCP_MON_STATUS_HEALTHY;
uint64_t *rx_counters = counters[DHCP_COUNTERS_CURRENT][DHCP_RX];
uint64_t *rx_counter_snapshot = counters[DHCP_COUNTERS_SNAPSHOT][DHCP_RX];
if ((counters[DHCP_RX].discover == counters_snapshot[DHCP_RX].discover) &&
(counters[DHCP_RX].offer == counters_snapshot[DHCP_RX].offer ) &&
(counters[DHCP_RX].request == counters_snapshot[DHCP_RX].request ) &&
(counters[DHCP_RX].ack == counters_snapshot[DHCP_RX].ack ) ) {
rv = DHCP_MON_STATUS_INDETERMINATE;
} else {
// if we have rx DORA then we should have corresponding tx DORA (DORA being relayed)
if (((counters[DHCP_RX].discover > counters_snapshot[DHCP_RX].discover) &&
(counters[DHCP_TX].discover <= counters_snapshot[DHCP_TX].discover) ) ||
((counters[DHCP_RX].offer > counters_snapshot[DHCP_RX].offer ) &&
(counters[DHCP_TX].offer <= counters_snapshot[DHCP_TX].offer ) ) ||
((counters[DHCP_RX].request > counters_snapshot[DHCP_RX].request ) &&
(counters[DHCP_TX].request <= counters_snapshot[DHCP_TX].request ) ) ||
((counters[DHCP_RX].ack > counters_snapshot[DHCP_RX].ack ) &&
(counters[DHCP_TX].ack <= counters_snapshot[DHCP_TX].ack ) ) ) {
rv = DHCP_MON_STATUS_UNHEALTHY;
}
bool rv = true;
for (uint8_t i = 0; (i < monitored_msg_sz) && rv; i++) {
rv = rx_counters[monitored_msgs[i]] == rx_counter_snapshot[monitored_msgs[i]];
}
return rv;
}
/**
* @code dhcp_print_counters(counters);
* @code dhcp_device_is_dhcp_msg_unhealthy(type, counters);
*
* @brief Check if DHCP relay is functioning properly for message of type 'type'.
* For every rx of message 'type', there should be increment of the same message type.
*
* @param type DHCP message type
* @param counters current/snapshot counter
*
* @return true if DHCP message 'type' is transmitted,false otherwise
*/
static bool dhcp_device_is_dhcp_msg_unhealthy(dhcp_message_type_t type,
uint64_t counters[][DHCP_DIR_COUNT][DHCP_MESSAGE_TYPE_COUNT])
{
// check if DHCP message 'type' is being relayed
return ((counters[DHCP_COUNTERS_CURRENT][DHCP_RX][type] > counters[DHCP_COUNTERS_SNAPSHOT][DHCP_RX][type]) &&
(counters[DHCP_COUNTERS_CURRENT][DHCP_TX][type] <= counters[DHCP_COUNTERS_SNAPSHOT][DHCP_TX][type]) );
}
/**
* @code dhcp_device_check_positive_health(counters, counters_snapshot);
*
* @brief Check if DHCP relay is functioning properly for monitored messages (Discover, Offer, Request, ACK.)
* For every rx of monitored messages, there should be increment of the same message type.
*
* @param counters current/snapshot counter
*
* @return DHCP_MON_STATUS_HEALTHY, DHCP_MON_STATUS_UNHEALTHY, or DHCP_MON_STATUS_INDETERMINATE
*/
static dhcp_mon_status_t dhcp_device_check_positive_health(uint64_t counters[][DHCP_DIR_COUNT][DHCP_MESSAGE_TYPE_COUNT])
{
dhcp_mon_status_t rv = DHCP_MON_STATUS_HEALTHY;
bool is_dhcp_unhealthy = false;
for (uint8_t i = 0; (i < monitored_msg_sz) && !is_dhcp_unhealthy; i++) {
is_dhcp_unhealthy = dhcp_device_is_dhcp_msg_unhealthy(monitored_msgs[i], counters);
}
// if we have rx DORA then we should have corresponding tx DORA (DORA being relayed)
if (is_dhcp_unhealthy) {
rv = DHCP_MON_STATUS_UNHEALTHY;
}
return rv;
}
/**
* @code dhcp_device_check_negative_health(counters);
*
* @brief Check that DHCP relayed messages are not being transmitted out of this interface/dev
* using its counters. The interface is negatively healthy if there are not DHCP message
* travelling through it.
*
* @param counters recent interface counter
* @param counters_snapshot snapshot counters
*
* @return DHCP_MON_STATUS_HEALTHY, DHCP_MON_STATUS_UNHEALTHY, or DHCP_MON_STATUS_INDETERMINATE
*/
static dhcp_mon_status_t dhcp_device_check_negative_health(uint64_t counters[][DHCP_DIR_COUNT][DHCP_MESSAGE_TYPE_COUNT])
{
dhcp_mon_status_t rv = DHCP_MON_STATUS_HEALTHY;
uint64_t *tx_counters = counters[DHCP_COUNTERS_CURRENT][DHCP_TX];
uint64_t *tx_counter_snapshot = counters[DHCP_COUNTERS_SNAPSHOT][DHCP_TX];
bool is_dhcp_unhealthy = false;
for (uint8_t i = 0; (i < monitored_msg_sz) && !is_dhcp_unhealthy; i++) {
is_dhcp_unhealthy = tx_counters[monitored_msgs[i]] > tx_counter_snapshot[monitored_msgs[i]];
}
// for negative validation, return unhealthy if DHCP packet are being
// transmitted out of the device/interface
if (is_dhcp_unhealthy) {
rv = DHCP_MON_STATUS_UNHEALTHY;
}
return rv;
}
/**
* @code dhcp_device_check_health(check_type, counters, counters_snapshot);
*
* @brief Check that DHCP relay is functioning properly given a check type. Positive check
* indicates for every rx of DHCP message of type 'type', there would increment of
* the corresponding TX of the same message type. While negative check indicates the
* device should not be actively transmitting any DHCP messages. If it does, it is
* considered unhealthy.
*
* @param check_type type of health check
* @param counters current/snapshot counter
*
* @return DHCP_MON_STATUS_HEALTHY, DHCP_MON_STATUS_UNHEALTHY, or DHCP_MON_STATUS_INDETERMINATE
*/
static dhcp_mon_status_t dhcp_device_check_health(dhcp_mon_check_t check_type,
uint64_t counters[][DHCP_DIR_COUNT][DHCP_MESSAGE_TYPE_COUNT])
{
dhcp_mon_status_t rv = DHCP_MON_STATUS_HEALTHY;
if (dhcp_device_is_dhcp_inactive(aggregate_dev.counters)) {
rv = DHCP_MON_STATUS_INDETERMINATE;
} else if (check_type == DHCP_MON_CHECK_POSITIVE) {
rv = dhcp_device_check_positive_health(counters);
} else if (check_type == DHCP_MON_CHECK_NEGATIVE) {
rv = dhcp_device_check_negative_health(counters);
}
return rv;
}
/**
* @code dhcp_print_counters(vlan_intf, counters);
*
* @brief prints DHCP counters to sylsog.
*
* @param counters interface counter
*/
static void dhcp_print_counters(dhcp_device_counters_t *counters)
static void dhcp_print_counters(const char *vlan_intf, uint64_t counters[][DHCP_DIR_COUNT][DHCP_MESSAGE_TYPE_COUNT])
{
syslog(LOG_NOTICE, "DHCP Discover rx: %lu, tx:%lu, Offer rx: %lu, tx:%lu, Request rx: %lu, tx:%lu, ACK rx: %lu, tx:%lu\n",
counters[DHCP_RX].discover, counters[DHCP_TX].discover,
counters[DHCP_RX].offer, counters[DHCP_TX].offer,
counters[DHCP_RX].request, counters[DHCP_TX].request,
counters[DHCP_RX].ack, counters[DHCP_TX].ack);
uint64_t *rx_counters = counters[DHCP_COUNTERS_CURRENT][DHCP_RX];
uint64_t *tx_counters = counters[DHCP_COUNTERS_CURRENT][DHCP_TX];
syslog(LOG_NOTICE,
"[%*s] DHCP Discover rx/tx: %*lu/%*lu, Offer rx/tx: %*lu/%*lu, "
"Request rx/tx: %*lu/%*lu, ACK rx/tx: %*lu/%*lu\n",
IF_NAMESIZE, vlan_intf,
DHCP_COUNTER_WIDTH, rx_counters[DHCP_MESSAGE_TYPE_DISCOVER],
DHCP_COUNTER_WIDTH, tx_counters[DHCP_MESSAGE_TYPE_DISCOVER],
DHCP_COUNTER_WIDTH, rx_counters[DHCP_MESSAGE_TYPE_OFFER],
DHCP_COUNTER_WIDTH, tx_counters[DHCP_MESSAGE_TYPE_OFFER],
DHCP_COUNTER_WIDTH, rx_counters[DHCP_MESSAGE_TYPE_REQUEST],
DHCP_COUNTER_WIDTH, tx_counters[DHCP_MESSAGE_TYPE_REQUEST],
DHCP_COUNTER_WIDTH, rx_counters[DHCP_MESSAGE_TYPE_ACK],
DHCP_COUNTER_WIDTH, tx_counters[DHCP_MESSAGE_TYPE_ACK]);
}
/**
* @code init_socket(context, intf);
*
* @brief initializes socket, bind it to interface and bpf prgram, and
* @brief initializes socket, bind it to interface and bpf program, and
* associate with libevent base
*
* @param context pointer to device (interface) context
@ -402,6 +486,18 @@ int dhcp_device_get_ip(dhcp_device_context_t *context, in_addr_t *ip)
return rv;
}
/**
* @code dhcp_device_get_aggregate_context();
*
* @brief Accessor method
*
* @return pointer to aggregate device (interface) context
*/
dhcp_device_context_t* dhcp_device_get_aggregate_context()
{
return &aggregate_dev;
}
/**
* @code dhcp_device_init(context, intf, is_uplink);
*
@ -421,8 +517,7 @@ int dhcp_device_init(dhcp_device_context_t **context, const char *intf, uint8_t
dev_context->is_uplink = is_uplink;
memset(&dev_context->counters, 0, sizeof(dev_context->counters));
memset(&dev_context->counters_snapshot, 0, sizeof(dev_context->counters_snapshot));
memset(dev_context->counters, 0, sizeof(dev_context->counters));
*context = dev_context;
rv = 0;
@ -497,36 +592,44 @@ void dhcp_device_shutdown(dhcp_device_context_t *context)
}
/**
* @code dhcp_device_get_status(context);
* @code dhcp_device_get_status(check_type, context);
*
* @brief collects DHCP relay status info for a given interface. If context is null, it will report aggregate
* status
*/
dhcp_mon_status_t dhcp_device_get_status(dhcp_device_context_t *context)
dhcp_mon_status_t dhcp_device_get_status(dhcp_mon_check_t check_type, dhcp_device_context_t *context)
{
dhcp_mon_status_t rv = 0;
dhcp_mon_status_t rv = DHCP_MON_STATUS_HEALTHY;
if (context != NULL) {
rv = dhcp_device_validate(context->counters, context->counters_snapshot);
memcpy(context->counters_snapshot, context->counters, sizeof(context->counters_snapshot));
} else {
rv = dhcp_device_validate(glob_counters, glob_counters_snapshot);
memcpy(glob_counters_snapshot, glob_counters, sizeof(glob_counters_snapshot));
rv = dhcp_device_check_health(check_type, context->counters);
}
return rv;
}
/**
* @code dhcp_device_print_status();
* @code dhcp_device_update_snapshot(context);
*
* @brief prints status counters to syslog. If context is null, it will print aggregate status
* @brief Update device/interface counters snapshot
*/
void dhcp_device_update_snapshot(dhcp_device_context_t *context)
{
if (context != NULL) {
memcpy(context->counters[DHCP_COUNTERS_SNAPSHOT],
context->counters[DHCP_COUNTERS_CURRENT],
sizeof(context->counters[DHCP_COUNTERS_SNAPSHOT]));
}
}
/**
* @code dhcp_device_print_status(context);
*
* @brief prints status counters to syslog.
*/
void dhcp_device_print_status(dhcp_device_context_t *context)
{
if (context != NULL) {
dhcp_print_counters(context->counters);
} else {
dhcp_print_counters(glob_counters);
dhcp_print_counters(context->intf, context->counters);
}
}

View File

@ -17,6 +17,23 @@
#include <event2/buffer.h>
/**
* DHCP message types
**/
typedef enum
{
DHCP_MESSAGE_TYPE_DISCOVER = 1,
DHCP_MESSAGE_TYPE_OFFER = 2,
DHCP_MESSAGE_TYPE_REQUEST = 3,
DHCP_MESSAGE_TYPE_DECLINE = 4,
DHCP_MESSAGE_TYPE_ACK = 5,
DHCP_MESSAGE_TYPE_NAK = 6,
DHCP_MESSAGE_TYPE_RELEASE = 7,
DHCP_MESSAGE_TYPE_INFORM = 8,
DHCP_MESSAGE_TYPE_COUNT
} dhcp_message_type_t;
/** packet direction */
typedef enum
{
@ -26,6 +43,15 @@ typedef enum
DHCP_DIR_COUNT
} dhcp_packet_direction_t;
/** counters type */
typedef enum
{
DHCP_COUNTERS_CURRENT, /** DHCP current counters */
DHCP_COUNTERS_SNAPSHOT, /** DHCP snapshot counters */
DHCP_COUNTERS_COUNT
} dhcp_counters_type_t;
/** dhcp health status */
typedef enum
{
@ -34,14 +60,12 @@ typedef enum
DHCP_MON_STATUS_INDETERMINATE, /** DHCP relay health could not be determined */
} dhcp_mon_status_t;
/** DHCP device (interface) health counters */
typedef struct
/** dhcp check type */
typedef enum
{
uint64_t discover; /** DHCP discover packets */
uint64_t offer; /** DHCP offer packets */
uint64_t request; /** DHCP request packets */
uint64_t ack; /** DHCP ack packets */
} dhcp_device_counters_t;
DHCP_MON_CHECK_NEGATIVE, /** Presence of relayed DHCP packets activity is flagged as unhealthy state */
DHCP_MON_CHECK_POSITIVE, /** Validate that received DORA packets are relayed */
} dhcp_mon_check_t;
/** DHCP device (interface) context */
typedef struct
@ -54,10 +78,8 @@ typedef struct
char intf[IF_NAMESIZE]; /** device (interface) name */
uint8_t *buffer; /** buffer used to read socket data */
size_t snaplen; /** snap length or buffer size */
dhcp_device_counters_t counters[DHCP_DIR_COUNT];
/** current coutners of DORA packets */
dhcp_device_counters_t counters_snapshot[DHCP_DIR_COUNT];
/** counter snapshot */
uint64_t counters[DHCP_COUNTERS_COUNT][DHCP_DIR_COUNT][DHCP_MESSAGE_TYPE_COUNT];
/** current/snapshot counters of DHCP packets */
} dhcp_device_context_t;
/**
@ -72,6 +94,15 @@ typedef struct
*/
int dhcp_device_get_ip(dhcp_device_context_t *context, in_addr_t *ip);
/**
* @code dhcp_device_get_aggregate_context();
*
* @brief Accessor method
*
* @return pointer to aggregate device (interface) context
*/
dhcp_device_context_t* dhcp_device_get_aggregate_context();
/**
* @code dhcp_device_init(context, intf, is_uplink);
*
@ -116,22 +147,34 @@ int dhcp_device_start_capture(dhcp_device_context_t *context,
void dhcp_device_shutdown(dhcp_device_context_t *context);
/**
* @code dhcp_device_get_status(context);
* @code dhcp_device_get_status(check_type, context);
*
* @brief collects DHCP relay status info for a given interface. If context is null, it will report aggregate
* status
*
* @param context Device (interface) context
* @param check_type Type of validation
* @param context Device (interface) context
*
* @return DHCP_MON_STATUS_HEALTHY, DHCP_MON_STATUS_UNHEALTHY, or DHCP_MON_STATUS_INDETERMINATE
*/
dhcp_mon_status_t dhcp_device_get_status(dhcp_device_context_t *context);
dhcp_mon_status_t dhcp_device_get_status(dhcp_mon_check_t check_type, dhcp_device_context_t *context);
/**
* @code dhcp_device_print_status();
* @code dhcp_device_update_snapshot(context);
*
* @param context Device (interface) context
*
* @brief Update device/interface counters snapshot
*/
void dhcp_device_update_snapshot(dhcp_device_context_t *context);
/**
* @code dhcp_device_print_status(context);
*
* @brief prints status counters to syslog. If context is null, it will print aggregate status
*
* @param context Device (interface) context
*
* @return none
*/
void dhcp_device_print_status(dhcp_device_context_t *context);

View File

@ -12,6 +12,9 @@
#include "dhcp_devman.h"
/** Prefix appended to Aggregation device */
#define AGG_DEV_PREFIX "Agg-"
/** struct for interface information */
struct intf
{
@ -27,22 +30,34 @@ static LIST_HEAD(intf_list, intf) intfs;
static uint32_t dhcp_num_south_intf = 0;
/** dhcp_num_north_intf number of north interfaces */
static uint32_t dhcp_num_north_intf = 0;
/** dhcp_num_mgmt_intf number of mgmt interfaces */
static uint32_t dhcp_num_mgmt_intf = 0;
/** On Device vlan interface IP address corresponding vlan downlink IP
* This IP is used to filter Offer/Ack packet coming from DHCP server */
static in_addr_t vlan_ip = 0;
/** vlan interface name */
static char vlan_intf[IF_NAMESIZE] = "Undefined";
/** mgmt interface */
static struct intf *mgmt_intf = NULL;
/**
* @code dhcp_devman_get_vlan_intf();
*
* Accessor method
*/
const char* dhcp_devman_get_vlan_intf()
dhcp_device_context_t* dhcp_devman_get_agg_dev()
{
return vlan_intf;
return dhcp_device_get_aggregate_context();
}
/**
* @code dhcp_devman_get_mgmt_dev();
*
* Accessor method
*/
dhcp_device_context_t* dhcp_devman_get_mgmt_dev()
{
return mgmt_intf ? mgmt_intf->dev_context : NULL;
}
/**
@ -86,27 +101,42 @@ void dhcp_devman_shutdown()
*
* @brief adds interface to the device manager.
*/
int dhcp_devman_add_intf(const char *name, uint8_t is_uplink)
int dhcp_devman_add_intf(const char *name, char intf_type)
{
int rv = -1;
struct intf *dev = malloc(sizeof(struct intf));
if (dev != NULL) {
dev->name = name;
dev->is_uplink = is_uplink;
if (is_uplink) {
dev->is_uplink = intf_type != 'd';
switch (intf_type)
{
case 'u':
dhcp_num_north_intf++;
} else {
break;
case 'd':
dhcp_num_south_intf++;
assert(dhcp_num_south_intf <= 1);
break;
case 'm':
dhcp_num_mgmt_intf++;
assert(dhcp_num_mgmt_intf <= 1);
mgmt_intf = dev;
break;
default:
break;
}
rv = dhcp_device_init(&dev->dev_context, dev->name, dev->is_uplink);
if (rv == 0 && !is_uplink) {
if (rv == 0 && intf_type == 'd') {
rv = dhcp_device_get_ip(dev->dev_context, &vlan_ip);
strncpy(vlan_intf, name, sizeof(vlan_intf) - 1);
vlan_intf[sizeof(vlan_intf) - 1] = '\0';
dhcp_device_context_t *agg_dev = dhcp_device_get_aggregate_context();
strncpy(agg_dev->intf, AGG_DEV_PREFIX, sizeof(AGG_DEV_PREFIX));
strncpy(agg_dev->intf + sizeof(AGG_DEV_PREFIX) - 1, name, sizeof(agg_dev->intf) - sizeof(AGG_DEV_PREFIX));
agg_dev->intf[sizeof(agg_dev->intf) - 1] = '\0';
}
LIST_INSERT_HEAD(&intfs, dev, entry);
@ -152,21 +182,41 @@ int dhcp_devman_start_capture(size_t snaplen, struct event_base *base)
}
/**
* @code dhcp_devman_get_status();
* @code dhcp_devman_get_status(check_type, context);
*
* @brief collects DHCP relay status info.
*/
dhcp_mon_status_t dhcp_devman_get_status()
dhcp_mon_status_t dhcp_devman_get_status(dhcp_mon_check_t check_type, dhcp_device_context_t *context)
{
return dhcp_device_get_status(NULL);
return dhcp_device_get_status(check_type, context);
}
/**
* @code dhcp_devman_print_status();
* @code dhcp_devman_update_snapshot(context);
*
* @brief prints status counters to syslog
* @brief Update device/interface counters snapshot
*/
void dhcp_devman_print_status()
void dhcp_devman_update_snapshot(dhcp_device_context_t *context)
{
dhcp_device_print_status(NULL);
dhcp_device_update_snapshot(context);
}
/**
* @code dhcp_devman_print_status(context);
*
* @brief prints status counters to syslog, if context is null, it prints status counters for all interfaces
*/
void dhcp_devman_print_status(dhcp_device_context_t *context)
{
if (context == NULL) {
struct intf *int_ptr;
LIST_FOREACH(int_ptr, &intfs, entry) {
dhcp_device_print_status(int_ptr->dev_context);
}
dhcp_device_print_status(dhcp_devman_get_agg_dev());
} else {
dhcp_device_print_status(context);
}
}

View File

@ -36,9 +36,18 @@ void dhcp_devman_shutdown();
*
* @brief Accessor method
*
* @return pointer to vlan ip interface name
* @return pointer to aggregate device (interface) context
*/
const char* dhcp_devman_get_vlan_intf();
dhcp_device_context_t* dhcp_devman_get_agg_dev();
/**
* @code dhcp_devman_get_mgmt_intf_context();
*
* @brief Accessor method
*
* @return pointer to mgmt interface context
*/
dhcp_device_context_t* dhcp_devman_get_mgmt_dev();
/**
* @code dhcp_devman_add_intf(name, uplink);
@ -46,11 +55,13 @@ const char* dhcp_devman_get_vlan_intf();
* @brief adds interface to the device manager.
*
* @param name interface name
* @param is_uplink true for uplink (north) interface
* @param intf_type 'u' for uplink (north) interface
* 'd' for downlink (south) interface
* 'm' for mgmt interface
*
* @return 0 on success, nonzero otherwise
*/
int dhcp_devman_add_intf(const char *name, uint8_t is_uplink);
int dhcp_devman_add_intf(const char *name, char intf_type);
/**
* @code dhcp_devman_start_capture(snaplen, base);
@ -65,21 +76,35 @@ int dhcp_devman_add_intf(const char *name, uint8_t is_uplink);
int dhcp_devman_start_capture(size_t snaplen, struct event_base *base);
/**
* @code dhcp_devman_get_status();
* @code dhcp_devman_get_status(check_type, context);
*
* @brief collects DHCP relay status info.
*
* @param check_type Type of validation
* @param context pointer to device (interface) context
*
* @return DHCP_MON_STATUS_HEALTHY, DHCP_MON_STATUS_UNHEALTHY, or DHCP_MON_STATUS_INDETERMINATE
*/
dhcp_mon_status_t dhcp_devman_get_status();
dhcp_mon_status_t dhcp_devman_get_status(dhcp_mon_check_t check_type, dhcp_device_context_t *context);
/**
* @code dhcp_devman_print_status();
* @code dhcp_devman_update_snapshot(context);
*
* @param context Device (interface) context
*
* @brief Update device/interface counters snapshot
*/
void dhcp_devman_update_snapshot(dhcp_device_context_t *context);
/**
* @code dhcp_devman_print_status(context);
*
* @brief prints status counters to syslog
*
* @param context pointer to device (interface) context
*
* @return none
*/
void dhcp_devman_print_status();
void dhcp_devman_print_status(dhcp_device_context_t *context);
#endif /* DHCP_DEVMAN_H_ */

View File

@ -16,8 +16,17 @@
#include "dhcp_mon.h"
#include "dhcp_devman.h"
/** DHCP device/interface state */
typedef struct
{
dhcp_mon_check_t check_type; /** check type */
dhcp_device_context_t* (*get_context)(); /** functor to a device context accessor function */
int count; /** count in the number of unhealthy checks */
const char *msg; /** message to be printed if unhealthy state is determined */
} dhcp_mon_state_t;
/** window_interval_sec monitoring window for dhcp relay health checks */
static int window_interval_sec = 12;
static int window_interval_sec = 18;
/** dhcp_unhealthy_max_count max count of consecutive unhealthy statuses before reporting to syslog */
static int dhcp_unhealthy_max_count = 10;
/** libevent base struct */
@ -28,6 +37,25 @@ static struct event *ev_timeout = NULL;
static struct event *ev_sigint;
/** libevent SIGTERM signal event struct */
static struct event *ev_sigterm;
/** libevent SIGUSR1 signal event struct */
static struct event *ev_sigusr1;
/** DHCP monitor state data for aggregate device for mgmt device */
static dhcp_mon_state_t state_data[] = {
[0] = {
.check_type = DHCP_MON_CHECK_POSITIVE,
.get_context = dhcp_devman_get_agg_dev,
.count = 0,
.msg = "dhcpmon detected disparity in DHCP Relay behavior. Duration: %d (sec) for vlan: '%s'\n"
},
[1] = {
.check_type = DHCP_MON_CHECK_NEGATIVE,
.get_context = dhcp_devman_get_mgmt_dev,
.count = 0,
.msg = "dhcpmon detected DHCP packets traveling through mgmt interface (please check BGP routes.)"
" Duration: %d (sec) for intf: '%s'\n"
}
};
/**
* @code signal_callback(fd, event, arg);
@ -42,9 +70,47 @@ static struct event *ev_sigterm;
*/
static void signal_callback(evutil_socket_t fd, short event, void *arg)
{
syslog(LOG_ALERT, "Received signal %s\n", strsignal(fd));
dhcp_devman_print_status();
dhcp_mon_stop();
syslog(LOG_ALERT, "Received signal: '%s'\n", strsignal(fd));
dhcp_devman_print_status(NULL);
if ((fd == SIGTERM) || (fd == SIGINT)) {
dhcp_mon_stop();
}
}
/**
* @code check_dhcp_relay_health(state_data);
*
* @brief check DHCP relay overall health
*
* @param state_data pointer to dhcpmon state data
*
* @return none
*/
static void check_dhcp_relay_health(dhcp_mon_state_t *state_data)
{
dhcp_device_context_t *context = state_data->get_context();
dhcp_mon_status_t dhcp_mon_status = dhcp_devman_get_status(state_data->check_type, context);
switch (dhcp_mon_status)
{
case DHCP_MON_STATUS_UNHEALTHY:
if (++state_data->count > dhcp_unhealthy_max_count) {
syslog(LOG_ALERT, state_data->msg, state_data->count * window_interval_sec, context->intf);
dhcp_devman_print_status(context);
}
break;
case DHCP_MON_STATUS_HEALTHY:
state_data->count = 0;
break;
case DHCP_MON_STATUS_INDETERMINATE:
if (state_data->count) {
state_data->count++;
}
break;
default:
syslog(LOG_ERR, "DHCP Relay returned unknown status %d\n", dhcp_mon_status);
break;
}
}
/**
@ -60,28 +126,12 @@ static void signal_callback(evutil_socket_t fd, short event, void *arg)
*/
static void timeout_callback(evutil_socket_t fd, short event, void *arg)
{
static int count = 0;
dhcp_mon_status_t dhcp_mon_status = dhcp_devman_get_status();
for (uint8_t i = 0; i < sizeof(state_data) / sizeof(*state_data); i++) {
check_dhcp_relay_health(&state_data[i]);
}
switch (dhcp_mon_status)
{
case DHCP_MON_STATUS_UNHEALTHY:
if (++count > dhcp_unhealthy_max_count) {
syslog(LOG_ALERT, "dhcpmon detected disparity in DHCP Relay behavior. Failure count: %d for vlan: '%s'\n",
count, dhcp_devman_get_vlan_intf());
dhcp_devman_print_status();
}
break;
case DHCP_MON_STATUS_HEALTHY:
if (count > 0) {
count = 0;
}
break;
case DHCP_MON_STATUS_INDETERMINATE:
break;
default:
syslog(LOG_ERR, "DHCP Relay returned unknown status %d\n", dhcp_mon_status);
break;
for (uint8_t i = 0; i < sizeof(state_data) / sizeof(*state_data); i++) {
dhcp_devman_update_snapshot(state_data[i].get_context());
}
}
@ -118,6 +168,12 @@ int dhcp_mon_init(int window_sec, int max_count)
break;
}
ev_sigusr1 = evsignal_new(base, SIGUSR1, signal_callback, base);
if (ev_sigusr1 == NULL) {
syslog(LOG_ERR, "Could not create SIGUSER1 libevent signal!\n");
break;
}
ev_timeout = event_new(base, -1, EV_PERSIST, timeout_callback, base);
if (ev_timeout == NULL) {
syslog(LOG_ERR, "Could not create libevent timer!\n");
@ -140,10 +196,12 @@ void dhcp_mon_shutdown()
event_del(ev_timeout);
event_del(ev_sigint);
event_del(ev_sigterm);
event_del(ev_sigusr1);
event_free(ev_timeout);
event_free(ev_sigint);
event_free(ev_sigterm);
event_free(ev_sigusr1);
event_base_free(base);
}
@ -173,6 +231,11 @@ int dhcp_mon_start(size_t snaplen)
break;
}
if (evsignal_add(ev_sigusr1, NULL) != 0) {
syslog(LOG_ERR, "Could not add SIGUSR1 libevent signal!\n");
break;
}
struct timeval event_time = {.tv_sec = window_interval_sec, .tv_usec = 0};
if (evtimer_add(ev_timeout, &event_time) != 0) {
syslog(LOG_ERR, "Could not add event timer to libevent!\n");

View File

@ -24,7 +24,7 @@
static const size_t dhcpmon_default_snaplen = 65535;
/** dhcpmon_default_health_check_window: default value for a time window, during which DHCP DORA packet counts are being
* collected */
static const uint32_t dhcpmon_default_health_check_window = 12;
static const uint32_t dhcpmon_default_health_check_window = 18;
/** dhcpmon_default_unhealthy_max_count: default max consecutive unhealthy status reported before reporting an issue
* with DHCP relay */
static const uint32_t dhcpmon_default_unhealthy_max_count = 10;
@ -40,7 +40,7 @@ static const uint32_t dhcpmon_default_unhealthy_max_count = 10;
*/
static void usage(const char *prog)
{
printf("Usage: %s -id <south interface> {-iu <north interface>}+ [-w <snapshot window in sec>]"
printf("Usage: %s -id <south interface> {-iu <north interface>}+ -im <mgmt interface> [-w <snapshot window in sec>]"
"[-c <unhealthy status count>] [-s <snap length>] [-d]\n", prog);
printf("where\n");
printf("\tsouth interface: is a vlan interface,\n");
@ -50,7 +50,7 @@ static void usage(const char *prog)
printf("\tunhealthy status count: count of consecutive unhealthy status before writing an alert to syslog "
"(default %d),\n",
dhcpmon_default_unhealthy_max_count);
printf("\tsnap length: snap length of packet capture (default %d),\n", dhcpmon_default_snaplen);
printf("\tsnap length: snap length of packet capture (default %ld),\n", dhcpmon_default_snaplen);
printf("\t-d: daemonize %s.\n", prog);
exit(EXIT_SUCCESS);
@ -127,7 +127,7 @@ int main(int argc, char **argv)
usage(basename(argv[0]));
break;
case 'i':
if (dhcp_devman_add_intf(argv[i + 1], argv[i][2] == 'u') != 0) {
if (dhcp_devman_add_intf(argv[i + 1], argv[i][2]) != 0) {
usage(basename(argv[0]));
}
i += 2;

View File

@ -35,7 +35,7 @@ stderr_logfile=syslog
programs=dhcpmon-Vlan1000
[program:dhcpmon-Vlan1000]
command=/usr/sbin/dhcpmon -id Vlan1000 -iu PortChannel01 -iu PortChannel02 -iu PortChannel03 -iu PortChannel04
command=/usr/sbin/dhcpmon -id Vlan1000 -iu PortChannel01 -iu PortChannel02 -iu PortChannel03 -iu PortChannel04 -im eth0
priority=4
autostart=false
autorestart=false