diff -Nuarp a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c --- a/drivers/net/bonding/bond_3ad.c Sun Dec 21 16:09:04 2003 +++ b/drivers/net/bonding/bond_3ad.c Sun Dec 21 16:09:05 2003 @@ -47,6 +47,9 @@ * - Send LACPDU as highest priority packet to further fix the above * problem on very high Tx traffic load where packets may get dropped * by the slave. + * + * 2003/09/24 - Shmulik Hen + * - Code cleanup and style changes */ //#define BONDING_DEBUG 1 @@ -2416,6 +2419,8 @@ int bond_3ad_xmit_xor(struct sk_buff *sk skb->dev = slave->dev; skb->priority = 1; dev_queue_xmit(skb); + + goto out; } } diff -Nuarp a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h --- a/drivers/net/bonding/bond_3ad.h Sun Dec 21 16:09:04 2003 +++ b/drivers/net/bonding/bond_3ad.h Sun Dec 21 16:09:05 2003 @@ -28,6 +28,9 @@ * 2003/05/01 - Shmulik Hen * - Renamed bond_3ad_link_status_changed() to * bond_3ad_handle_link_change() for compatibility with TLB. + * + * 2003/09/24 - Shmulik Hen + * - Code cleanup and style changes */ #ifndef __BOND_3AD_H__ diff -Nuarp a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c --- a/drivers/net/bonding/bond_alb.c Sun Dec 21 16:09:04 2003 +++ b/drivers/net/bonding/bond_alb.c Sun Dec 21 16:09:05 2003 @@ -28,6 +28,9 @@ * 2003/08/06 - Amir Noam * - Add support for setting bond's MAC address with special * handling required for ALB/TLB. + * + * 2003/09/24 - Shmulik Hen + * - Code cleanup and style changes */ //#define BONDING_DEBUG 1 @@ -52,11 +55,11 @@ #define ALB_TIMER_TICKS_PER_SEC 10 /* should be a divisor of HZ */ -#define BOND_TLB_REBALANCE_INTERVAL 10 /* in seconds, periodic re-balancing - * used for division - never set +#define BOND_TLB_REBALANCE_INTERVAL 10 /* In seconds, periodic re-balancing. + * Used for division - never set * to zero !!! 
*/ -#define BOND_ALB_LP_INTERVAL 1 /* in seconds periodic send of +#define BOND_ALB_LP_INTERVAL 1 /* In seconds, periodic send of * learning packets to the switch */ @@ -68,7 +71,7 @@ #define TLB_HASH_TABLE_SIZE 256 /* The size of the clients hash table. * Note that this value MUST NOT be smaller - * because the key hash table BYTE wide ! + * because the key hash table is BYTE wide ! */ @@ -143,7 +146,7 @@ static inline void tlb_init_table_entry( { if (save_load) { entry->load_history = 1 + entry->tx_bytes / - BOND_TLB_REBALANCE_INTERVAL; + BOND_TLB_REBALANCE_INTERVAL; entry->tx_bytes = 0; } @@ -380,15 +383,18 @@ out: static struct slave *rlb_next_rx_slave(struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct slave *rx_slave = NULL, *slave; + struct slave *rx_slave, *slave, *start_at; int i = 0; - slave = bond_info->next_rx_slave; - if (!slave) { - slave = bond->first_slave; + if (bond_info->next_rx_slave) { + start_at = bond_info->next_rx_slave; + } else { + start_at = bond->first_slave; } - bond_for_each_slave(bond, slave, i) { + rx_slave = NULL; + + bond_for_each_slave_from(bond, slave, i, start_at) { if (SLAVE_IS_OK(slave)) { if (!rx_slave) { rx_slave = slave; @@ -907,7 +913,7 @@ static void alb_swap_mac_addr(struct bon } if (bond->alb_info.rlb_enabled && slaves_state_differ) { - /* A disabled slave was assigned an active mac addr */ + /* A disabled slave was assigned an active mac addr */ rlb_teach_disabled_mac_on_primary(bond, disabled_slave->dev->dev_addr); } @@ -929,10 +935,8 @@ static void alb_swap_mac_addr(struct bon */ static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) { - struct slave *tmp_slave; int perm_curr_diff; int perm_bond_diff; - int i, found = 0; perm_curr_diff = memcmp(slave->perm_hwaddr, slave->dev->dev_addr, @@ -940,7 +944,11 @@ static void alb_change_hw_addr_on_detach perm_bond_diff = memcmp(slave->perm_hwaddr, bond->dev->dev_addr, ETH_ALEN); + if (perm_curr_diff && 
perm_bond_diff) { + struct slave *tmp_slave; + int i, found = 0; + bond_for_each_slave(bond, tmp_slave, i) { if (!memcmp(slave->perm_hwaddr, tmp_slave->dev->dev_addr, @@ -1018,8 +1026,8 @@ static int alb_handle_addr_collision_on_ return 0; } - /* the slave's address is equal to the address of the bond - * search for a spare address in the bond for this slave. + /* The slave's address is equal to the address of the bond. + * Search for a spare address in the bond for this slave. */ free_mac_slave = NULL; @@ -1470,7 +1478,7 @@ void bond_alb_handle_link_change(struct void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) { struct slave *swap_slave; - int i, found = 0; + int i; if (bond->curr_active_slave == new_slave) { return; @@ -1493,18 +1501,19 @@ void bond_alb_handle_active_change(struc * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave */ if (!swap_slave) { + struct slave *tmp_slave; /* find slave that is holding the bond's mac address */ - bond_for_each_slave(bond, swap_slave, i) { - if (!memcmp(swap_slave->dev->dev_addr, + bond_for_each_slave(bond, tmp_slave, i) { + if (!memcmp(tmp_slave->dev->dev_addr, bond->dev->dev_addr, ETH_ALEN)) { - found = 1; + swap_slave = tmp_slave; break; } } } /* curr_active_slave must be set before calling alb_swap_mac_addr */ - if (found) { + if (swap_slave) { /* swap mac address */ alb_swap_mac_addr(bond, swap_slave, new_slave); } else { @@ -1520,9 +1529,9 @@ int bond_alb_set_mac_address(struct net_ { struct bonding *bond = (struct bonding *)bond_dev->priv; struct sockaddr *sa = addr; - struct slave *swap_slave; + struct slave *slave, *swap_slave; int res; - int i, found = 0; + int i; if (!is_valid_ether_addr(sa->sa_data)) { return -EADDRNOTAVAIL; @@ -1543,14 +1552,16 @@ int bond_alb_set_mac_address(struct net_ return 0; } - bond_for_each_slave(bond, swap_slave, i) { - if (!memcmp(swap_slave->dev->dev_addr, bond_dev->dev_addr, ETH_ALEN)) { - found = 1; + swap_slave = NULL; + + 
bond_for_each_slave(bond, slave, i) { + if (!memcmp(slave->dev->dev_addr, bond_dev->dev_addr, ETH_ALEN)) { + swap_slave = slave; break; } } - if (found) { + if (swap_slave) { alb_swap_mac_addr(bond, swap_slave, bond->curr_active_slave); } else { alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr, diff -Nuarp a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h --- a/drivers/net/bonding/bond_alb.h Sun Dec 21 16:09:04 2003 +++ b/drivers/net/bonding/bond_alb.h Sun Dec 21 16:09:05 2003 @@ -24,6 +24,9 @@ * 2003/08/06 - Amir Noam * - Add support for setting bond's MAC address with special * handling required for ALB/TLB. + * + * 2003/09/24 - Shmulik Hen + * - Code cleanup and style changes */ #ifndef __BOND_ALB_H__ diff -Nuarp a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c --- a/drivers/net/bonding/bond_main.c Sun Dec 21 16:09:04 2003 +++ b/drivers/net/bonding/bond_main.c Sun Dec 21 16:09:05 2003 @@ -426,6 +426,32 @@ * - Convert /proc to seq_file interface. * Change /proc/net/bondX/info to /proc/net/bonding/bondX. * Set version to 2.4.1. + * + * 2003/11/20 - Amir Noam + * - Fix /proc creation/destruction. + * + * 2003/12/01 - Shmulik Hen + * - Massive cleanup - Set version to 2.5.0 + * Code changes: + * o Consolidate format of prints and debug prints. + * o Remove bonding_t/slave_t typedefs and consolidate all casts. + * o Remove dead code and unnecessary checks. + * o Consolidate starting/stopping timers. + * o Consolidate handling of primary module param throughout the code. + * o Removed multicast module param support - all settings are done + * according to mode. + * o Slave list iteration - bond is no longer part of the list, + * added cyclic list iteration macros. + * o Consolidate error handling in all xmit functions. + * Style changes: + * o Consolidate function naming and declarations. + * o Consolidate function params and local variables names. + * o Consolidate return values. + * o Consolidate curly braces. 
+ * o Consolidate conditionals format. + * o Change struct member names and types. + * o Chomp trailing spaces, remove empty lines, fix indentations. + * o Re-organize code according to context. */ //#define BONDING_DEBUG 1 @@ -454,7 +480,6 @@ #include #include #include - #include #include #include @@ -463,52 +488,72 @@ #include #include #include - -#include #include #include #include #include #include +#include #include "bonding.h" #include "bond_3ad.h" #include "bond_alb.h" -static const char *version = -DRV_NAME ".c:v" DRV_VERSION " (" DRV_RELDATE ")\n"; +/*---------------------------- Module parameters ----------------------------*/ /* monitor all links that often (in milliseconds). <=0 disables monitoring */ -#ifndef BOND_LINK_MON_INTERV #define BOND_LINK_MON_INTERV 0 -#endif - -#ifndef BOND_LINK_ARP_INTERV #define BOND_LINK_ARP_INTERV 0 -#endif +#define MAX_ARP_IP_TARGETS 16 -#ifndef MAX_ARP_IP_TARGETS -#define MAX_ARP_IP_TARGETS 16 -#endif +static int max_bonds = BOND_DEFAULT_MAX_BONDS; +static int miimon = BOND_LINK_MON_INTERV; +static int updelay = 0; +static int downdelay = 0; +static int use_carrier = 1; +static char *mode = NULL; +static char *primary = NULL; +static char *lacp_rate = NULL; +static int arp_interval = BOND_LINK_ARP_INTERV; +static char *arp_ip_target[MAX_ARP_IP_TARGETS] = { NULL, }; + +MODULE_PARM(max_bonds, "i"); +MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); +MODULE_PARM(miimon, "i"); +MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); +MODULE_PARM(updelay, "i"); +MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); +MODULE_PARM(downdelay, "i"); +MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds"); +MODULE_PARM(use_carrier, "i"); +MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; 0 for off, 1 for on (default)"); +MODULE_PARM(mode, "s"); +MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 
for xor"); +MODULE_PARM(primary, "s"); +MODULE_PARM_DESC(primary, "Primary network device to use"); +MODULE_PARM(lacp_rate, "s"); +MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner (slow/fast)"); +MODULE_PARM(arp_interval, "i"); +MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); +MODULE_PARM(arp_ip_target, "1-" __MODULE_STRING(MAX_ARP_IP_TARGETS) "s"); +MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); -#define USES_PRIMARY(mode) \ - (((mode) == BOND_MODE_ACTIVEBACKUP) || \ - ((mode) == BOND_MODE_TLB) || \ - ((mode) == BOND_MODE_ALB)) +/*----------------------------- Global variables ----------------------------*/ -struct bond_parm_tbl { - char *modename; - int mode; -}; +static const char *version = + DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"; -static int arp_interval = BOND_LINK_ARP_INTERV; -static char *arp_ip_target[MAX_ARP_IP_TARGETS] = { NULL, }; -static u32 arp_target[MAX_ARP_IP_TARGETS] = { 0, } ; -static int arp_ip_count = 0; -static u32 my_ip = 0; +static LIST_HEAD(bond_dev_list); -static char *primary= NULL; +#ifdef CONFIG_PROC_FS +static struct proc_dir_entry *bond_proc_dir = NULL; +#endif -static int app_abi_ver = 0; +static u32 arp_target[MAX_ARP_IP_TARGETS] = { 0, } ; +static int arp_ip_count = 0; +static u32 my_ip = 0; +static int bond_mode = BOND_MODE_ROUNDROBIN; +static int lacp_fast = 0; +static int app_abi_ver = 0; static int orig_app_abi_ver = -1; /* This is used to save the first ABI version * we receive from the application. Once set, * it won't be changed, and the module will @@ -517,14 +562,16 @@ static int orig_app_abi_ver = -1; /* Thi * another ABI version. 
*/ -static int max_bonds = BOND_DEFAULT_MAX_BONDS; -static int miimon = BOND_LINK_MON_INTERV; -static int use_carrier = 1; -static int bond_mode = BOND_MODE_ROUNDROBIN; -static int updelay = 0; -static int downdelay = 0; +struct bond_parm_tbl { + char *modename; + int mode; +}; -static char *mode = NULL; +static struct bond_parm_tbl bond_lacp_tbl[] = { +{ "slow", AD_LACP_SLOW}, +{ "fast", AD_LACP_FAST}, +{ NULL, -1}, +}; static struct bond_parm_tbl bond_mode_tbl[] = { { "balance-rr", BOND_MODE_ROUNDROBIN}, @@ -537,75 +584,7 @@ static struct bond_parm_tbl bond_mode_tb { NULL, -1}, }; -static int lacp_fast = 0; -static char *lacp_rate = NULL; - -static struct bond_parm_tbl bond_lacp_tbl[] = { -{ "slow", AD_LACP_SLOW}, -{ "fast", AD_LACP_FAST}, -{ NULL, -1}, -}; - -static LIST_HEAD(bond_dev_list); -#ifdef CONFIG_PROC_FS -static struct proc_dir_entry *bond_proc_dir = NULL; -#endif - -MODULE_PARM(max_bonds, "i"); -MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); -MODULE_PARM(miimon, "i"); -MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); -MODULE_PARM(use_carrier, "i"); -MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; 0 for off, 1 for on (default)"); -MODULE_PARM(mode, "s"); -MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor"); -MODULE_PARM(arp_interval, "i"); -MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); -MODULE_PARM(arp_ip_target, "1-" __MODULE_STRING(MAX_ARP_IP_TARGETS) "s"); -MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); -MODULE_PARM(updelay, "i"); -MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds"); -MODULE_PARM(downdelay, "i"); -MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds"); -MODULE_PARM(primary, "s"); -MODULE_PARM_DESC(primary, "Primary network device to use"); -MODULE_PARM(lacp_rate, "s"); -MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad 
partner (slow/fast)"); - -static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev); -static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev); -static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev); -static struct net_device_stats *bond_get_stats(struct net_device *bond_dev); -static void bond_mii_monitor(struct net_device *bond_dev); -static void bond_loadbalance_arp_mon(struct net_device *bond_dev); -static void bond_activebackup_arp_mon(struct net_device *bond_dev); -static void bond_mc_list_destroy(struct bonding *bond); -static void bond_mc_add(struct bonding *bond, void *addr, int alen); -static void bond_mc_delete(struct bonding *bond, void *addr, int alen); -static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, int gpf_flag); -static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2); -static void bond_set_promiscuity(struct bonding *bond, int inc); -static void bond_set_allmulti(struct bonding *bond, int inc); -static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list); -static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active); -static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev); -static int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); -static int bond_release_all(struct net_device *bond_dev); -static int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev); -static void bond_change_active_slave(struct bonding *bond, struct slave *new_active); -static void bond_select_active_slave(struct bonding *bond); -static struct slave *bond_find_best_slave(struct bonding *bond); - -static void bond_arp_send_all(struct slave *slave) -{ - int i; - - for (i = 0; (idev, - my_ip, NULL, slave->dev->dev_addr, - NULL); - } -} +/*---------------------------- General routines 
-----------------------------*/ static const char *bond_mode_name(void) { @@ -629,84 +608,7 @@ static const char *bond_mode_name(void) } } -void bond_set_slave_inactive_flags(struct slave *slave) -{ - slave->state = BOND_STATE_BACKUP; - slave->dev->flags |= IFF_NOARP; -} - -void bond_set_slave_active_flags(struct slave *slave) -{ - slave->state = BOND_STATE_ACTIVE; - slave->dev->flags &= ~IFF_NOARP; -} - -/* - * This function detaches the slave from the list. - * WARNING: no check is made to verify if the slave effectively - * belongs to . - * Nothing is freed on return, structures are just unchained. - * If any slave pointer in bond was pointing to , - * it should be changed by the calling function. - * - * bond->lock held for writing by caller. - */ -static void bond_detach_slave(struct bonding *bond, struct slave *slave) -{ - if (slave->next) { - slave->next->prev = slave->prev; - } - - if (slave->prev) { - slave->prev->next = slave->next; - } - - if (bond->first_slave == slave) { /* slave is the first slave */ - if (bond->slave_cnt > 1) { /* there are more slave */ - bond->first_slave = slave->next; - } else { - bond->first_slave = NULL; /* slave was the last one */ - } - } - - slave->next = NULL; - slave->prev = NULL; - bond->slave_cnt--; -} - -/* - * This function attaches the slave to the end of list. - * - * bond->lock held for writing by caller. - */ -static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) -{ - if (bond->first_slave == NULL) { /* attaching the first slave */ - new_slave->next = new_slave; - new_slave->prev = new_slave; - bond->first_slave = new_slave; - } else { - new_slave->next = bond->first_slave; - new_slave->prev = bond->first_slave->prev; - new_slave->next->prev = new_slave; - new_slave->prev->next = new_slave; - } - - bond->slave_cnt++; -} - -/* - * Less bad way to call ioctl from within the kernel; this needs to be - * done some other way to get the call out of interrupt context. 
- * Needs "ioctl" variable to be supplied by calling context. - */ -#define IOCTL(dev, arg, cmd) ({ \ - int res = 0; \ - mm_segment_t fs = get_fs(); \ - set_fs(get_ds()); \ - res = ioctl(dev, arg, cmd); \ - set_fs(fs); \ - res; }) +/*------------------------------- Link status -------------------------------*/ /* * Get link speed and duplex from the slave's base driver @@ -835,175 +737,68 @@ static int bond_check_dev_link(struct ne return (reporting ? -1 : BMSR_LSTATUS); } -/* register to receive lacpdus on a bond */ -static void bond_register_lacpdu(struct bonding *bond) -{ - struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); - - /* initialize packet type */ - pk_type->type = PKT_TYPE_LACPDU; - pk_type->dev = bond->dev; - pk_type->func = bond_3ad_lacpdu_recv; - pk_type->data = (void*)1; /* understand shared skbs */ - - dev_add_pack(pk_type); -} +/*----------------------------- Multicast list ------------------------------*/ -/* unregister to receive lacpdus on a bond */ -static void bond_unregister_lacpdu(struct bonding *bond) +/* + * Returns non-0 if dmi1 and dmi2 are the same, 0 otherwise + */ +static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) { - dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); + return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && + dmi1->dmi_addrlen == dmi2->dmi_addrlen; } -static int bond_open(struct net_device *bond_dev) +/* + * returns dmi entry if found, NULL otherwise + */ +static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) { - struct bonding *bond = (struct bonding *)bond_dev->priv; - struct timer_list *timer = &bond->mii_timer; - struct timer_list *arp_timer = &bond->arp_timer; - - bond->kill_timers = 0; - - if ((bond_mode == BOND_MODE_TLB) || - (bond_mode == BOND_MODE_ALB)) { - struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer); + struct dev_mc_list *idmi; - /* bond_alb_initialize must be called 
before the timer - * is started. - */ - if (bond_alb_initialize(bond, (bond_mode == BOND_MODE_ALB))) { - /* something went wrong - fail the open operation */ - return -1; + for (idmi = mc_list; idmi; idmi = idmi->next) { + if (bond_is_dmi_same(dmi, idmi)) { + return idmi; } - - init_timer(alb_timer); - alb_timer->expires = jiffies + 1; - alb_timer->data = (unsigned long)bond; - alb_timer->function = (void *)&bond_alb_monitor; - add_timer(alb_timer); } - if (miimon) { /* link check interval, in milliseconds. */ - init_timer(timer); - timer->expires = jiffies + 1; - timer->data = (unsigned long)bond_dev; - timer->function = (void *)&bond_mii_monitor; - add_timer(timer); - } + return NULL; +} - if (arp_interval) { /* arp interval, in milliseconds. */ - init_timer(arp_timer); - arp_timer->expires = jiffies + 1; - arp_timer->data = (unsigned long)bond_dev; - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - arp_timer->function = (void *)&bond_activebackup_arp_mon; - } else { - arp_timer->function = (void *)&bond_loadbalance_arp_mon; +/* + * Push the promiscuity flag down to appropriate slaves + */ +static void bond_set_promiscuity(struct bonding *bond, int inc) +{ + if (USES_PRIMARY(bond_mode)) { + /* write lock already acquired */ + if (bond->curr_active_slave) { + dev_set_promiscuity(bond->curr_active_slave->dev, inc); + } + } else { + struct slave *slave; + int i; + bond_for_each_slave(bond, slave, i) { + dev_set_promiscuity(slave->dev, inc); } - add_timer(arp_timer); } - - if (bond_mode == BOND_MODE_8023AD) { - struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); - init_timer(ad_timer); - ad_timer->expires = jiffies + 1; - ad_timer->data = (unsigned long)bond; - ad_timer->function = (void *)&bond_3ad_state_machine_handler; - add_timer(ad_timer); - - /* register to receive LACPDUs */ - bond_register_lacpdu(bond); - } - - return 0; -} - -static int bond_close(struct net_device *bond_dev) -{ - struct bonding *bond = (struct bonding *)bond_dev->priv; - - 
write_lock_bh(&bond->lock); - - bond_mc_list_destroy(bond); - - if (bond_mode == BOND_MODE_8023AD) { - /* Unregister the receive of LACPDUs */ - bond_unregister_lacpdu(bond); - } - - /* signal timers not to re-arm */ - bond->kill_timers = 1; - - write_unlock_bh(&bond->lock); - - /* del_timer_sync must run without holding the bond->lock - * because a running timer might be trying to hold it too - */ - - if (miimon) { /* link check interval, in milliseconds. */ - del_timer_sync(&bond->mii_timer); - } - - if (arp_interval) { /* arp interval, in milliseconds. */ - del_timer_sync(&bond->arp_timer); - } - - switch (bond_mode) { - case BOND_MODE_8023AD: - del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); - break; - case BOND_MODE_TLB: - case BOND_MODE_ALB: - del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); - break; - default: - break; - } - - /* Release the bonded slaves */ - bond_release_all(bond_dev); - - if ((bond_mode == BOND_MODE_TLB) || - (bond_mode == BOND_MODE_ALB)) { - /* Must be called only after all - * slaves have been released - */ - bond_alb_deinitialize(bond); - } - - return 0; -} - -/* - * flush all members of flush->mc_list from device dev->mc_list - */ -static void bond_mc_list_flush(struct net_device *slave_dev, struct net_device *bond_dev) -{ - struct dev_mc_list *dmi; - - for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { - dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - } - - if (bond_mode == BOND_MODE_8023AD) { - /* del lacpdu mc addr from mc list */ - u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; - - dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); - } -} +} /* - * Totally destroys the mc_list in bond + * Push the allmulti flag down to all slaves */ -static void bond_mc_list_destroy(struct bonding *bond) +static void bond_set_allmulti(struct bonding *bond, int inc) { - struct dev_mc_list *dmi; - - dmi = bond->mc_list; - while (dmi) { - bond->mc_list = dmi->next; - kfree(dmi); - dmi = bond->mc_list; + if 
(USES_PRIMARY(bond_mode)) { + /* write lock already acquired */ + if (bond->curr_active_slave) { + dev_set_allmulti(bond->curr_active_slave->dev, inc); + } + } else { + struct slave *slave; + int i; + bond_for_each_slave(bond, slave, i) { + dev_set_allmulti(slave->dev, inc); + } } } @@ -1048,6 +843,21 @@ static void bond_mc_delete(struct bondin } /* + * Totally destroys the mc_list in bond + */ +static void bond_mc_list_destroy(struct bonding *bond) +{ + struct dev_mc_list *dmi; + + dmi = bond->mc_list; + while (dmi) { + bond->mc_list = dmi->next; + kfree(dmi); + dmi = bond->mc_list; + } +} + +/* * Copy all the Multicast addresses from src to the bonding device dst */ static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond, int gpf_flag) @@ -1074,118 +884,26 @@ static int bond_mc_list_copy(struct dev_ } /* - * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise - */ -static inline int bond_is_dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2) -{ - return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 && - dmi1->dmi_addrlen == dmi2->dmi_addrlen; -} - -/* - * Push the promiscuity flag down to appropriate slaves - */ -static void bond_set_promiscuity(struct bonding *bond, int inc) -{ - if (USES_PRIMARY(bond_mode)) { - /* write lock already acquired */ - if (bond->curr_active_slave) { - dev_set_promiscuity(bond->curr_active_slave->dev, inc); - } - } else { - struct slave *slave; - int i; - bond_for_each_slave(bond, slave, i) { - dev_set_promiscuity(slave->dev, inc); - } - } -} - -/* - * Push the allmulti flag down to all slaves - */ -static void bond_set_allmulti(struct bonding *bond, int inc) -{ - if (USES_PRIMARY(bond_mode)) { - /* write lock already acquired */ - if (bond->curr_active_slave) { - dev_set_allmulti(bond->curr_active_slave->dev, inc); - } - } else { - struct slave *slave; - int i; - bond_for_each_slave(bond, slave, i) { - dev_set_allmulti(slave->dev, inc); - } - } -} - -/* - * returns dmi entry if 
found, NULL otherwise + * flush all members of flush->mc_list from device dev->mc_list */ -static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) -{ - struct dev_mc_list *idmi; - - for (idmi = mc_list; idmi; idmi = idmi->next) { - if (bond_is_dmi_same(dmi, idmi)) { - return idmi; - } - } - - return NULL; -} - -static void bond_set_multicast_list(struct net_device *bond_dev) +static void bond_mc_list_flush(struct net_device *bond_dev, struct net_device *slave_dev) { - struct bonding *bond = (struct bonding *)bond_dev->priv; struct dev_mc_list *dmi; - write_lock_bh(&bond->lock); - - /* - * Do promisc before checking multicast_mode - */ - if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) { - bond_set_promiscuity(bond, 1); - } - - if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) { - bond_set_promiscuity(bond, -1); - } - - /* set allmulti flag to slaves */ - if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) { - bond_set_allmulti(bond, 1); - } - - if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) { - bond_set_allmulti(bond, -1); - } - - bond->flags = bond_dev->flags; - - /* looking for addresses to add to slaves' mc list */ for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { - if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) { - bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); - } - } - - /* looking for addresses to delete from slaves' list */ - for (dmi = bond->mc_list; dmi; dmi = dmi->next) { - if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) { - bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); - } + dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); } - /* save master's multicast list */ - bond_mc_list_destroy(bond); - bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC); + if (bond_mode == BOND_MODE_8023AD) { + /* del lacpdu mc addr from mc list */ + u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; - 
write_unlock_bh(&bond->lock); + dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0); + } } +/*--------------------------- Active slave change ---------------------------*/ + /* * Update the mc list and multicast-related flags for the new and * old active slaves (if any) according to the multicast mode, and @@ -1231,18 +949,239 @@ static void bond_mc_swap(struct bonding } } -/* enslave device to bond device */ -static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) +/** + * bond_find_best_slave - select the best available slave to be the active one + * @bond: our bonding struct + * + * Warning: Caller must hold curr_slave_lock for writing. + */ +static struct slave *bond_find_best_slave(struct bonding *bond) { - struct bonding *bond = (struct bonding *)bond_dev->priv; - struct slave *new_slave = NULL; - struct dev_mc_list *dmi; - int link_reporting; - struct sockaddr addr; - int res = 0; + struct slave *new_active, *old_active; + struct slave *bestslave = NULL; + int mintime; + int i; - if (slave_dev->do_ioctl == NULL) { - printk(KERN_WARNING DRV_NAME + new_active = old_active = bond->curr_active_slave; + + if (!new_active) { /* there were no active slaves left */ + if (bond->slave_cnt > 0) { /* found one slave */ + new_active = bond->first_slave; + } else { + return NULL; /* still no slave, return NULL */ + } + } + + mintime = updelay; + + /* first try the primary link; if arping, a link must tx/rx traffic + * before it can be considered the curr_active_slave - also, we would skip + * slaves between the curr_active_slave and primary_slave that may be up + * and able to arp + */ + if ((bond->primary_slave) && + (!arp_interval) && + (IS_UP(bond->primary_slave->dev))) { + new_active = bond->primary_slave; + } + + /* remember where to stop iterating over the slaves */ + old_active = new_active; + + bond_for_each_slave_from(bond, new_active, i, old_active) { + if (IS_UP(new_active->dev)) { + if (new_active->link == BOND_LINK_UP) { + return 
new_active; + } else if (new_active->link == BOND_LINK_BACK) { + /* link up, but waiting for stabilization */ + if (new_active->delay < mintime) { + mintime = new_active->delay; + bestslave = new_active; + } + } + } + } + + return bestslave; +} + +/** + * bond_change_active_slave - change the active slave into the specified one + * @bond: our bonding struct + * @new_active: the new slave to make the active one + * + * Set the new slave to the bond's settings and unset them on the old + * curr_active_slave. + * Settings include flags, mc-list, promiscuity, allmulti, etc. + * + * If @new_active's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, + * because it is apparently the best available slave we have, even though its + * updelay hasn't timed out yet. + * + * Warning: Caller must hold curr_slave_lock for writing. + */ +static void bond_change_active_slave(struct bonding *bond, struct slave *new_active) +{ + struct slave *old_active = bond->curr_active_slave; + + if (old_active == new_active) { + return; + } + + if (new_active) { + if (new_active->link == BOND_LINK_BACK) { + if (USES_PRIMARY(bond_mode)) { + printk(KERN_INFO DRV_NAME + ": %s: making interface %s the new " + "active one %d ms earlier.\n", + bond->dev->name, new_active->dev->name, + (updelay - new_active->delay) * miimon); + } + + new_active->delay = 0; + new_active->link = BOND_LINK_UP; + new_active->jiffies = jiffies; + + if (bond_mode == BOND_MODE_8023AD) { + bond_3ad_handle_link_change(new_active, BOND_LINK_UP); + } + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); + } + } else { + if (USES_PRIMARY(bond_mode)) { + printk(KERN_INFO DRV_NAME + ": %s: making interface %s the new " + "active one.\n", + bond->dev->name, new_active->dev->name); + } + } + } + + if (bond_mode == BOND_MODE_ACTIVEBACKUP) { + if (old_active) { + bond_set_slave_inactive_flags(old_active); + } + + if (new_active) { + 
bond_set_slave_active_flags(new_active); + } + } + + if (USES_PRIMARY(bond_mode)) { + bond_mc_swap(bond, new_active, old_active); + } + + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + bond_alb_handle_active_change(bond, new_active); + } else { + bond->curr_active_slave = new_active; + } +} + +/** + * bond_select_active_slave - select a new active slave, if needed + * @bond: our bonding struct + * + * This function should be called when one of the following occurs: + * - The old curr_active_slave has been released or lost its link. + * - The primary_slave has got its link back. + * - A slave has got its link back and there's no old curr_active_slave. + * + * Warning: Caller must hold curr_slave_lock for writing. + */ +static void bond_select_active_slave(struct bonding *bond) +{ + struct slave *best_slave; + + best_slave = bond_find_best_slave(bond); + if (best_slave != bond->curr_active_slave) { + bond_change_active_slave(bond, best_slave); + } +} + +/*--------------------------- slave list handling ---------------------------*/ + +/* + * This function attaches the slave to the end of list. + * + * bond->lock held for writing by caller. + */ +static void bond_attach_slave(struct bonding *bond, struct slave *new_slave) +{ + if (bond->first_slave == NULL) { /* attaching the first slave */ + new_slave->next = new_slave; + new_slave->prev = new_slave; + bond->first_slave = new_slave; + } else { + new_slave->next = bond->first_slave; + new_slave->prev = bond->first_slave->prev; + new_slave->next->prev = new_slave; + new_slave->prev->next = new_slave; + } + + bond->slave_cnt++; +} + +/* + * This function detaches the slave from the list. + * WARNING: no check is made to verify if the slave effectively + * belongs to this bond. + * Nothing is freed on return, structures are just unchained. + * If any slave pointer in bond was pointing to this slave, + * it should be changed by the calling function. + * + * bond->lock held for writing by caller. 
+ */ +static void bond_detach_slave(struct bonding *bond, struct slave *slave) +{ + if (slave->next) { + slave->next->prev = slave->prev; + } + + if (slave->prev) { + slave->prev->next = slave->next; + } + + if (bond->first_slave == slave) { /* slave is the first slave */ + if (bond->slave_cnt > 1) { /* there are more slave */ + bond->first_slave = slave->next; + } else { + bond->first_slave = NULL; /* slave was the last one */ + } + } + + slave->next = NULL; + slave->prev = NULL; + bond->slave_cnt--; +} + +/*---------------------------------- IOCTL ----------------------------------*/ + +static int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev) +{ + dprintk("bond_dev=%p\n", bond_dev); + dprintk("slave_dev=%p\n", slave_dev); + dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len); + memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); + return 0; +} + +/* enslave device to bond device */ +static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) +{ + struct bonding *bond = (struct bonding *)bond_dev->priv; + struct slave *new_slave = NULL; + struct dev_mc_list *dmi; + struct sockaddr addr; + int link_reporting; + int res = 0; + + if (slave_dev->do_ioctl == NULL) { + printk(KERN_WARNING DRV_NAME ": Warning : no link monitoring support for %s\n", slave_dev->name); } @@ -1321,10 +1260,10 @@ static int bond_enslave(struct net_devic */ memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); - /* set slave to master's mac address - * The application already set the master's - * mac address to that of the first slave - */ + /* set slave to master's mac address + * The application already set the master's + * mac address to that of the first slave + */ memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); addr.sa_family = slave_dev->type; res = slave_dev->set_mac_address(slave_dev, &addr); @@ -1511,7 +1450,7 @@ static int bond_enslave(struct net_devic lacp_fast); } else { 
SLAVE_AD_INFO(new_slave).id = - SLAVE_AD_INFO(new_slave->prev).id + 1; + SLAVE_AD_INFO(new_slave->prev).id + 1; } bond_3ad_bind_slave(new_slave); @@ -1605,235 +1544,29 @@ err_free: } /* - * This function changes the active slave to slave <slave_dev>. - * It returns -EINVAL in the following cases. - * - <slave_dev> is not found in the list. - * - There is not active slave now. - * - <slave_dev> is already active. - * - The link state of <slave_dev> is not BOND_LINK_UP. - * - <slave_dev> is not running. - * In these cases, this fuction does nothing. - * In the other cases, currnt_slave pointer is changed and 0 is returned. + * Try to release the slave device from the bond device + * It is legal to access curr_active_slave without a lock because all the function + * is write-locked. + * + * The rules for slave state should be: + * for Active/Backup: + * Active stays on all backups go down + * for Bonded connections: + * The first up interface should be left on and all others downed. */ -static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) +static int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) { struct bonding *bond = (struct bonding *)bond_dev->priv; - struct slave *old_active = NULL; - struct slave *new_active = NULL; - int res = 0; + struct slave *slave; + struct sockaddr addr; + int mac_addr_differ; - /* Verify that master_dev is indeed the master of slave_dev */ + /* slave is not a slave or master is not master of this slave */ if (!(slave_dev->flags & IFF_SLAVE) || (slave_dev->master != bond_dev)) { - - return -EINVAL; - } - - write_lock_bh(&bond->lock); - - old_active = bond->curr_active_slave; - new_active = bond_get_slave_by_dev(bond, slave_dev); - - /* - * Changing to the current active: do nothing; return success.
- */ - if (new_active && (new_active == old_active)) { - write_unlock_bh(&bond->lock); - return 0; - } - - if ((new_active) && - (old_active) && - (new_active->link == BOND_LINK_UP) && - IS_UP(new_active->dev)) { - bond_change_active_slave(bond, new_active); - } else { - res = -EINVAL; - } - - write_unlock_bh(&bond->lock); - - return res; -} - -/** - * find_best_interface - select the best available slave to be the active one - * @bond: our bonding struct - * - * Warning: Caller must hold curr_slave_lock for writing. - */ -static struct slave *bond_find_best_slave(struct bonding *bond) -{ - struct slave *new_active, *old_active; - struct slave *bestslave = NULL; - int mintime; - int i; - - new_active = old_active = bond->curr_active_slave; - - if (!new_active) { /* there were no active slaves left */ - if (bond->slave_cnt > 0) { /* found one slave */ - new_active = bond->first_slave; - } else { - return NULL; /* still no slave, return NULL */ - } - } - - mintime = updelay; - - /* first try the primary link; if arping, a link must tx/rx traffic - * before it can be considered the curr_active_slave - also, we would skip - * slaves between the curr_active_slave and primary_slave that may be up - * and able to arp - */ - if ((bond->primary_slave) && - (!arp_interval) && - (IS_UP(bond->primary_slave->dev))) { - new_active = bond->primary_slave; - } - - /* remember where to stop iterating over the slaves */ - old_active = new_active; - - bond_for_each_slave_from(bond, new_active, i, old_active) { - if (IS_UP(new_active->dev)) { - if (new_active->link == BOND_LINK_UP) { - return new_active; - } else if (new_active->link == BOND_LINK_BACK) { - /* link up, but waiting for stabilization */ - if (new_active->delay < mintime) { - mintime = new_active->delay; - bestslave = new_active; - } - } - } - } - - return bestslave; -} - -/** - * change_active_interface - change the active slave into the specified one - * @bond: our bonding struct - * @new: the new slave to make the 
active one - * - * Set the new slave to the bond's settings and unset them on the old - * curr_active_slave. - * Setting include flags, mc-list, promiscuity, allmulti, etc. - * - * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, - * because it is apparently the best available slave we have, even though its - * updelay hasn't timed out yet. - * - * Warning: Caller must hold curr_slave_lock for writing. - */ -static void bond_change_active_slave(struct bonding *bond, struct slave *new_active) -{ - struct slave *old_active = bond->curr_active_slave; - - if (old_active == new_active) { - return; - } - - if (new_active) { - if (new_active->link == BOND_LINK_BACK) { - if (USES_PRIMARY(bond_mode)) { - printk(KERN_INFO DRV_NAME - ": %s: making interface %s the new " - "active one %d ms earlier.\n", - bond->dev->name, new_active->dev->name, - (updelay - new_active->delay) * miimon); - } - - new_active->delay = 0; - new_active->link = BOND_LINK_UP; - new_active->jiffies = jiffies; - - if (bond_mode == BOND_MODE_8023AD) { - bond_3ad_handle_link_change(new_active, BOND_LINK_UP); - } - - if ((bond_mode == BOND_MODE_TLB) || - (bond_mode == BOND_MODE_ALB)) { - bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); - } - } else { - if (USES_PRIMARY(bond_mode)) { - printk(KERN_INFO DRV_NAME - ": %s: making interface %s the new " - "active one.\n", - bond->dev->name, new_active->dev->name); - } - } - } - - if (bond_mode == BOND_MODE_ACTIVEBACKUP) { - if (old_active) { - bond_set_slave_inactive_flags(old_active); - } - - if (new_active) { - bond_set_slave_active_flags(new_active); - } - } - - if (USES_PRIMARY(bond_mode)) { - bond_mc_swap(bond, new_active, old_active); - } - - if ((bond_mode == BOND_MODE_TLB) || - (bond_mode == BOND_MODE_ALB)) { - bond_alb_handle_active_change(bond, new_active); - } else { - bond->curr_active_slave = new_active; - } -} - -/** - * reselect_active_interface - select a new active slave, if needed - * @bond: our bonding struct 
- * - * This functions shoud be called when one of the following occurs: - * - The old curr_active_slave has been released or lost its link. - * - The primary_slave has got its link back. - * - A slave has got its link back and there's no old curr_active_slave. - * - * Warning: Caller must hold curr_slave_lock for writing. - */ -static void bond_select_active_slave(struct bonding *bond) -{ - struct slave *best_slave; - - best_slave = bond_find_best_slave(bond); - if (best_slave != bond->curr_active_slave) { - bond_change_active_slave(bond, best_slave); - } -} - -/* - * Try to release the slave device from the bond device - * It is legal to access curr_active_slave without a lock because all the function - * is write-locked. - * - * The rules for slave state should be: - * for Active/Backup: - * Active stays on all backups go down - * for Bonded connections: - * The first up interface should be left on and all others downed. - */ -static int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) -{ - struct bonding *bond = (struct bonding *)bond_dev->priv; - struct slave *slave; - struct sockaddr addr; - int mac_addr_differ; - - /* slave is not a slave or master is not master of this slave */ - if (!(slave_dev->flags & IFF_SLAVE) || - (slave_dev->master != bond_dev)) { - printk(KERN_ERR DRV_NAME - ": Error: %s: cannot release %s.\n", - bond_dev->name, slave_dev->name); + printk(KERN_ERR DRV_NAME + ": Error: %s: cannot release %s.\n", + bond_dev->name, slave_dev->name); return -EINVAL; } @@ -1898,7 +1631,7 @@ static int bond_release(struct net_devic bond_select_active_slave(bond); } - if (bond->curr_active_slave == NULL) { + if (!bond->curr_active_slave) { printk(KERN_INFO DRV_NAME ": %s: now running without any active " "interface !\n", @@ -1932,7 +1665,7 @@ static int bond_release(struct net_devic } /* flush master's mc_list from slave */ - bond_mc_list_flush (slave_dev, bond_dev); + bond_mc_list_flush(bond_dev, slave_dev); } 
netdev_set_master(slave_dev, NULL); @@ -2029,7 +1762,7 @@ static int bond_release_all(struct net_d } /* flush master's mc_list from slave */ - bond_mc_list_flush(slave_dev, bond_dev); + bond_mc_list_flush(bond_dev, slave_dev); } netdev_set_master(slave_dev, NULL); @@ -2073,40 +1806,190 @@ out: return 0; } -/* this function is called regularly to monitor each slave's link. */ -static void bond_mii_monitor(struct net_device *bond_dev) +/* + * This function changes the active slave to slave <slave_dev>. + * It returns -EINVAL in the following cases. + * - <slave_dev> is not found in the list. + * - There is not active slave now. + * - <slave_dev> is already active. + * - The link state of <slave_dev> is not BOND_LINK_UP. + * - <slave_dev> is not running. + * In these cases, this fuction does nothing. + * In the other cases, currnt_slave pointer is changed and 0 is returned. + */ +static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_device *slave_dev) { struct bonding *bond = (struct bonding *)bond_dev->priv; - struct slave *slave, *oldcurrent; - int slave_died = 0; - int do_failover = 0; - int delta_in_ticks = miimon * HZ / 1000; - int i; - - read_lock(&bond->lock); + struct slave *old_active = NULL; + struct slave *new_active = NULL; + int res = 0; - if (bond->kill_timers) { - goto out; + /* Verify that master_dev is indeed the master of slave_dev */ + if (!(slave_dev->flags & IFF_SLAVE) || + (slave_dev->master != bond_dev)) { + return -EINVAL; } - if (bond->slave_cnt == 0) { - goto re_arm; - } + write_lock_bh(&bond->lock); - /* we will try to read the link status of each of our slaves, and - * set their IFF_RUNNING flag appropriately. For each slave not - * supporting MII status, we won't do anything so that a user-space - * program could monitor the link itself if needed.
- */ + old_active = bond->curr_active_slave; + new_active = bond_get_slave_by_dev(bond, slave_dev); - read_lock(&bond->curr_slave_lock); - oldcurrent = bond->curr_active_slave; - read_unlock(&bond->curr_slave_lock); + /* + * Changing to the current active: do nothing; return success. + */ + if (new_active && (new_active == old_active)) { + write_unlock_bh(&bond->lock); + return 0; + } - bond_for_each_slave(bond, slave, i) { - struct net_device *slave_dev = slave->dev; - int link_state; - u16 old_speed = slave->speed; + if ((new_active) && + (old_active) && + (new_active->link == BOND_LINK_UP) && + IS_UP(new_active->dev)) { + bond_change_active_slave(bond, new_active); + } else { + res = -EINVAL; + } + + write_unlock_bh(&bond->lock); + + return res; +} + +static int bond_ethtool_ioctl(struct net_device *bond_dev, struct ifreq *ifr) +{ + struct ethtool_drvinfo info; + void *addr = ifr->ifr_data; + uint32_t cmd; + + if (get_user(cmd, (uint32_t *)addr)) { + return -EFAULT; + } + + switch (cmd) { + case ETHTOOL_GDRVINFO: + if (copy_from_user(&info, addr, sizeof(info))) { + return -EFAULT; + } + + if (strcmp(info.driver, "ifenslave") == 0) { + int new_abi_ver; + char *endptr; + + new_abi_ver = simple_strtoul(info.fw_version, + &endptr, 0); + if (*endptr) { + printk(KERN_ERR DRV_NAME + ": Error: got invalid ABI " + "version from application\n"); + + return -EINVAL; + } + + if (orig_app_abi_ver == -1) { + orig_app_abi_ver = new_abi_ver; + } + + app_abi_ver = new_abi_ver; + } + + strncpy(info.driver, DRV_NAME, 32); + strncpy(info.version, DRV_VERSION, 32); + snprintf(info.fw_version, 32, "%d", BOND_ABI_VERSION); + + if (copy_to_user(addr, &info, sizeof(info))) { + return -EFAULT; + } + + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) +{ + struct bonding *bond = (struct bonding *)bond_dev->priv; + + info->bond_mode = bond_mode; + info->miimon = miimon; + + read_lock_bh(&bond->lock); + 
info->num_slaves = bond->slave_cnt; + read_unlock_bh(&bond->lock); + + return 0; +} + +static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) +{ + struct bonding *bond = (struct bonding *)bond_dev->priv; + struct slave *slave; + int i, found = 0; + + if (info->slave_id < 0) { + return -ENODEV; + } + + read_lock_bh(&bond->lock); + + bond_for_each_slave(bond, slave, i) { + if (i == (int)info->slave_id) { + found = 1; + break; + } + } + + read_unlock_bh(&bond->lock); + + if (found) { + strcpy(info->slave_name, slave->dev->name); + info->link = slave->link; + info->state = slave->state; + info->link_failure_count = slave->link_failure_count; + } else { + return -ENODEV; + } + + return 0; +} + +/*-------------------------------- Monitoring -------------------------------*/ + +/* this function is called regularly to monitor each slave's link. */ +static void bond_mii_monitor(struct net_device *bond_dev) +{ + struct bonding *bond = (struct bonding *)bond_dev->priv; + struct slave *slave, *oldcurrent; + int do_failover = 0; + int delta_in_ticks = miimon * HZ / 1000; + int i; + + read_lock(&bond->lock); + + if (bond->kill_timers) { + goto out; + } + + if (bond->slave_cnt == 0) { + goto re_arm; + } + + /* we will try to read the link status of each of our slaves, and + * set their IFF_RUNNING flag appropriately. For each slave not + * supporting MII status, we won't do anything so that a user-space + * program could monitor the link itself if needed. 
+ */ + + read_lock(&bond->curr_slave_lock); + oldcurrent = bond->curr_active_slave; + read_unlock(&bond->curr_slave_lock); + + bond_for_each_slave(bond, slave, i) { + struct net_device *slave_dev = slave->dev; + int link_state; + u16 old_speed = slave->speed; u8 old_duplex = slave->duplex; link_state = bond_check_dev_link(slave_dev, 0); @@ -2161,7 +2044,7 @@ static void bond_mii_monitor(struct net_ printk(KERN_INFO DRV_NAME ": %s: link status definitely " "down for interface %s, " - "disabling it", + "disabling it\n", bond_dev->name, slave_dev->name); @@ -2178,8 +2061,6 @@ static void bond_mii_monitor(struct net_ if (slave == oldcurrent) { do_failover = 1; } - - slave_died = 1; } else { slave->delay--; } @@ -2297,6 +2178,7 @@ static void bond_mii_monitor(struct net_ write_lock(&bond->curr_slave_lock); bond_select_active_slave(bond); + if (oldcurrent && !bond->curr_active_slave) { printk(KERN_INFO DRV_NAME ": %s: now running without any active " @@ -2313,6 +2195,17 @@ out: read_unlock(&bond->lock); } +static void bond_arp_send_all(struct slave *slave) +{ + int i; + + for (i = 0; (i<MAX_ARP_IP_TARGETS) && arp_target[i]; i++) { + arp_send(ARPOP_REQUEST, ETH_P_ARP, arp_target[i], slave->dev, + my_ip, NULL, slave->dev->dev_addr, + NULL); + } +} + /* * this function is called regularly to monitor each slave's link * ensuring that traffic is being sent and received when arp monitoring @@ -2325,7 +2218,7 @@ static void bond_loadbalance_arp_mon(str struct bonding *bond = (struct bonding *)bond_dev->priv; struct slave *slave, *oldcurrent; int do_failover = 0; - int delta_in_ticks = arp_interval * HZ / 1000; + int delta_in_ticks = arp_interval * HZ / 1000; int i; read_lock(&bond->lock); @@ -2458,7 +2351,7 @@ static void bond_activebackup_arp_mon(st { struct bonding *bond = (struct bonding *)bond_dev->priv; struct slave *slave; - int delta_in_ticks = arp_interval * HZ / 1000; + int delta_in_ticks = arp_interval * HZ / 1000; int i; read_lock(&bond->lock); @@ -2579,7 +2472,7 @@ static void bond_activebackup_arp_mon(st printk(KERN_INFO DRV_NAME ": %s: link status down for active
interface " - "%s, disabling it", + "%s, disabling it\n", bond_dev->name, slave->dev->name); @@ -2630,8 +2523,7 @@ static void bond_activebackup_arp_mon(st * for becoming the curr_active_slave */ if (!slave) { - - if (bond->current_arp_slave == NULL) { + if (!bond->current_arp_slave) { bond->current_arp_slave = bond->first_slave; } @@ -2680,473 +2572,532 @@ out: read_unlock(&bond->lock); } -static int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev) -{ - dprintk("bond_dev=%p\n", bond_dev); - dprintk("slave_dev=%p\n", slave_dev); - dprintk("slave_dev->addr_len=%d\n", slave_dev->addr_len); - memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); - return 0; -} - -static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) -{ - struct bonding *bond = (struct bonding *)bond_dev->priv; - - info->bond_mode = bond_mode; - info->miimon = miimon; +/*------------------------------ proc/seq_file-------------------------------*/ - read_lock_bh(&bond->lock); - info->num_slaves = bond->slave_cnt; - read_unlock_bh(&bond->lock); +#ifdef CONFIG_PROC_FS - return 0; -} +#define SEQ_START_TOKEN ((void *)1) -static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info) +static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) { - struct bonding *bond = (struct bonding *)bond_dev->priv; + struct bonding *bond = seq->private; + loff_t off = 0; struct slave *slave; - int i, found = 0; - - if (info->slave_id < 0) { - return -ENODEV; - } + int i; + /* make sure the bond won't be taken away */ + read_lock(&dev_base_lock); read_lock_bh(&bond->lock); - bond_for_each_slave(bond, slave, i) { - if (i == (int)info->slave_id) { - found = 1; - break; - } + if (*pos == 0) { + return SEQ_START_TOKEN; } - read_unlock_bh(&bond->lock); - - if (found) { - strcpy(info->slave_name, slave->dev->name); - info->link = slave->link; - info->state = slave->state; - info->link_failure_count = slave->link_failure_count; - } 
else { - return -ENODEV; + bond_for_each_slave(bond, slave, i) { + if (++off == *pos) { + return slave; + } } - return 0; + return NULL; } -static int bond_ethtool_ioctl(struct net_device *bond_dev, struct ifreq *ifr) +static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - void *addr = ifr->ifr_data; - uint32_t cmd; - struct ethtool_drvinfo info; - char *endptr; + struct bonding *bond = seq->private; + struct slave *slave = v; - if (get_user(cmd, (uint32_t *)addr)) { - return -EFAULT; + ++*pos; + if (v == SEQ_START_TOKEN) { + return bond->first_slave; } - switch (cmd) { - case ETHTOOL_GDRVINFO: - if (copy_from_user(&info, addr, sizeof(info))) { - return -EFAULT; - } - - if (strcmp(info.driver, "ifenslave") == 0) { - int new_abi_ver; + slave = slave->next; - new_abi_ver = simple_strtoul(info.fw_version, - &endptr, 0); - if (*endptr) { - printk(KERN_ERR DRV_NAME - ": Error: got invalid ABI " - "version from application\n"); + return (slave == bond->first_slave) ? NULL : slave; +} - return -EINVAL; - } +static void bond_info_seq_stop(struct seq_file *seq, void *v) +{ + struct bonding *bond = seq->private; - if (orig_app_abi_ver == -1) { - orig_app_abi_ver = new_abi_ver; - } + read_unlock_bh(&bond->lock); + read_unlock(&dev_base_lock); +} - app_abi_ver = new_abi_ver; - } +static void bond_info_show_master(struct seq_file *seq, struct bonding *bond) +{ + struct slave *curr; - strncpy(info.driver, DRV_NAME, 32); - strncpy(info.version, DRV_VERSION, 32); - snprintf(info.fw_version, 32, "%d", BOND_ABI_VERSION); + read_lock(&bond->curr_slave_lock); + curr = bond->curr_active_slave; + read_unlock(&bond->curr_slave_lock); - if (copy_to_user(addr, &info, sizeof(info))) { - return -EFAULT; + seq_printf(seq, "Bonding Mode: %s\n", bond_mode_name()); + + if (USES_PRIMARY(bond_mode)) { + if (curr) { + seq_printf(seq, + "Currently Active Slave: %s\n", + curr->dev->name); } + } - return 0; - default: - return -EOPNOTSUPP; + seq_printf(seq, "MII Status: %s\n", 
(curr) ? "up" : "down"); + seq_printf(seq, "MII Polling Interval (ms): %d\n", miimon); + seq_printf(seq, "Up Delay (ms): %d\n", updelay * miimon); + seq_printf(seq, "Down Delay (ms): %d\n", downdelay * miimon); + + if (bond_mode == BOND_MODE_8023AD) { + struct ad_info ad_info; + + seq_puts(seq, "\n802.3ad info\n"); + + if (bond_3ad_get_active_agg_info(bond, &ad_info)) { + seq_printf(seq, "bond %s has no active aggregator\n", + bond->dev->name); + } else { + seq_printf(seq, "Active Aggregator Info:\n"); + + seq_printf(seq, "\tAggregator ID: %d\n", + ad_info.aggregator_id); + seq_printf(seq, "\tNumber of ports: %d\n", + ad_info.ports); + seq_printf(seq, "\tActor Key: %d\n", + ad_info.actor_key); + seq_printf(seq, "\tPartner Key: %d\n", + ad_info.partner_key); + seq_printf(seq, "\tPartner Mac Address: %02x:%02x:%02x:%02x:%02x:%02x\n", + ad_info.partner_system[0], + ad_info.partner_system[1], + ad_info.partner_system[2], + ad_info.partner_system[3], + ad_info.partner_system[4], + ad_info.partner_system[5]); + } } } -static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) +static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) { - struct net_device *slave_dev = NULL; - struct ifbond *u_binfo = NULL, k_binfo; - struct ifslave *u_sinfo = NULL, k_sinfo; - struct mii_ioctl_data *mii = NULL; - int prev_abi_ver = orig_app_abi_ver; - int res = 0; + seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); + seq_printf(seq, "MII Status: %s\n", + (slave->link == BOND_LINK_UP) ? 
"up" : "down"); + seq_printf(seq, "Link Failure Count: %d\n", + slave->link_failure_count); - dprintk("bond_ioctl: master=%s, cmd=%d\n", - bond_dev->name, cmd); + if (app_abi_ver >= 1) { + seq_printf(seq, + "Permanent HW addr: %02x:%02x:%02x:%02x:%02x:%02x\n", + slave->perm_hwaddr[0], + slave->perm_hwaddr[1], + slave->perm_hwaddr[2], + slave->perm_hwaddr[3], + slave->perm_hwaddr[4], + slave->perm_hwaddr[5]); + } - switch (cmd) { - case SIOCETHTOOL: - return bond_ethtool_ioctl(bond_dev, ifr); - case SIOCGMIIPHY: - mii = (struct mii_ioctl_data *)&ifr->ifr_data; - if (!mii) { - return -EINVAL; - } - mii->phy_id = 0; - /* Fall Through */ - case SIOCGMIIREG: - /* - * We do this again just in case we were called by SIOCGMIIREG - * instead of SIOCGMIIPHY. - */ - mii = (struct mii_ioctl_data *)&ifr->ifr_data; - if (!mii) { - return -EINVAL; - } + if (bond_mode == BOND_MODE_8023AD) { + const struct aggregator *agg + = SLAVE_AD_INFO(slave).port.aggregator; - if (mii->reg_num == 1) { - struct bonding *bond = (struct bonding *)bond_dev->priv; - mii->val_out = 0; - read_lock_bh(&bond->lock); - read_lock(&bond->curr_slave_lock); - if (bond->curr_active_slave) { - mii->val_out = BMSR_LSTATUS; - } - read_unlock(&bond->curr_slave_lock); - read_unlock_bh(&bond->lock); + if (agg) { + seq_printf(seq, "Aggregator ID: %d\n", + agg->aggregator_identifier); + } else { + seq_puts(seq, "Aggregator ID: N/A\n"); } + } +} - return 0; - case BOND_INFO_QUERY_OLD: - case SIOCBONDINFOQUERY: - u_binfo = (struct ifbond *)ifr->ifr_data; +static int bond_info_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%s\n", version); + bond_info_show_master(seq, seq->private); + } else { + bond_info_show_slave(seq, v); + } - if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { - return -EFAULT; - } + return 0; +} - res = bond_info_query(bond_dev, &k_binfo); - if (res == 0) { - if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { - return -EFAULT; - } - } +static 
struct seq_operations bond_info_seq_ops = { + .start = bond_info_seq_start, + .next = bond_info_seq_next, + .stop = bond_info_seq_stop, + .show = bond_info_seq_show, +}; - return res; - case BOND_SLAVE_INFO_QUERY_OLD: - case SIOCBONDSLAVEINFOQUERY: - u_sinfo = (struct ifslave *)ifr->ifr_data; +static int bond_info_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + struct proc_dir_entry *proc; + int res; - if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { - return -EFAULT; - } + res = seq_open(file, &bond_info_seq_ops); + if (!res) { + /* recover the pointer buried in proc_dir_entry data */ + seq = file->private_data; + proc = PDE(inode); + seq->private = proc->data; + } - res = bond_slave_info_query(bond_dev, &k_sinfo); - if (res == 0) { - if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { - return -EFAULT; - } - } + return res; +} - return res; - default: - /* Go on */ - break; +static struct file_operations bond_info_fops = { + .owner = THIS_MODULE, + .open = bond_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int bond_create_proc_entry(struct bonding *bond) +{ + struct net_device *bond_dev = bond->dev; + + if (bond_proc_dir) { + bond->proc_entry = create_proc_entry(bond_dev->name, + S_IRUGO, + bond_proc_dir); + if (bond->proc_entry == NULL) { + printk(KERN_WARNING DRV_NAME + ": Warning: Cannot create /proc/net/%s/%s\n", + DRV_NAME, bond_dev->name); + } else { + bond->proc_entry->data = bond; + bond->proc_entry->proc_fops = &bond_info_fops; + bond->proc_entry->owner = THIS_MODULE; + memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); + } } - if (!capable(CAP_NET_ADMIN)) { - return -EPERM; + return 0; +} + +static void bond_remove_proc_entry(struct bonding *bond) +{ + if (bond_proc_dir && bond->proc_entry) { + remove_proc_entry(bond->proc_file_name, bond_proc_dir); + memset(bond->proc_file_name, 0, IFNAMSIZ); + bond->proc_entry = NULL; } +} - if (orig_app_abi_ver == -1) { - /* 
no orig_app_abi_ver was provided yet, so we'll use the - * current one from now on, even if it's 0 - */ - orig_app_abi_ver = app_abi_ver; +/* Create the bonding directory under /proc/net, if doesn't exist yet. + * Caller must hold rtnl_lock. + */ +static void bond_create_proc_dir(void) +{ + int len = strlen(DRV_NAME); - } else if (orig_app_abi_ver != app_abi_ver) { - printk(KERN_ERR DRV_NAME - ": Error: already using ifenslave ABI version %d; to " - "upgrade ifenslave to version %d, you must first " - "reload bonding.\n", - orig_app_abi_ver, app_abi_ver); - return -EINVAL; + for (bond_proc_dir = proc_net->subdir; bond_proc_dir; + bond_proc_dir = bond_proc_dir->next) { + if ((bond_proc_dir->namelen == len) && + !memcmp(bond_proc_dir->name, DRV_NAME, len)) { + break; + } } - slave_dev = dev_get_by_name(ifr->ifr_slave); + if (!bond_proc_dir) { + bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); + if (bond_proc_dir) { + bond_proc_dir->owner = THIS_MODULE; + } else { + printk(KERN_WARNING DRV_NAME + ": Warning: cannot create /proc/net/%s\n", + DRV_NAME); + } + } +} - dprintk("slave_dev=%p: \n", slave_dev); +/* Destroy the bonding directory under /proc/net, if empty. + * Caller must hold rtnl_lock. 
+ */ +static void bond_destroy_proc_dir(void) +{ + struct proc_dir_entry *de; - if (!slave_dev) { - res = -ENODEV; - } else { - dprintk("slave_dev->name=%s: \n", slave_dev->name); - switch (cmd) { - case BOND_ENSLAVE_OLD: - case SIOCBONDENSLAVE: - res = bond_enslave(bond_dev, slave_dev); - break; - case BOND_RELEASE_OLD: - case SIOCBONDRELEASE: - res = bond_release(bond_dev, slave_dev); - break; - case BOND_SETHWADDR_OLD: - case SIOCBONDSETHWADDR: - res = bond_sethwaddr(bond_dev, slave_dev); - break; - case BOND_CHANGE_ACTIVE_OLD: - case SIOCBONDCHANGEACTIVE: - if (USES_PRIMARY(bond_mode)) { - res = bond_ioctl_change_active(bond_dev, slave_dev); - } else { - res = -EINVAL; - } + if (!bond_proc_dir) { + return; + } + + /* verify that the /proc dir is empty */ + for (de = bond_proc_dir->subdir; de; de = de->next) { + /* ignore . and .. */ + if (*(de->name) != '.') { break; - default: - res = -EOPNOTSUPP; } + } - dev_put(slave_dev); + if (de) { + if (bond_proc_dir->owner == THIS_MODULE) { + bond_proc_dir->owner = NULL; + } + } else { + remove_proc_entry(DRV_NAME, proc_net); + bond_proc_dir = NULL; } +} +#endif /* CONFIG_PROC_FS */ - if (res < 0) { - /* The ioctl failed, so there's no point in changing the - * orig_app_abi_ver. We'll restore it's value just in case - * we've changed it earlier in this function. +/*-------------------------- netdev event handling --------------------------*/ + +/* + * Change device name + */ +static int bond_event_changename(struct bonding *bond) +{ +#ifdef CONFIG_PROC_FS + bond_remove_proc_entry(bond); + bond_create_proc_entry(bond); +#endif + + return NOTIFY_DONE; +} + +static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev) +{ + struct bonding *event_bond = (struct bonding *)bond_dev->priv; + + switch (event) { + case NETDEV_CHANGENAME: + return bond_event_changename(event_bond); + case NETDEV_UNREGISTER: + /* + * TODO: remove a bond from the list? 
*/ - orig_app_abi_ver = prev_abi_ver; + break; + default: + break; } - return res; + return NOTIFY_DONE; } -#ifdef CONFIG_NET_FASTROUTE -static int bond_accept_fastpath(struct net_device *bond_dev, struct dst_entry *dst) +static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) { - return -1; + struct net_device *bond_dev = slave_dev->master; + + switch (event) { + case NETDEV_UNREGISTER: + if (bond_dev) { + bond_release(bond_dev, slave_dev); + } + break; + case NETDEV_CHANGE: + /* + * TODO: is this what we get if somebody + * sets up a hierarchical bond, then rmmod's + * one of the slave bonding devices? + */ + break; + case NETDEV_DOWN: + /* + * ... Or is it this? + */ + break; + case NETDEV_CHANGEMTU: + /* + * TODO: Should slaves be allowed to + * independently alter their MTU? For + * an active-backup bond, slaves need + * not be the same type of device, so + * MTUs may vary. For other modes, + * slaves arguably should have the + * same MTUs. To do this, we'd need to + * take over the slave's change_mtu + * function for the duration of their + * servitude. + */ + break; + case NETDEV_CHANGENAME: + /* + * TODO: handle changing the primary's name + */ + break; + default: + break; + } + + return NOTIFY_DONE; } -#endif /* - * in broadcast mode, we send everything to all usable interfaces. + * bond_netdev_event: handle netdev notifier chain events. + * + * This function receives events for the netdev chain. The caller (an + * ioctl handler calling notifier_call_chain) holds the necessary + * locks for us to safely manipulate the slave devices (RTNL lock, + * dev_probe_lock). 
*/ -static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) +static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct bonding *bond = (struct bonding *)bond_dev->priv; - struct slave *slave, *start_at; - struct net_device *tx_dev = NULL; - int i; + struct net_device *event_dev = (struct net_device *)ptr; - read_lock(&bond->lock); + dprintk("event_dev: %s, event: %lx\n", + (event_dev ? event_dev->name : "None"), + event); - if (!BOND_IS_OK(bond)) { - goto free_out; + if (event_dev->flags & IFF_MASTER) { + dprintk("IFF_MASTER\n"); + return bond_master_netdev_event(event, event_dev); } - read_lock(&bond->curr_slave_lock); - start_at = bond->curr_active_slave; - read_unlock(&bond->curr_slave_lock); - - if (!start_at) { - goto free_out; + if (event_dev->flags & IFF_SLAVE) { + dprintk("IFF_SLAVE\n"); + return bond_slave_netdev_event(event, event_dev); } - bond_for_each_slave_from(bond, slave, i, start_at) { - if (IS_UP(slave->dev) && - (slave->link == BOND_LINK_UP) && - (slave->state == BOND_STATE_ACTIVE)) { - if (tx_dev) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) { - printk(KERN_ERR DRV_NAME - ": Error: bond_xmit_broadcast(): " - "skb_clone() failed\n"); - continue; - } + return NOTIFY_DONE; +} - skb2->dev = tx_dev; - skb2->priority = 1; - dev_queue_xmit(skb2); - } - tx_dev = slave->dev; - } - } +static struct notifier_block bond_netdev_notifier = { + .notifier_call = bond_netdev_event, +}; - if (tx_dev) { - skb->dev = tx_dev; - skb->priority = 1; - dev_queue_xmit(skb); - } else { - goto free_out; - } +/*-------------------------- Packet type handling ---------------------------*/ -out: - /* frame sent to all suitable interfaces */ - read_unlock(&bond->lock); - return 0; +/* register to receive lacpdus on a bond */ +static void bond_register_lacpdu(struct bonding *bond) +{ + struct packet_type *pk_type = &(BOND_AD_INFO(bond).ad_pkt_type); -free_out: - /* no suitable interface, 
frame not sent */ - dev_kfree_skb(skb); - goto out; + /* initialize packet type */ + pk_type->type = PKT_TYPE_LACPDU; + pk_type->dev = bond->dev; + pk_type->func = bond_3ad_lacpdu_recv; + pk_type->data = (void*)1; /* understand shared skbs */ + + dev_add_pack(pk_type); } -static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) +/* unregister to receive lacpdus on a bond */ +static void bond_unregister_lacpdu(struct bonding *bond) +{ + dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); +} + +/*-------------------------- Device entry points ----------------------------*/ + +static int bond_open(struct net_device *bond_dev) { struct bonding *bond = (struct bonding *)bond_dev->priv; - struct slave *slave, *start_at; - int i; + struct timer_list *mii_timer = &bond->mii_timer; + struct timer_list *arp_timer = &bond->arp_timer; - read_lock(&bond->lock); + bond->kill_timers = 0; - if (!BOND_IS_OK(bond)) { - goto free_out; - } + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer); - read_lock(&bond->curr_slave_lock); - slave = start_at = bond->curr_active_slave; - read_unlock(&bond->curr_slave_lock); + /* bond_alb_initialize must be called before the timer + * is started. 
+ */ + if (bond_alb_initialize(bond, (bond_mode == BOND_MODE_ALB))) { + /* something went wrong - fail the open operation */ + return -1; + } - if (!slave) { - goto free_out; + init_timer(alb_timer); + alb_timer->expires = jiffies + 1; + alb_timer->data = (unsigned long)bond; + alb_timer->function = (void *)&bond_alb_monitor; + add_timer(alb_timer); } - bond_for_each_slave_from(bond, slave, i, start_at) { - if (IS_UP(slave->dev) && - (slave->link == BOND_LINK_UP) && - (slave->state == BOND_STATE_ACTIVE)) { - skb->dev = slave->dev; - skb->priority = 1; - dev_queue_xmit(skb); - - write_lock(&bond->curr_slave_lock); - bond->curr_active_slave = slave->next; - write_unlock(&bond->curr_slave_lock); + if (miimon) { /* link check interval, in milliseconds. */ + init_timer(mii_timer); + mii_timer->expires = jiffies + 1; + mii_timer->data = (unsigned long)bond_dev; + mii_timer->function = (void *)&bond_mii_monitor; + add_timer(mii_timer); + } - goto out; + if (arp_interval) { /* arp interval, in milliseconds. */ + init_timer(arp_timer); + arp_timer->expires = jiffies + 1; + arp_timer->data = (unsigned long)bond_dev; + if (bond_mode == BOND_MODE_ACTIVEBACKUP) { + arp_timer->function = (void *)&bond_activebackup_arp_mon; + } else { + arp_timer->function = (void *)&bond_loadbalance_arp_mon; } + add_timer(arp_timer); } -out: - read_unlock(&bond->lock); - return 0; + if (bond_mode == BOND_MODE_8023AD) { + struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer); + init_timer(ad_timer); + ad_timer->expires = jiffies + 1; + ad_timer->data = (unsigned long)bond; + ad_timer->function = (void *)&bond_3ad_state_machine_handler; + add_timer(ad_timer); -free_out: - /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); - goto out; + /* register to receive LACPDUs */ + bond_register_lacpdu(bond); + } + + return 0; } -/* - * in XOR mode, we determine the output device by performing xor on - * the source and destination hw adresses. 
If this device is not - * enabled, find the next slave following this xor slave. - */ -static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) +static int bond_close(struct net_device *bond_dev) { struct bonding *bond = (struct bonding *)bond_dev->priv; - struct ethhdr *data = (struct ethhdr *)skb->data; - struct slave *slave, *start_at; - int slave_no; - int i; - - read_lock(&bond->lock); - if (!BOND_IS_OK(bond)) { - goto free_out; - } + write_lock_bh(&bond->lock); - slave_no = (data->h_dest[5]^bond_dev->dev_addr[5]) % bond->slave_cnt; + bond_mc_list_destroy(bond); - bond_for_each_slave(bond, slave, i) { - slave_no--; - if (slave_no < 0) { - break; - } + if (bond_mode == BOND_MODE_8023AD) { + /* Unregister the receive of LACPDUs */ + bond_unregister_lacpdu(bond); } - start_at = slave; - - bond_for_each_slave_from(bond, slave, i, start_at) { - if (IS_UP(slave->dev) && - (slave->link == BOND_LINK_UP) && - (slave->state == BOND_STATE_ACTIVE)) { - skb->dev = slave->dev; - skb->priority = 1; - dev_queue_xmit(skb); + /* signal timers not to re-arm */ + bond->kill_timers = 1; - goto out; - } - } + write_unlock_bh(&bond->lock); -out: - read_unlock(&bond->lock); - return 0; + /* del_timer_sync must run without holding the bond->lock + * because a running timer might be trying to hold it too + */ -free_out: - /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); - goto out; -} + if (miimon) { /* link check interval, in milliseconds. */ + del_timer_sync(&bond->mii_timer); + } -/* - * in active-backup mode, we know that bond->curr_active_slave is always valid if - * the bond has a usable interface. - */ -static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) -{ - struct bonding *bond = (struct bonding *)bond_dev->priv; + if (arp_interval) { /* arp interval, in milliseconds. 
*/ + del_timer_sync(&bond->arp_timer); + } - /* if we are sending arp packets, try to at least - identify our own ip address */ - if (arp_interval && !my_ip && - (skb->protocol == __constant_htons(ETH_P_ARP))) { - char *the_ip = (((char *)skb->data)) + - sizeof(struct ethhdr) + - sizeof(struct arphdr) + - ETH_ALEN; - memcpy(&my_ip, the_ip, 4); + switch (bond_mode) { + case BOND_MODE_8023AD: + del_timer_sync(&(BOND_AD_INFO(bond).ad_timer)); + break; + case BOND_MODE_TLB: + case BOND_MODE_ALB: + del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer)); + break; + default: + break; } - read_lock(&bond->lock); - read_lock(&bond->curr_slave_lock); + /* Release the bonded slaves */ + bond_release_all(bond_dev); - if (!BOND_IS_OK(bond)) { - goto free_out; + if ((bond_mode == BOND_MODE_TLB) || + (bond_mode == BOND_MODE_ALB)) { + /* Must be called only after all + * slaves have been released + */ + bond_alb_deinitialize(bond); } - if (bond->curr_active_slave) { /* one usable interface */ - skb->dev = bond->curr_active_slave->dev; - skb->priority = 1; - dev_queue_xmit(skb); - goto out; - } else { - goto free_out; - } -out: - read_unlock(&bond->curr_slave_lock); - read_unlock(&bond->lock); return 0; - -free_out: - /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); - goto out; } static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) @@ -3195,274 +3146,279 @@ static struct net_device_stats *bond_get return stats; } -#ifdef CONFIG_PROC_FS - -#define SEQ_START_TOKEN ((void *)1) - -static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct bonding *bond = seq->private; - loff_t off = 0; - struct slave *slave; - int i; - - /* make sure the bond won't be taken away */ - read_lock(&dev_base_lock); - read_lock_bh(&bond->lock); - - if (*pos == 0) { - return SEQ_START_TOKEN; - } - - bond_for_each_slave(bond, slave, i) { - if (++off == *pos) { - return slave; - } - } - - return NULL; -} - -static void *bond_info_seq_next(struct seq_file 
*seq, void *v, loff_t *pos) -{ - struct bonding *bond = seq->private; - struct slave *slave = v; - - ++*pos; - if (v == SEQ_START_TOKEN) { - return bond->first_slave; - } - - slave = slave->next; - - return (slave == bond->first_slave) ? NULL : slave; -} - -static void bond_info_seq_stop(struct seq_file *seq, void *v) +static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) { - struct bonding *bond = seq->private; + struct net_device *slave_dev = NULL; + struct ifbond *u_binfo = NULL, k_binfo; + struct ifslave *u_sinfo = NULL, k_sinfo; + struct mii_ioctl_data *mii = NULL; + int prev_abi_ver = orig_app_abi_ver; + int res = 0; - read_unlock_bh(&bond->lock); - read_unlock(&dev_base_lock); -} + dprintk("bond_ioctl: master=%s, cmd=%d\n", + bond_dev->name, cmd); -static void bond_info_show_master(struct seq_file *seq, struct bonding *bond) -{ - struct slave *curr; + switch (cmd) { + case SIOCETHTOOL: + return bond_ethtool_ioctl(bond_dev, ifr); + case SIOCGMIIPHY: + mii = (struct mii_ioctl_data *)&ifr->ifr_data; + if (!mii) { + return -EINVAL; + } + mii->phy_id = 0; + /* Fall Through */ + case SIOCGMIIREG: + /* + * We do this again just in case we were called by SIOCGMIIREG + * instead of SIOCGMIIPHY. 
+ */ + mii = (struct mii_ioctl_data *)&ifr->ifr_data; + if (!mii) { + return -EINVAL; + } - read_lock(&bond->curr_slave_lock); - curr = bond->curr_active_slave; - read_unlock(&bond->curr_slave_lock); + if (mii->reg_num == 1) { + struct bonding *bond = (struct bonding *)bond_dev->priv; + mii->val_out = 0; + read_lock_bh(&bond->lock); + read_lock(&bond->curr_slave_lock); + if (bond->curr_active_slave) { + mii->val_out = BMSR_LSTATUS; + } + read_unlock(&bond->curr_slave_lock); + read_unlock_bh(&bond->lock); + } - seq_printf(seq, "Bonding Mode: %s\n", bond_mode_name()); + return 0; + case BOND_INFO_QUERY_OLD: + case SIOCBONDINFOQUERY: + u_binfo = (struct ifbond *)ifr->ifr_data; - if (USES_PRIMARY(bond_mode)) { - if (curr) { - seq_printf(seq, - "Currently Active Slave: %s\n", - curr->dev->name); + if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) { + return -EFAULT; } - } - - seq_printf(seq, "MII Status: %s\n", (curr) ? "up" : "down"); - seq_printf(seq, "MII Polling Interval (ms): %d\n", miimon); - seq_printf(seq, "Up Delay (ms): %d\n", updelay * miimon); - seq_printf(seq, "Down Delay (ms): %d\n", downdelay * miimon); - if (bond_mode == BOND_MODE_8023AD) { - struct ad_info ad_info; + res = bond_info_query(bond_dev, &k_binfo); + if (res == 0) { + if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) { + return -EFAULT; + } + } - seq_puts(seq, "\n802.3ad info\n"); + return res; + case BOND_SLAVE_INFO_QUERY_OLD: + case SIOCBONDSLAVEINFOQUERY: + u_sinfo = (struct ifslave *)ifr->ifr_data; - if (bond_3ad_get_active_agg_info(bond, &ad_info)) { - seq_printf(seq, "bond %s has no active aggregator\n", - bond->dev->name); - } else { - seq_printf(seq, "Active Aggregator Info:\n"); + if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) { + return -EFAULT; + } - seq_printf(seq, "\tAggregator ID: %d\n", - ad_info.aggregator_id); - seq_printf(seq, "\tNumber of ports: %d\n", - ad_info.ports); - seq_printf(seq, "\tActor Key: %d\n", - ad_info.actor_key); - seq_printf(seq, 
"\tPartner Key: %d\n", - ad_info.partner_key); - seq_printf(seq, "\tPartner Mac Address: %02x:%02x:%02x:%02x:%02x:%02x\n", - ad_info.partner_system[0], - ad_info.partner_system[1], - ad_info.partner_system[2], - ad_info.partner_system[3], - ad_info.partner_system[4], - ad_info.partner_system[5]); + res = bond_slave_info_query(bond_dev, &k_sinfo); + if (res == 0) { + if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) { + return -EFAULT; + } } - } -} -static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) -{ - seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); - seq_printf(seq, "MII Status: %s\n", - (slave->link == BOND_LINK_UP) ? "up" : "down"); - seq_printf(seq, "Link Failure Count: %d\n", - slave->link_failure_count); + return res; + default: + /* Go on */ + break; + } - if (app_abi_ver >= 1) { - seq_printf(seq, - "Permanent HW addr: %02x:%02x:%02x:%02x:%02x:%02x\n", - slave->perm_hwaddr[0], - slave->perm_hwaddr[1], - slave->perm_hwaddr[2], - slave->perm_hwaddr[3], - slave->perm_hwaddr[4], - slave->perm_hwaddr[5]); + if (!capable(CAP_NET_ADMIN)) { + return -EPERM; } - if (bond_mode == BOND_MODE_8023AD) { - const struct aggregator *agg - = SLAVE_AD_INFO(slave).port.aggregator; + if (orig_app_abi_ver == -1) { + /* no orig_app_abi_ver was provided yet, so we'll use the + * current one from now on, even if it's 0 + */ + orig_app_abi_ver = app_abi_ver; - if (agg) { - seq_printf(seq, "Aggregator ID: %d\n", - agg->aggregator_identifier); - } else { - seq_puts(seq, "Aggregator ID: N/A\n"); - } + } else if (orig_app_abi_ver != app_abi_ver) { + printk(KERN_ERR DRV_NAME + ": Error: already using ifenslave ABI version %d; to " + "upgrade ifenslave to version %d, you must first " + "reload bonding.\n", + orig_app_abi_ver, app_abi_ver); + return -EINVAL; } -} -static int bond_info_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%s\n", version); - bond_info_show_master(seq, seq->private); - } 
else { - bond_info_show_slave(seq, v); - } + slave_dev = dev_get_by_name(ifr->ifr_slave); - return 0; -} + dprintk("slave_dev=%p: \n", slave_dev); -static struct seq_operations bond_info_seq_ops = { - .start = bond_info_seq_start, - .next = bond_info_seq_next, - .stop = bond_info_seq_stop, - .show = bond_info_seq_show, -}; + if (!slave_dev) { + res = -ENODEV; + } else { + dprintk("slave_dev->name=%s: \n", slave_dev->name); + switch (cmd) { + case BOND_ENSLAVE_OLD: + case SIOCBONDENSLAVE: + res = bond_enslave(bond_dev, slave_dev); + break; + case BOND_RELEASE_OLD: + case SIOCBONDRELEASE: + res = bond_release(bond_dev, slave_dev); + break; + case BOND_SETHWADDR_OLD: + case SIOCBONDSETHWADDR: + res = bond_sethwaddr(bond_dev, slave_dev); + break; + case BOND_CHANGE_ACTIVE_OLD: + case SIOCBONDCHANGEACTIVE: + if (USES_PRIMARY(bond_mode)) { + res = bond_ioctl_change_active(bond_dev, slave_dev); + } else { + res = -EINVAL; + } + break; + default: + res = -EOPNOTSUPP; + } -static int bond_info_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - struct proc_dir_entry *proc; - int res; + dev_put(slave_dev); + } - res = seq_open(file, &bond_info_seq_ops); - if (!res) { - /* recover the pointer buried in proc_dir_entry data */ - seq = file->private_data; - proc = PDE(inode); - seq->private = proc->data; + if (res < 0) { + /* The ioctl failed, so there's no point in changing the + * orig_app_abi_ver. We'll restore its value just in case + * we've changed it earlier in this function.
+ */ + orig_app_abi_ver = prev_abi_ver; } return res; } -static struct file_operations bond_info_fops = { - .owner = THIS_MODULE, - .open = bond_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int bond_create_proc_entry(struct bonding *bond) +static void bond_set_multicast_list(struct net_device *bond_dev) { - struct net_device *bond_dev = bond->dev; + struct bonding *bond = (struct bonding *)bond_dev->priv; + struct dev_mc_list *dmi; - if (bond_proc_dir) { - bond->proc_entry = create_proc_entry(bond_dev->name, - S_IRUGO, - bond_proc_dir); - if (bond->proc_entry == NULL) { - printk(KERN_WARNING DRV_NAME - ": Warning: Cannot create /proc/net/bonding/%s\n", - bond_dev->name); - } else { - bond->proc_entry->data = bond; - bond->proc_entry->proc_fops = &bond_info_fops; - bond->proc_entry->owner = THIS_MODULE; - memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); - } + write_lock_bh(&bond->lock); + + /* + * Do promisc before checking multicast_mode + */ + if ((bond_dev->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC)) { + bond_set_promiscuity(bond, 1); } - return 0; -} + if (!(bond_dev->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC)) { + bond_set_promiscuity(bond, -1); + } -static void bond_remove_proc_entry(struct bonding *bond) -{ - if (bond_proc_dir && bond->proc_entry) { - remove_proc_entry(bond->proc_file_name, bond_proc_dir); - memset(bond->proc_file_name, 0, IFNAMSIZ); - bond->proc_entry = NULL; + /* set allmulti flag to slaves */ + if ((bond_dev->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI)) { + bond_set_allmulti(bond, 1); } -} -/* Create the bonding directory under /proc/net, if doesn't exist yet. - * Caller must hold rtnl_lock. 
- */ -static void bond_create_proc_dir(void) -{ - int len = strlen(DRV_NAME); + if (!(bond_dev->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI)) { + bond_set_allmulti(bond, -1); + } - for (bond_proc_dir = proc_net->subdir; bond_proc_dir; - bond_proc_dir = bond_proc_dir->next) { - if ((bond_proc_dir->namelen == len) && - !memcmp(bond_proc_dir->name, DRV_NAME, len)) { - break; + bond->flags = bond_dev->flags; + + /* looking for addresses to add to slaves' mc list */ + for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) { + if (!bond_mc_list_find_dmi(dmi, bond->mc_list)) { + bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); } } - if (!bond_proc_dir) { - bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); - if (bond_proc_dir) { - bond_proc_dir->owner = THIS_MODULE; - } else { - printk(KERN_WARNING DRV_NAME - ": Warning: cannot create /proc/net/%s\n", - DRV_NAME); + /* looking for addresses to delete from slaves' list */ + for (dmi = bond->mc_list; dmi; dmi = dmi->next) { + if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list)) { + bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); } } + + /* save master's multicast list */ + bond_mc_list_destroy(bond); + bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC); + + write_unlock_bh(&bond->lock); } -/* Destroy the bonding directory under /proc/net, if empty. - * Caller must hold rtnl_lock. +/* + * Change the MTU of all of a master's slaves to match the master */ -static void bond_destroy_proc_dir(void) +static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) { - struct proc_dir_entry *de; + struct bonding *bond = (struct bonding *)bond_dev->priv; + struct slave *slave, *stop_at; + int res = 0; + int i; - if (!bond_proc_dir) { - return; - } + dprintk("bond=%p, name=%s, new_mtu=%d\n", bond, + (bond_dev ? bond_dev->name : "None"), new_mtu); - /* verify that the /proc dir is empty */ - for (de = bond_proc_dir->subdir; de; de = de->next) { - /* ignore . and .. 
*/ - if (*(de->name) != '.') { - break; + /* Can't hold bond->lock with bh disabled here since + * some base drivers panic. On the other hand we can't + * hold bond->lock without bh disabled because we'll + * deadlock. The only solution is to rely on the fact + * that we're under rtnl_lock here, and the slaves + * list won't change. This doesn't solve the problem + * of setting the slave's MTU while it is + * transmitting, but the assumption is that the base + * driver can handle that. + * + * TODO: figure out a way to safely iterate the slaves + * list, but without holding a lock around the actual + * call to the base driver. + */ + + bond_for_each_slave(bond, slave, i) { + dprintk("s %p s->p %p c_m %p\n", slave, + slave->prev, slave->dev->change_mtu); + if (slave->dev->change_mtu) { + res = slave->dev->change_mtu(slave->dev, new_mtu); + } else { + slave->dev->mtu = new_mtu; + res = 0; + } + + if (res) { + /* If we failed to set the slave's mtu to the new value + * we must abort the operation even in ACTIVE_BACKUP + * mode, because if we allow the backup slaves to have + * different mtu values than the active slave we'll + * need to change their mtu when doing a failover. That + * means changing their mtu from timer context, which + * is probably not a good idea. 
+ */ + dprintk("err %d %s\n", res, slave->dev->name); + goto unwind; } } - if (de) { - if (bond_proc_dir->owner == THIS_MODULE) { - bond_proc_dir->owner = NULL; + bond_dev->mtu = new_mtu; + + return 0; + +unwind: + /* unwind from head to the slave that failed */ + stop_at = slave; + bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { + int tmp_res; + + if (slave->dev->change_mtu) { + tmp_res = slave->dev->change_mtu(slave->dev, bond_dev->mtu); + if (tmp_res) { + dprintk("unwind err %d dev %s\n", tmp_res, + slave->dev->name); + } + } else { + slave->dev->mtu = bond_dev->mtu; } - } else { - remove_proc_entry(DRV_NAME, proc_net); - bond_proc_dir = NULL; } + + return res; } -#endif /* CONFIG_PROC_FS */ /* * Change HW address @@ -3530,7 +3486,7 @@ unwind: memcpy(tmp_sa.sa_data, bond_dev->dev_addr, bond_dev->addr_len); tmp_sa.sa_family = bond_dev->type; - /* unwind from the first slave that failed to head */ + /* unwind from head to the slave that failed */ stop_at = slave; bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { int tmp_res; @@ -3545,228 +3501,221 @@ unwind: return res; } -/* - * Change the MTU of all of a master's slaves to match the master - */ -static int bond_change_mtu(struct net_device *bond_dev, int new_mtu) +static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = (struct bonding *)bond_dev->priv; - struct slave *slave, *stop_at; - int res = 0; + struct slave *slave, *start_at; int i; - dprintk("bond=%p, name=%s, new_mtu=%d\n", bond, - (bond_dev ? bond_dev->name : "None"), new_mtu); + read_lock(&bond->lock); - /* Can't hold bond->lock with bh disabled here since - * some base drivers panic. On the other hand we can't - * hold bond->lock without bh disabled because we'll - * deadlock. The only solution is to rely on the fact - * that we're under rtnl_lock here, and the slaves - * list won't change. 
This doesn't solve the problem - * of setting the slave's MTU while it is - * transmitting, but the assumption is that the base - * driver can handle that. - * - * TODO: figure out a way to safely iterate the slaves - * list, but without holding a lock around the actual - * call to the base driver. - */ + if (!BOND_IS_OK(bond)) { + goto free_out; + } - bond_for_each_slave(bond, slave, i) { - dprintk("s %p s->p %p c_m %p\n", slave, - slave->prev, slave->dev->change_mtu); - if (slave->dev->change_mtu) { - res = slave->dev->change_mtu(slave->dev, new_mtu); - } else { - slave->dev->mtu = new_mtu; - res = 0; - } + read_lock(&bond->curr_slave_lock); + slave = start_at = bond->curr_active_slave; + read_unlock(&bond->curr_slave_lock); - if (res) { - /* If we failed to set the slave's mtu to the new value - * we must abort the operation even in ACTIVE_BACKUP - * mode, because if we allow the backup slaves to have - * different mtu values than the active slave we'll - * need to change their mtu when doing a failover. That - * means changing their mtu from timer context, which - * is probably not a good idea. 
- */ - dprintk("err %d %s\n", res, slave->dev->name); - goto unwind; - } + if (!slave) { + goto free_out; } - bond_dev->mtu = new_mtu; + bond_for_each_slave_from(bond, slave, i, start_at) { + if (IS_UP(slave->dev) && + (slave->link == BOND_LINK_UP) && + (slave->state == BOND_STATE_ACTIVE)) { + skb->dev = slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); + + write_lock(&bond->curr_slave_lock); + bond->curr_active_slave = slave->next; + write_unlock(&bond->curr_slave_lock); + + goto out; + } + } +out: + read_unlock(&bond->lock); return 0; -unwind: - /* unwind from the first slave that failed to head */ - stop_at = slave; - bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) { - int tmp_res; +free_out: + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + goto out; +} - if (slave->dev->change_mtu) { - tmp_res = slave->dev->change_mtu(slave->dev, bond_dev->mtu); - if (tmp_res) { - dprintk("unwind err %d dev %s\n", tmp_res, - slave->dev->name); - } - } else { - slave->dev->mtu = bond_dev->mtu; - } +/* + * in active-backup mode, we know that bond->curr_active_slave is always valid if + * the bond has a usable interface. 
+ */ +static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = (struct bonding *)bond_dev->priv; + + /* if we are sending arp packets, try to at least + identify our own ip address */ + if (arp_interval && !my_ip && + (skb->protocol == __constant_htons(ETH_P_ARP))) { + char *the_ip = (char *)skb->data + + sizeof(struct ethhdr) + + sizeof(struct arphdr) + + ETH_ALEN; + memcpy(&my_ip, the_ip, 4); } - return res; + read_lock(&bond->lock); + read_lock(&bond->curr_slave_lock); + + if (!BOND_IS_OK(bond)) { + goto free_out; + } + + if (bond->curr_active_slave) { /* one usable interface */ + skb->dev = bond->curr_active_slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); + goto out; + } else { + goto free_out; + } +out: + read_unlock(&bond->curr_slave_lock); + read_unlock(&bond->lock); + return 0; + +free_out: + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + goto out; } -/* - * Change device name - */ -static int bond_event_changename(struct bonding *bond) -{ -#ifdef CONFIG_PROC_FS - bond_remove_proc_entry(bond); - bond_create_proc_entry(bond); -#endif +/* + * in XOR mode, we determine the output device by performing xor on + * the source and destination hw addresses. If this device is not + * enabled, find the next slave following this xor slave.
+ */ +static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = (struct bonding *)bond_dev->priv; + struct ethhdr *data = (struct ethhdr *)skb->data; + struct slave *slave, *start_at; + int slave_no; + int i; + + read_lock(&bond->lock); - return NOTIFY_DONE; -} + if (!BOND_IS_OK(bond)) { + goto free_out; + } -static int bond_master_netdev_event(unsigned long event, struct net_device *bond_dev) -{ - struct bonding *event_bond = (struct bonding *)bond_dev->priv; + slave_no = (data->h_dest[5]^bond_dev->dev_addr[5]) % bond->slave_cnt; - switch (event) { - case NETDEV_CHANGENAME: - return bond_event_changename(event_bond); - case NETDEV_UNREGISTER: - /* - * TODO: remove a bond from the list? - */ - break; - default: - break; + bond_for_each_slave(bond, slave, i) { + slave_no--; + if (slave_no < 0) { + break; + } } - return NOTIFY_DONE; -} + start_at = slave; -static int bond_slave_netdev_event(unsigned long event, struct net_device *slave_dev) -{ - struct net_device *bond_dev = slave_dev->master; + bond_for_each_slave_from(bond, slave, i, start_at) { + if (IS_UP(slave->dev) && + (slave->link == BOND_LINK_UP) && + (slave->state == BOND_STATE_ACTIVE)) { + skb->dev = slave->dev; + skb->priority = 1; + dev_queue_xmit(skb); - switch (event) { - case NETDEV_UNREGISTER: - if (bond_dev) { - bond_release(bond_dev, slave_dev); + goto out; } - break; - case NETDEV_CHANGE: - /* - * TODO: is this what we get if somebody - * sets up a hierarchical bond, then rmmod's - * one of the slave bonding devices? - */ - break; - case NETDEV_DOWN: - /* - * ... Or is it this? - */ - break; - case NETDEV_CHANGEMTU: - /* - * TODO: Should slaves be allowed to - * independently alter their MTU? For - * an active-backup bond, slaves need - * not be the same type of device, so - * MTUs may vary. For other modes, - * slaves arguably should have the - * same MTUs. 
To do this, we'd need to - * take over the slave's change_mtu - * function for the duration of their - * servitude. - */ - break; - case NETDEV_CHANGENAME: - /* - * TODO: handle changing the primary's name - */ - break; - default: - break; } - return NOTIFY_DONE; +out: + read_unlock(&bond->lock); + return 0; + +free_out: + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + goto out; } /* - * bond_netdev_event: handle netdev notifier chain events. - * - * This function receives events for the netdev chain. The caller (an - * ioctl handler calling notifier_call_chain) holds the necessary - * locks for us to safely manipulate the slave devices (RTNL lock, - * dev_probe_lock). + * in broadcast mode, we send everything to all usable interfaces. */ -static int bond_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) +static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) { - struct net_device *event_dev = (struct net_device *)ptr; + struct bonding *bond = (struct bonding *)bond_dev->priv; + struct slave *slave, *start_at; + struct net_device *tx_dev = NULL; + int i; - dprintk("event_dev: %s, event: %lx\n", - (event_dev ? 
event_dev->name : "None"), - event); + read_lock(&bond->lock); - if (event_dev->flags & IFF_MASTER) { - dprintk("IFF_MASTER\n"); - return bond_master_netdev_event(event, event_dev); + if (!BOND_IS_OK(bond)) { + goto free_out; } - if (event_dev->flags & IFF_SLAVE) { - dprintk("IFF_SLAVE\n"); - return bond_slave_netdev_event(event, event_dev); + read_lock(&bond->curr_slave_lock); + start_at = bond->curr_active_slave; + read_unlock(&bond->curr_slave_lock); + + if (!start_at) { + goto free_out; } - return NOTIFY_DONE; -} + bond_for_each_slave_from(bond, slave, i, start_at) { + if (IS_UP(slave->dev) && + (slave->link == BOND_LINK_UP) && + (slave->state == BOND_STATE_ACTIVE)) { + if (tx_dev) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) { + printk(KERN_ERR DRV_NAME + ": Error: bond_xmit_broadcast(): " + "skb_clone() failed\n"); + continue; + } -static struct notifier_block bond_netdev_notifier = { - .notifier_call = bond_netdev_event, -}; + skb2->dev = tx_dev; + skb2->priority = 1; + dev_queue_xmit(skb2); + } + tx_dev = slave->dev; + } + } -/* De-initialize device specific data. - * Caller must hold rtnl_lock. - */ -static inline void bond_deinit(struct net_device *bond_dev) -{ - struct bonding *bond = (struct bonding *)bond_dev->priv; + if (tx_dev) { + skb->dev = tx_dev; + skb->priority = 1; + dev_queue_xmit(skb); + } else { + goto free_out; + } - list_del(&bond->bond_list); +out: + /* frame sent to all suitable interfaces */ + read_unlock(&bond->lock); + return 0; -#ifdef CONFIG_PROC_FS - bond_remove_proc_entry(bond); -#endif +free_out: + /* no suitable interface, frame not sent */ + dev_kfree_skb(skb); + goto out; } -/* Unregister and free all bond devices. - * Caller must hold rtnl_lock. 
- */ -static void bond_free_all(void) +#ifdef CONFIG_NET_FASTROUTE +static int bond_accept_fastpath(struct net_device *bond_dev, struct dst_entry *dst) { - struct bonding *bond, *nxt; - - list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { - struct net_device *bond_dev = bond->dev; - - unregister_netdevice(bond_dev); - bond_deinit(bond_dev); - free_netdev(bond_dev); - } - -#ifdef CONFIG_PROC_FS - bond_destroy_proc_dir(); -#endif + return -1; } +#endif + +/*------------------------- Device initialization ---------------------------*/ /* * Does not allocate but creates a /proc entry. @@ -3790,16 +3739,22 @@ static int __init bond_init(struct net_d bond->primary_slave = NULL; bond->dev = bond_dev; - /* Initialize the device structure. */ + /* Initialize the device entry points */ + bond_dev->open = bond_open; + bond_dev->stop = bond_close; + bond_dev->get_stats = bond_get_stats; + bond_dev->do_ioctl = bond_do_ioctl; + bond_dev->set_multicast_list = bond_set_multicast_list; + bond_dev->change_mtu = bond_change_mtu; bond_dev->set_mac_address = bond_set_mac_address; switch (bond_mode) { - case BOND_MODE_ACTIVEBACKUP: - bond_dev->hard_start_xmit = bond_xmit_activebackup; - break; case BOND_MODE_ROUNDROBIN: bond_dev->hard_start_xmit = bond_xmit_roundrobin; break; + case BOND_MODE_ACTIVEBACKUP: + bond_dev->hard_start_xmit = bond_xmit_activebackup; + break; case BOND_MODE_XOR: bond_dev->hard_start_xmit = bond_xmit_xor; break; @@ -3807,12 +3762,12 @@ static int __init bond_init(struct net_d bond_dev->hard_start_xmit = bond_xmit_broadcast; break; case BOND_MODE_8023AD: - bond_dev->hard_start_xmit = bond_3ad_xmit_xor; + bond_dev->hard_start_xmit = bond_3ad_xmit_xor; /* extern */ break; case BOND_MODE_TLB: case BOND_MODE_ALB: - bond_dev->hard_start_xmit = bond_alb_xmit; - bond_dev->set_mac_address = bond_alb_set_mac_address; + bond_dev->hard_start_xmit = bond_alb_xmit; /* extern */ + bond_dev->set_mac_address = bond_alb_set_mac_address; /* extern */ break; default: 
printk(KERN_ERR DRV_NAME @@ -3821,18 +3776,14 @@ static int __init bond_init(struct net_d return -EINVAL; } - bond_dev->get_stats = bond_get_stats; - bond_dev->open = bond_open; - bond_dev->stop = bond_close; - bond_dev->set_multicast_list = bond_set_multicast_list; - bond_dev->do_ioctl = bond_do_ioctl; - bond_dev->change_mtu = bond_change_mtu; - bond_dev->tx_queue_len = 0; - bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; #ifdef CONFIG_NET_FASTROUTE bond_dev->accept_fastpath = bond_accept_fastpath; #endif + /* Initialize the device options */ + bond_dev->tx_queue_len = 0; + bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; + printk(KERN_INFO DRV_NAME ": %s registered with", bond_dev->name); if (miimon) { printk(" MII link monitoring set to %d ms", miimon); @@ -3864,6 +3815,42 @@ static int __init bond_init(struct net_d return 0; } +/* De-initialize device specific data. + * Caller must hold rtnl_lock. + */ +static inline void bond_deinit(struct net_device *bond_dev) +{ + struct bonding *bond = (struct bonding *)bond_dev->priv; + + list_del(&bond->bond_list); + +#ifdef CONFIG_PROC_FS + bond_remove_proc_entry(bond); +#endif +} + +/* Unregister and free all bond devices. + * Caller must hold rtnl_lock. + */ +static void bond_free_all(void) +{ + struct bonding *bond, *nxt; + + list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { + struct net_device *bond_dev = bond->dev; + + unregister_netdevice(bond_dev); + bond_deinit(bond_dev); + free_netdev(bond_dev); + } + +#ifdef CONFIG_PROC_FS + bond_destroy_proc_dir(); +#endif +} + +/*------------------------- Module initialization ---------------------------*/ + /* * Convert string input module parms. Accept either the * number of the mode or its string name. 
@@ -4045,7 +4032,7 @@ static int bond_check_params(void) arp_interval = BOND_LINK_ARP_INTERV; } - for (arp_ip_count=0 ; + for (arp_ip_count = 0; (arp_ip_count < MAX_ARP_IP_TARGETS) && arp_ip_target[arp_ip_count]; arp_ip_count++) { /* not complete check, but should be good enough to @@ -4177,6 +4164,8 @@ module_init(bonding_init); module_exit(bonding_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION); +MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others"); +MODULE_SUPPORTED_DEVICE("most ethernet devices"); /* * Local variables: diff -Nuarp a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h --- a/drivers/net/bonding/bonding.h Sun Dec 21 16:09:04 2003 +++ b/drivers/net/bonding/bonding.h Sun Dec 21 16:09:05 2003 @@ -22,6 +22,9 @@ * * 2003/05/01 - Shmulik Hen * - Added support for Transmit load balancing mode. + * + * 2003/09/24 - Shmulik Hen + * - Code cleanup and style changes */ #ifndef _LINUX_BONDING_H @@ -29,11 +32,12 @@ #include #include +#include #include "bond_3ad.h" #include "bond_alb.h" -#define DRV_VERSION "2.4.1" -#define DRV_RELDATE "September 15, 2003" +#define DRV_VERSION "2.5.0" +#define DRV_RELDATE "December 1, 2003" #define DRV_NAME "bonding" #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" @@ -70,6 +74,65 @@ ((slave)->state == BOND_STATE_ACTIVE)) +#define USES_PRIMARY(mode) \ + (((mode) == BOND_MODE_ACTIVEBACKUP) || \ + ((mode) == BOND_MODE_TLB) || \ + ((mode) == BOND_MODE_ALB)) + +/* + * Less bad way to call ioctl from within the kernel; this needs to be + * done some other way to get the call out of interrupt context. + * Needs "ioctl" variable to be supplied by calling context. + */ +#define IOCTL(dev, arg, cmd) ({ \ + int res = 0; \ + mm_segment_t fs = get_fs(); \ + set_fs(get_ds()); \ + res = ioctl(dev, arg, cmd); \ + set_fs(fs); \ + res; }) + +/** + * bond_for_each_slave_from - iterate the slaves list from a starting point + * @bond: the bond holding this list. 
+ * @pos: current slave. + * @cnt: counter for max number of moves + * @start: starting point. + * + * Caller must hold bond->lock + */ +#define bond_for_each_slave_from(bond, pos, cnt, start) \ + for (cnt = 0, pos = start; \ + cnt < (bond)->slave_cnt; \ + cnt++, pos = (pos)->next) + +/** + * bond_for_each_slave_from_to - iterate the slaves list from start point to stop point + * @bond: the bond holding this list. + * @pos: current slave. + * @cnt: counter for max number of moves + * @start: start point. + * @stop: stop point. + * + * Caller must hold bond->lock + */ +#define bond_for_each_slave_from_to(bond, pos, cnt, start, stop) \ + for (cnt = 0, pos = start; \ + ((cnt < (bond)->slave_cnt) && (pos != (stop)->next)); \ + cnt++, pos = (pos)->next) + +/** + * bond_for_each_slave - iterate the slaves list from head + * @bond: the bond holding this list. + * @pos: current slave. + * @cnt: counter for max number of moves + * + * Caller must hold bond->lock + */ +#define bond_for_each_slave(bond, pos, cnt) \ + bond_for_each_slave_from(bond, pos, cnt, (bond)->first_slave) + + struct slave { struct net_device *dev; /* first - usefull for panic debug */ struct slave *next; @@ -121,46 +184,6 @@ struct bonding { }; /** - * bond_for_each_slave_from - iterate the slaves list from a starting point - * @bond: the bond holding this list. - * @pos: current slave. - * @cnt: counter for max number of moves - * @start: starting point. - * - * Caller must hold bond->lock - */ -#define bond_for_each_slave_from(bond, pos, cnt, start) \ - for (cnt = 0, pos = start; \ - cnt < (bond)->slave_cnt; \ - cnt++, pos = (pos)->next) - -/** - * bond_for_each_slave_from_to - iterate the slaves list from start point to stop point - * @bond: the bond holding this list. - * @pos: current slave. - * @cnt: counter for number max of moves - * @start: start point. - * @stop: stop point. 
- * - * Caller must hold bond->lock - */ -#define bond_for_each_slave_from_to(bond, pos, cnt, start, stop) \ - for (cnt = 0, pos = start; \ - ((cnt < (bond)->slave_cnt) && (pos != (stop)->next)); \ - cnt++, pos = (pos)->next) - -/** - * bond_for_each_slave - iterate the slaves list from head - * @bond: the bond holding this list. - * @pos: current slave. - * @cnt: counter for max number of moves - * - * Caller must hold bond->lock - */ -#define bond_for_each_slave(bond, pos, cnt) \ - bond_for_each_slave_from(bond, pos, cnt, (bond)->first_slave) - -/** * Returns NULL if the net_device does not belong to any of the bond's slaves * * Caller must hold bond lock for read @@ -188,9 +211,17 @@ extern inline struct bonding *bond_get_b return (struct bonding *)slave->dev->master->priv; } -/* Forward declarations */ -void bond_set_slave_active_flags(struct slave *slave); -void bond_set_slave_inactive_flags(struct slave *slave); +extern inline void bond_set_slave_inactive_flags(struct slave *slave) +{ + slave->state = BOND_STATE_BACKUP; + slave->dev->flags |= IFF_NOARP; +} + +extern inline void bond_set_slave_active_flags(struct slave *slave) +{ + slave->state = BOND_STATE_ACTIVE; + slave->dev->flags &= ~IFF_NOARP; +} #endif /* _LINUX_BONDING_H */