概括

現在的 OVS 使用 microflow + megaflow 兩級緩存查詢流表。整體流程從 ovs_vport_receive（datapath/vport.c）開始，然後進入 ovs_dp_process_packet（datapath/datapath.c），在其中調用 ovs_flow_tbl_lookup_stats（datapath/flow_table.c）開始查表：先查 microflow 緩存，得到 mask_array 裡的索引；再用該索引從 mask_array 中取出 mask，用 mask 對 key 做掩碼後計算哈希，定位到 megaflow 的哈希桶並在桶內查找；如果沒找到就 upcall 到用戶態：

查找 microflow 緩存：根據數據報文 SKB 的 hash 值，定位到 mask_cache_entry 數組中的某個元素，並得到該元素緩存的掩碼數組索引值；

查找 megaflow 緩存：根據步驟 1 中查找到的掩碼數組索引值，定位到掩碼數組中的某個元素，並得到該元素的掩碼，然后根據掩碼定位到具體的哈希桶，並遍歷該哈希桶中的所有節點，直到找到匹配的 flow。

說的是流表過程，所以就從ovs_flow_tbl_lookup_stats開始。

正文

查找 microflow 緩存

OVS 內核態流表查找的入口函數是定義在 datapath/flow_table.c 文件中的 ovs_flow_tbl_lookup_stats()，在 ovs_dp_process_packet 裡調用： flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), &n_mask_hit);

/*
 * Look up the flow matching 'key', consulting this CPU's mask cache first.
 *
 * tbl:        flow table to search
 * key:        key to match against the installed flows
 * skb_hash:   hash of the packet; 0 bypasses the mask cache entirely
 * n_mask_hit: output; reset to 0 here and then passed down to flow_lookup()
 *
 * Returns the value returned by flow_lookup(): the matching flow, or NULL.
 */
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
                                          const struct sw_flow_key *key,
                                          u32 skb_hash,
                                          u32 *n_mask_hit)
{
        struct mask_array *ma = rcu_dereference(tbl->mask_array);
        struct table_instance *ti = rcu_dereference(tbl->ti);
        struct mask_cache_entry *entries, *ce;
        struct sw_flow *flow;
        u32 hash;
        int seg;
        *n_mask_hit = 0;
        /* No usable hash: do a full lookup with a throwaway local index so
         * that no mask cache entry is read or written. */
        if (unlikely(!skb_hash)) {
                u32 mask_index = 0;
                return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
        }
        /* Pre and post recirculation flows usually have the same skb_hash
         * value. To avoid hash collisions, rehash the 'skb_hash' with
         * 'recirc_id'.  */
        if (key->recirc_id)
                skb_hash = jhash_1word(skb_hash, key->recirc_id);
        ce = NULL;
        hash = skb_hash;
        /* Mask cache belonging to the CPU running this lookup. */
        entries = this_cpu_ptr(tbl->mask_cache);
        /* Find the cache entry 'ce' to operate on. */
        for (seg = 0; seg < MC_HASH_SEGS; seg++) {
                /* Low bits of the (progressively shifted) hash select the
                 * slot; presumably MC_HASH_ENTRIES is a power of two. */
                int index = hash & (MC_HASH_ENTRIES - 1);
                struct mask_cache_entry *e;
                e = &entries[index];
                if (e->skb_hash == skb_hash) {
                        /* Hash hit: try the cached mask index first.
                         * flow_lookup() rewrites e->mask_index if the flow
                         * is found under a different mask. */
                        flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
                                           &e->mask_index);
                        /* Stale entry: clear its hash so it no longer
                         * matches this packet hash. */
                        if (!flow)
                                e->skb_hash = 0;
                        return flow;
                }
                /* Remember the probed entry with the smallest skb_hash as
                 * the one to overwrite on a miss. */
                if (!ce || e->skb_hash < ce->skb_hash)
                        ce = e;  /* A better replacement cache candidate. */
                /* Move on to the next segment of the hash. */
                hash >>= MC_HASH_SHIFT;
        }
        /* Cache miss, do full lookup. */
        flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
        /* Only claim the chosen entry for this hash when a flow was found. */
        if (flow)
                ce->skb_hash = skb_hash;
        return flow;
}

ovs_flow_tbl_lookup_stats() 的函數參數如下：

tbl：類型為 struct flow_table，表示專屬於每個 datapath 的流表組織結構；

key：類型為 struct sw_flow_key，表示從數據報文提取出來的匹配關鍵字；

skb_hash：表示數據報文 SKB 的 hash 值；

n_mask_hit：輸出參數，表示嘗試匹配掩碼的次數。

1.當skb_hash為0的時候，完全查找mask_array表，不更新cache

// If skb_hash is 0, do a full lookup; the index lives in a local variable,
// so no mask cache entry is touched
if (unlikely(!skb_hash)) {
    u32 mask_index = 0;
    return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
}

// The packet carries a non-zero recirc_id (it re-entered the OVS pipeline):
// mix recirc_id into the hash
if (key->recirc_id)
    skb_hash = jhash_1word(skb_hash, key->recirc_id);

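這裡說的「不更新」：skb_hash 為 0 時，傳給 flow_lookup 的是局部變量 mask_index 的地址，而不是某個 mask_cache_entry 的 mask_index，所以無論是否找到 flow，microflow 緩存都不會被讀寫。另外，0 在緩存裡同時被當作「無效表項」的標記（命中 hash 但查不到 flow 時會執行 e->skb_hash = 0），所以 hash 為 0 的報文直接走完整查找。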

2.找到mask_cache_entry存在mask_index

ce = NULL;
hash = skb_hash;
// Array of mask_cache_entry with 256 slots, i.e. the microflow cache
// Fetch the current CPU's mask_cache
entries = this_cpu_ptr(tbl->mask_cache);

/* Find the cache entry 'ce' to operate on. */
// Split the hash into 4 bytes and probe from the lowest byte to the highest,
// so a single hash value can land in any of 4 slots
//MC_HASH_SEGS = 4
for (seg = 0; seg < MC_HASH_SEGS; seg++) {
    //MC_HASH_ENTRIES = 256
    int index = hash & (MC_HASH_ENTRIES - 1); // 255 is eight 1-bits, so this keeps the low 8 bits (one byte)
    struct mask_cache_entry *e;

    e = &entries[index];
    if (e->skb_hash == skb_hash) {   
        flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
                   &e->mask_index);
        if (!flow)
            e->skb_hash = 0;
        return flow;
    }

    // Among the 4 probed entries, keep the one with the smallest skb_hash
    // as the replacement candidate for the cache-miss path
    if (!ce || e->skb_hash < ce->skb_hash)
        ce = e;  /* A better replacement cache candidate. */
    // MC_HASH_SHIFT = 8
    hash >>= MC_HASH_SHIFT;
}

主要說一下hash：

32 位的 hash 值被切成 4 個 8 位的段，每段的取值範圍正好對應 mask_cache_entry[256] 的下標，相當於一個 hash 值對應 4 個候選桶的位置，只要有一個匹配就算命中。這樣做的好處是減少 hash 衝突造成的緩存項互相覆蓋。如果 4 個桶都沒有匹配，就在這 4 個候選裡選出 skb_hash 最小的那個 mask_cache_entry 作為替換對象，完整查找成功後再更新它。

3.沒找到mask_cache_entry就遍歷找mask_array表，並且更新

// Cache miss: full lookup; on success flow_lookup() stores the matching mask
// index into ce->mask_index
flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
// Record the packet hash in the chosen entry only when a flow was found
if (flow)
    ce->skb_hash = skb_hash;

flow_lookup里： 
// Try mask 'i'; on a match, write i back through the caller's index pointer
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) { /* Found */
    *index = i;
    return flow;
}

首先是更新mask_index，傳的就是地址，在flow_lookup里會更新。如果找到了flow，把skb_hash更新一下就行了。這整個過程就是相當於一級緩存。

查找 megaflow 緩存

查找 megaflow 緩存的入口函數是定義在 datapath/flow_table.c 文件中的 flow_lookup 函數：

/*
 * Search the mask array 'ma' for a flow matching 'key', trying the mask at
 * '*index' first.
 *
 * tbl:        flow table (not referenced in this function body)
 * ti:         table instance handed to masked_flow_lookup()
 * ma:         mask array to iterate over
 * key:        unmasked key to match
 * n_mask_hit: counter passed through to masked_flow_lookup()
 * index:      in/out; on entry the mask slot to try first, on a match found
 *             by the full scan it is overwritten with the matching slot
 *
 * Returns the matching flow, or NULL when no mask yields a match.
 */
static struct sw_flow *flow_lookup(struct flow_table *tbl,
                   struct table_instance *ti,
                   const struct mask_array *ma,
                   const struct sw_flow_key *key,
                   u32 *n_mask_hit,
                   u32 *index)
{
    struct sw_flow_mask *mask;
    struct sw_flow *flow;
    int i;

    /* Fast path: the caller-supplied slot, if it is in range and populated. */
    if (*index < ma->max) {
        mask = rcu_dereference_ovsl(ma->masks[*index]);
        if (mask) {
            flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
            if (flow)
                return flow;
        }
    }

    /* Slow path: try every other populated mask slot. */
    for (i = 0; i < ma->max; i++)  {

        /* Already tried above (when *index was in range). */
        if (i == *index)
            continue;

        mask = rcu_dereference_ovsl(ma->masks[i]);
        if (!mask)
            continue;

        flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
        if (flow) { /* Found */
            /* Report the matching slot back to the caller. */
            *index = i;
            return flow;
        }
    }

    return NULL;
}

1.傳進來的mask_array索引值index有效

// 根據傳入的 index 獲取到掩碼數組的掩碼，根據該掩碼進行查找
if (*index < ma->max) {
    // 從掩碼數組里獲取掩碼
    mask = rcu_dereference_ovsl(ma->masks[*index]);
    if (mask) {
        flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
        if (flow)
            return flow;
    }
}

index在掩碼數組的范圍內，先通過rcu_dereference_ovsl獲取mask，然后看能否找到flow，找到了就可以返回了。真正進行megaflow查詢的是masked_flow_lookup函數，下邊講。

2.索引值index無效，或用該索引對應的mask沒有找到flow，就遍歷mask_array中的每個mask

for (i = 0; i < ma->max; i++)  {
    if (i == *index) // This slot was already tried above
        continue;
    
    mask = rcu_dereference_ovsl(ma->masks[i]); // Fetch the mask from the mask array
    if (!mask)
        continue;

    flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
    if (flow) { /* Found */
        *index = i;   // Found: write the slot back so the caller's mask_cache_entry is updated
        return flow;
    }
}

真正查找megaflow的函數：masked_flow_lookup()

/*
 * Look up 'unmasked' in table instance 'ti' under a single mask.
 *
 * ti:         table instance whose buckets are searched
 * unmasked:   key as extracted from the packet
 * mask:       mask applied to the key before hashing and comparing
 * n_mask_hit: incremented once per call, i.e. once per mask tried
 *
 * Returns the matching flow, or NULL if the bucket holds no match.
 */
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
                      const struct sw_flow_key *unmasked,
                      const struct sw_flow_mask *mask,
                      u32 *n_mask_hit)
{
    struct sw_flow_key masked_key;
    struct hlist_head *bucket;
    struct sw_flow *candidate;
    u32 key_hash;

    /* Apply the mask to the key, then hash the masked key over the
     * mask's range to pick the bucket. */
    ovs_flow_mask_key(&masked_key, unmasked, false, mask);
    key_hash = flow_hash(&masked_key, &mask->range);
    bucket = find_bucket(ti, key_hash);

    /* Count this mask attempt. */
    (*n_mask_hit)++;

    /* Walk the bucket's chain; a match needs the same mask, the same
     * hash, and an equal masked key. */
    hlist_for_each_entry_rcu(candidate, bucket, flow_table.node[ti->node_ver]) {
        if (candidate->mask != mask)
            continue;
        if (candidate->flow_table.hash != key_hash)
            continue;
        if (flow_cmp_masked_key(candidate, &masked_key, &mask->range))
            return candidate;
    }
    return NULL;
}

find_bucket 函數

/*
 * Return the bucket of 'ti' that 'hash' maps to: the hash is first mixed
 * with the instance's hash_seed, then reduced with (n_buckets - 1)
 * (presumably n_buckets is a power of two, so this keeps the low bits).
 */
static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
{
        u32 seeded = jhash_1word(hash, ti->hash_seed);

        return &ti->buckets[seeded & (ti->n_buckets - 1)];
}

這樣就找到了flow

緩存沒有命中

直接看datapath.c

未命中就會執行upcall了

參考：

深入理解 Open vSwitch（四）：內核態流表查找分析

【OVS2.5.0源碼分析】datapath之流表查詢

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 ovs流表 ovs流表高級特性（一） OVS中arp響應的流表的實現查詢某個表被那些存儲過程用到 Oracle查詢某個表被那些存儲過程引用【Oracle】查詢某個表被哪些存儲過程引用 OVS SQL Server查詢各個表的記錄數，查詢某個表被哪些存儲過程/視圖調用 sql server 查詢表名，存儲過程，列名等 linux內核exec過程