···
 14  14  =============
 15  15
 16  16  Using special makers (called 'nulls') is a convenient way
 17     -to solve following problem :
     17 +to solve following problem.
 18  18
 19     -A typical RCU linked list managing objects which are
 20     -allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can
 21     -use following algos :
     19 +Without 'nulls', a typical RCU linked list managing objects which are
     20 +allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
     21 +algorithms:
 22  22
 23     -1) Lookup algo
 24     ---------------
     23 +1) Lookup algorithm
     24 +-------------------
 25  25
 26  26  ::
 27  27
 28     -  rcu_read_lock()
 29  28    begin:
     29 +  rcu_read_lock()
 30  30    obj = lockless_lookup(key);
 31  31    if (obj) {
 32  32      if (!try_get_ref(obj)) // might fail for free objects
···
 38  38       */
 39  39      if (obj->key != key) { // not the object we expected
 40  40        put_ref(obj);
     41 +      rcu_read_unlock();
 41  42        goto begin;
 42  43      }
 43  44    }
···
 53  52    {
 54  53      struct hlist_node *node, *next;
 55  54      for (pos = rcu_dereference((head)->first);
 56     -        pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
 57     -        ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
 58     -        pos = rcu_dereference(next))
     55 +         pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
     56 +         ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
     57 +         pos = rcu_dereference(next))
 59  58        if (obj->key == key)
 60  59          return obj;
 61  60      return NULL;
···
 65  64
 66  65      struct hlist_node *node;
 67  66      for (pos = rcu_dereference((head)->first);
 68     -        pos && ({ prefetch(pos->next); 1; }) &&
 69     -        ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
 70     -        pos = rcu_dereference(pos->next))
     67 +         pos && ({ prefetch(pos->next); 1; }) &&
     68 +         ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
     69 +         pos = rcu_dereference(pos->next))
 71  70        if (obj->key == key)
 72  71          return obj;
 73  72      return NULL;
···
 83  82    solved by pre-fetching the "next" field (with proper barriers) before
 84  83    checking the key."
 85  84
 86     -2) Insert algo
 87     ---------------
     85 +2) Insertion algorithm
     86 +----------------------
 88  87
 89  88  We need to make sure a reader cannot read the new 'obj->obj_next' value
 90     -and previous value of 'obj->key'. Or else, an item could be deleted
     89 +and previous value of 'obj->key'. Otherwise, an item could be deleted
 91  90  from a chain, and inserted into another chain. If new chain was empty
 92     -before the move, 'next' pointer is NULL, and lockless reader can
 93     -not detect it missed following items in original chain.
     91 +before the move, 'next' pointer is NULL, and lockless reader can not
     92 +detect the fact that it missed following items in original chain.
 94  93
 95  94  ::
 96  95
 97  96    /*
 98     -  * Please note that new inserts are done at the head of list,
 99     -  * not in the middle or end.
100     -  */
     97 +   * Please note that new inserts are done at the head of list,
     98 +   * not in the middle or end.
     99 +   */
101 100    obj = kmem_cache_alloc(...);
102 101    lock_chain(); // typically a spin_lock()
103 102    obj->key = key;
104     -  /*
105     -  * we need to make sure obj->key is updated before obj->next
106     -  * or obj->refcnt
107     -  */
108     -  smp_wmb();
109     -  atomic_set(&obj->refcnt, 1);
    103 +  atomic_set_release(&obj->refcnt, 1); // key before refcnt
110 104    hlist_add_head_rcu(&obj->obj_node, list);
111 105    unlock_chain(); // typically a spin_unlock()
112 106
113 107
114     -3) Remove algo
115     ---------------
    108 +3) Removal algorithm
    109 +--------------------
    110 +
116 111  Nothing special here, we can use a standard RCU hlist deletion.
117 112  But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused
118 113  very very fast (before the end of RCU grace period)
···
130 133  ========================
131 134
132 135  With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
133     -and extra smp_wmb() in insert function.
    136 +and extra _release() in insert function.
134 137
135 138  For example, if we choose to store the slot number as the 'nulls'
136 139  end-of-list marker for each slot of the hash table, we can detect
···
139 142  the lookup met the end of chain. If final 'nulls' value
140 143  is not the slot number, then we must restart the lookup at
141 144  the beginning. If the object was moved to the same chain,
142     -then the reader doesn't care : It might eventually
    145 +then the reader doesn't care: It might occasionally
143 146  scan the list again without harm.
144 147
145 148
146     -1) lookup algo
147     ---------------
    149 +1) lookup algorithm
    150 +-------------------
148 151
149 152  ::
150 153
151 154    head = &table[slot];
152     -  rcu_read_lock();
153 155    begin:
    156 +  rcu_read_lock();
154 157    hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
155 158      if (obj->key == key) {
156     -      if (!try_get_ref(obj)) // might fail for free objects
157     -        goto begin;
158     -      if (obj->key != key) { // not the object we expected
159     -        put_ref(obj);
    159 +      if (!try_get_ref(obj)) { // might fail for free objects
    160 +        rcu_read_unlock();
160 161          goto begin;
161 162        }
162     -      goto out;
    163 +      if (obj->key != key) { // not the object we expected
    164 +        put_ref(obj);
    165 +        rcu_read_unlock();
    166 +        goto begin;
    167 +      }
    168 +      goto out;
    169 +    }
163 170    }
164     -  /*
165     -  * if the nulls value we got at the end of this lookup is
166     -  * not the expected one, we must restart lookup.
167     -  * We probably met an item that was moved to another chain.
168     -  */
169     -  if (get_nulls_value(node) != slot)
170     -    goto begin;
    171 +
    172 +  // If the nulls value we got at the end of this lookup is
    173 +  // not the expected one, we must restart lookup.
    174 +  // We probably met an item that was moved to another chain.
    175 +  if (get_nulls_value(node) != slot) {
    176 +    put_ref(obj);
    177 +    rcu_read_unlock();
    178 +    goto begin;
    179 +  }
171 180    obj = NULL;
172 181
173 182    out:
174 183    rcu_read_unlock();
175 184
176     -2) Insert function
177     -------------------
    185 +2) Insert algorithm
    186 +-------------------
178 187
179 188  ::
180 189
181 190    /*
182     -  * Please note that new inserts are done at the head of list,
183     -  * not in the middle or end.
184     -  */
    191 +   * Please note that new inserts are done at the head of list,
    192 +   * not in the middle or end.
    193 +   */
185 194    obj = kmem_cache_alloc(cachep);
186 195    lock_chain(); // typically a spin_lock()
187 196    obj->key = key;
    197 +  atomic_set_release(&obj->refcnt, 1); // key before refcnt
188 198    /*
189     -  * changes to obj->key must be visible before refcnt one
190     -  */
191     -  smp_wmb();
192     -  atomic_set(&obj->refcnt, 1);
193     -  /*
194     -  * insert obj in RCU way (readers might be traversing chain)
195     -  */
    199 +   * insert obj in RCU way (readers might be traversing chain)
    200 +   */
196 201    hlist_nulls_add_head_rcu(&obj->obj_node, list);
197 202    unlock_chain(); // typically a spin_unlock()