NET Meetup #22 Minsk 2017
Software Engineer @Adform
@nikitin_a_a
https://github.com/alexandrnikitin/
https://alexandrnikitin.github.io/blog/
Sources:
Pricing:
IAB General Member: $4 000
IAB Associate Member: $7 000
Non-Member: $14 000
googlebot
bingbot
twitterbot
duckduckbot
curl
“If you can not measure it, you can not improve it.”
Lord Kelvin
An animated version
http://blog.ivank.net/aho-corasick-algorithm-in-as3.html
Alternatives: xunit.performance, NBench
Alternatives: ildasm, dotPeek, Just Decompile, dnSpy
public class AhoCorasickTree
{
internal AhoCorasickTreeNode Root { get; set; }
public AhoCorasickTree(IEnumerable<string> keywords) { ... }
public bool Contains(string text) { ... }
}
internal class AhoCorasickTreeNode
{
public char Value { get; private set; }
public AhoCorasickTreeNode Failure { get; set; }
private readonly Dictionary<char, AhoCorasickTreeNode> _transitionsDictionary;
}
public bool Contains(string text) { return Contains(text, false); }
private bool Contains(string text, bool onlyStarts)
{
var pointer = Root;
foreach (var c in text)
{
var transition = GetTransition(c, ref pointer);
if (transition != null)
pointer = transition;
else if (onlyStarts)
return false;
if (pointer.Results.Any())
return true;
}
return false;
}
private AhoCorasickTreeNode GetTransition(char c, ref AhoCorasickTreeNode pointer)
{
AhoCorasickTreeNode transition = null;
while (transition == null)
{
transition = pointer.GetTransition(c);
if (pointer == Root)
break;
if (transition == null)
pointer = pointer.Failure;
}
return transition;
}
public AhoCorasickTreeNode GetTransition(char c)
{
return _transitionsDictionary.ContainsKey(c)
? _transitionsDictionary[c]
: null;
}
public class SimpleManyKeywordsBenchmark
{
// a common user agent string
private const string UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36";
// create the SUT
private readonly AhoCorasickTree _tree;
public SimpleManyKeywordsBenchmark()
{
_tree = new AhoCorasickTree(ResourcesUtils.GetKeywords().ToArray());
}
[Benchmark]
public bool Baseline()
{
return _tree.Contains(UserAgent);
}
}
``` ini
BenchmarkDotNet=v0.10.3.0, OS=Microsoft Windows NT 6.2.9200.0
Processor=Intel(R) Core(TM) i7-4600U CPU 2.10GHz, ProcessorCount=4
Frequency=2630635 Hz, Resolution=380.1364 ns, Timer=TSC
[Host] : Clr 4.0.30319.42000, 64bit RyuJIT-v4.6.1637.0
Job-ECROUK : Clr 4.0.30319.42000, 64bit RyuJIT-v4.6.1637.0
Jit=RyuJit Platform=X64 LaunchCount=5
TargetCount=20 WarmupCount=20
```
| Method | Mean | StdDev |
|--------- |---------- |---------- |
| Baseline | 6.1364 us | 0.0314 us |
public AhoCorasickTreeNode GetTransition(char c)
{
return _transitionsDictionary.ContainsKey(c)
? _transitionsDictionary[c]
: null;
public AhoCorasickTreeNode GetTransition(char c)
{
_transitionsDictionary.TryGetValue(c, out AhoCorasickTreeNode node);
return node;
}
5% faster
public static bool Any<TSource>(this IEnumerable<TSource> source)
{
...
using (IEnumerator<TSource> enumerator = source.GetEnumerator())
{
if (enumerator.MoveNext())
return true;
}
return false;
}
public class List<T> : ... IEnumerable<T> ...
{
...
IEnumerator<T> IEnumerable<T>.GetEnumerator()
{
return (IEnumerator<T>) new List<T>.Enumerator(this);
}
}
...
IL_0000: ldarg.0
IL_0001: newobj instance void valuetype System.Collections.Generic.List`1/Enumerator<!T>::.ctor(class System.Collections.Generic.List`1<!0>)
IL_0006: box valuetype System.Collections.Generic.List`1/Enumerator<!T>
IL_000b: ret
if (pointer.Results.Any())
return true;
if (pointer.Results.Count > 0)
return true;
| Method | Mean | StdDev | Median | Scaled | Scaled-StdDev |
|---------- |---------- |---------- |---------- |------- |-------------- |
| Control | 5.7016 us | 0.0669 us | 5.6759 us | 1.00 | 0.00 |
| Treatment | 2.8440 us | 0.0357 us | 2.8433 us | 0.50 | 0.01 |
private int FindEntry(TKey key)
{
if ((object) key == null) ThrowHelper.ThrowArgumentNullException(ExceptionArgument.key);
if (this.buckets != null)
{
int num = this.comparer.GetHashCode(key) & int.MaxValue;
for (int index = this.buckets[num % this.buckets.Length]; index >= 0; index = this.entries[index].next)
{
if (this.entries[index].hashCode == num && this.comparer.Equals(this.entries[index].key, key))
return index;
}
}
return -1;
}
Dictionary<TKey, TValue>.TryGetValue()
Dictionary<TKey, TValue>.FindEntry()
GenericEqualityComparer<T>.GetHashCode()
Char.GetHashCode() (inlined)
GenericEqualityComparer<TKey>.Equals()
Char.Equals() (inlined)
// repeat if hash collision
internal class AhoCorasickTreeNode
{
private int[] _buckets;
private Entry[] _entries;
...
public AhoCorasickTreeNode GetTransition(char c)
{
var bucket = c % _buckets.Length;
for (int i = _buckets[bucket]; i >= 0; i = _entries[i].Next)
{
if (_entries[i].Key == c)
{
return _entries[i].Value;
}
}
return null;
}
...
}
| Method | Mean | StdDev | Scaled | Scaled-StdDev |
|---------- |---------- |---------- |------- |-------------- |
| Control | 2.7514 us | 0.0249 us | 1.00 | 0.00 |
| Treatment | 1.7416 us | 0.0216 us | 0.63 | 0.01 |
Caches:
Cache line = 64 bytes
L1 Data cache = 32 KB
L1 Instruction cache = 32 KB
L2 cache = 256 KB
L3 cache = 8 MB
Latency:
L1 Data Cache Latency = 4 cycles for simple access via pointer
L1 Data Cache Latency = 5 cycles for access with complex address calculation
L2 Cache Latency = 12 cycles
L3 Cache Latency = 36 cycles
RAM Latency = 36 cycles + 57 ns
internal class AhoCorasickTreeNode
{
// We have two arrays located in different places of the heap.
// one for buckets
private int[] _buckets;
// another for values
private Entry[] _entries;
public AhoCorasickTreeNode GetTransition(char c)
{
// access the length of the bucket array's
var bucket = c % _buckets.Length;
// access an element of the bucket array
for (int i = _buckets[bucket]; i >= 0; i = _entries[i].Next)
{
// access the value array somewhere on the heap
if (_entries[i].Key == c)
{
return _entries[i].Value;
}
// we use the separate chaining method for hash collision resolution
// if case of hash collision we follow the "Next" pointer
// which can point to any place of the values array
}
return null;
}
}
hash collision is resolved by probing
Linear, Quadratic, Double hashing
is not a silver bullet
| Method | Mean | StdDev | Median | Scaled | Scaled-StdDev |
|---------- |-------------- |----------- |-------------- |------- |-------------- |
| Control | 1,732.3850 ns | 22.5726 ns | 1,726.0844 ns | 1.00 | 0.00 |
| Treatment | 723.2748 ns | 6.9478 ns | 722.9084 ns | 0.42 | 0.01 |
public AhoCorasickTreeNode GetTransition(char c)
{
if (_size == 0) return null;
=> var ind = c % _size;
var keyThere = _entries[ind].Key;
...
}
...
; r8d has _size
movzx r9d, dx ; char argument to r9d
mov eax, r9d ; r9d to eax
cdq ; clear edx
idiv r8d ; divide eax by r8d (_size); quotient in ax, remainder in dx
mov eax, edx ; result to eax
...
public AhoCorasickTreeNode GetTransition(char c)
{
if (_size == 0) return null;
var ind = c & (_size - 1); // _size needs to be a power of 2
var keyThere = _entries[ind].Key;
...
}
More bithacks on the Stanford university website
| Method | Mean | StdDev | Scaled | Scaled-StdDev |
|---------- |------------ |---------- |------- |-------------- |
| Control | 757.1089 ns | 8.5518 ns | 1.00 | 0.00 |
| Treatment | 427.0176 ns | 6.4534 ns | 0.56 | 0.01 |
for (var i = 0; i < userAgent.Length; i++)
{
var c = userAgent[i];
...
}
...
; Block 2:
movsxd r8, eax
movzx r8d, word ptr [rcx+r8*2+0xc]
inc eax
cmp edx, eax
jnle 0x7ffdeecd3d39 ; Block 2
...
fixed (char* p = userAgent)
{
var len = userAgent.Length * 2;
var cptr = p;
while (len > 0)
{
var c = *cptr;
cptr++;
len -= 2;
...
}
}
...
; Block 4:
movzx ecx, word ptr [rax]
add rax, 0x2
add edx, 0xfffffffe
test edx, edx
jnle 0x7ffdeecd3d83 ; Block 4
...
| Method | Mean | StdDev | Median | Scaled | Scaled-StdDev |
|---------------- |------------ |---------- |------------ |------- |-------------- |
| Traverse | 132.5022 ns | 0.9385 ns | 132.1233 ns | 1.00 | 0.00 |
| TraverseUnsafe | 70.7301 ns | 0.1986 ns | 70.6969 ns | 0.53 | 0.00 |
[
Node1: [Size, FailureIndex, [Key1, NodeIndex1; KeyN, NodeIndexN;]];
NodeN: ...
]
private readonly byte[] _data;
private static unsafe byte GetKey(byte* currentNodePtr, int ind)
{
return *(byte*)(currentNodePtr + SizeOfSize + SizeOfFailure + ind * (SizeOfKey + SizeOfNode));
}
public unsafe bool Contains(string text)
{
fixed (byte* b = _data)
fixed (char* p = text)
{
var len = text.Length * 2;
var currentNodePtr = b;
var cptr = p;
while (len > 0)
{
var c = *cptr;
cptr++;
len -= 2;
CheckFailure:
var size = *currentNodePtr;
var ind = c & (size - 1);
var key = GetKey(currentNodePtr, ind);
if (key == c)
{
currentNodePtr = GetNext(b, currentNodePtr, ind);
if (currentNodePtr == b) return true;
}
else
{
currentNodePtr = GetFailure(b, currentNodePtr);
if (currentNodePtr != b) goto CheckFailure;
}
}
}
return false;
}
private static readonly byte[] Data = new byte[16 * 1024 * 1024];
for (var i = 0; i < 1000000; i++)
{
// clear the CPU caches
var sum = 0;
for (var j = 0; j < Data.Length; j++)
{
sum += Data[j];
}
// the code we profile
tree.Contains(UserAgent);
}