1 module lincount; 2 3 import std.traits; 4 5 /++ 6 Simple Linear Probabilistic Counter. 7 +/ 8 struct LPCounter 9 { 10 import mir.ndslice.allocation: slice; 11 import mir.ndslice.slice; 12 import mir.ndslice.field: BitwiseField; 13 import mir.ndslice.iterator: FieldIterator; 14 import mir.ndslice.topology: bitwise; 15 16 import std.uuid: UUID; 17 18 private Slice!(SliceKind.contiguous, [1], FieldIterator!(BitwiseField!(size_t*))) map; 19 private size_t _length = 0; 20 21 @disable this(); 22 23 invariant 24 { 25 assert(map.length); 26 assert(map.length % (1024 * 8) == 0); 27 } 28 29 private void set(size_t index) pure nothrow @nogc 30 { 31 if(map[index] == false) 32 { 33 map[index] = true; 34 _length++; 35 } 36 } 37 38 private auto updateLength() 39 { 40 import mir.ndslice.algorithm: count; 41 _length = map.count!"a"; // uses popcnt / llvm_ctpop 42 } 43 44 /// Constructs counter with appropriate size. 45 this(size_t kilobytes) pure nothrow 46 { 47 map = slice!size_t(1024 * kilobytes / size_t.sizeof).bitwise; 48 _length = 0; 49 } 50 51 /++ 52 Constructs counter with predefined dump. 53 Params: 54 dump = 8-byte aligned non-empty data. `dump` must be rounded to kilobytes: `dump.length % 1024 == 0`. 55 +/ 56 this(void[] dump) pure 57 { 58 if (dump.length % 1024) 59 throw new Exception("LPCounter: dump is broken."); 60 map = sliced(cast(size_t[])dump).bitwise; 61 updateLength; 62 } 63 64 /++ 65 Puts integer to a counter. 66 +/ 67 void put(uint data) pure nothrow @nogc 68 { 69 set(cast(size_t)(fmix(data) % map.length)); 70 } 71 72 /// ditto 73 void put(ulong data) pure nothrow @nogc 74 { 75 set(cast(size_t)(fmix(data) % map.length)); 76 } 77 78 /// Puts `UUID` to a counter. 79 void put(UUID data) pure nothrow @nogc 80 { 81 set(data.toHash % map.length); 82 } 83 84 /// Puts raw data to a counter. 85 void put(in void[] data) pure nothrow @nogc 86 { 87 //hashOf(data); 88 static if (__VERSION__ >= 2072) 89 { 90 import std.digest.digest: digest; 91 import std.digest.murmurhash: MurmurHash3; 92 alias D = MurmurHash3!(128, 64); 93 } 94 else 95 { 96 import lincount.murmurhash; 97 alias D = MurmurHash3_x64_128; 98 } 99 auto hashed = cast(ulong[2])digest!D(data); 100 set((hashed[0] ^ hashed[1]) % map.length); 101 } 102 103 104 /++ 105 Merges a counter into this one. 106 Params: 107 counters = Counter with the same size. 108 +/ 109 void opOpAssign(string op : "+")(const LPCounter counter) 110 { 111 import std.exception; 112 enforce(counter.size == size, "The size of counters must be the same."); 113 auto repr = cast(size_t[])dump; 114 repr[] |= (cast(const(size_t)[])counter.dump)[]; 115 updateLength; 116 } 117 118 /++ 119 Merges a set of $(LREF LPCounter)s into this one. 120 Params: 121 counters = An iterable set of counters. All counters must have the same sizes as this counter. 122 +/ 123 void opOpAssign(string op : "+", Range)(Range counters) 124 if (isIterable!Range && (is(ForeachType!Range : const(LPCounter)) || is(ForeachType!Range : const(LPCounter)*))) 125 { 126 import std.exception; 127 auto repr = cast(size_t[])dump; 128 foreach (counter; counters) 129 { 130 auto r = cast(const(size_t)[])counter.dump; 131 enforce (r.length == repr.length, "All counters must have the same sizes."); 132 repr[] |= r[]; 133 } 134 updateLength; 135 } 136 137 /// Returns: approximate number of elements. 138 ulong count() nothrow @nogc 139 { 140 import std.math: log, lround; 141 if(map.length > _length) 142 return lround(map.length * -log(real(map.length - _length) / map.length)); 143 else 144 return map.length; 145 } 146 147 /// Returns: size of the counter in kilobytes. 148 size_t size() const @property pure nothrow @nogc 149 { 150 return map.length() / (size_t.sizeof * 1024); 151 } 152 153 /// Returns: raw representation of a counter. 154 const(ubyte)[] dump() const pure nothrow @nogc 155 out(res) { 156 assert(res.length % 1024 == 0); 157 } 158 body { 159 return cast(ubyte[]) map._iterator._field._field[0 .. map.length / (size_t.sizeof * 8)]; 160 } 161 } 162 163 /// 164 unittest 165 { 166 auto counter = LPCounter(32); 167 counter.put(100U); 168 counter.put(100UL); 169 counter.put("100"); 170 assert(counter.count == 3); 171 counter.put("101"); 172 assert(counter.count == 4); 173 } 174 175 /// 176 unittest 177 { 178 auto a = LPCounter(32); 179 a.put(100U); 180 a.put(100UL); 181 a.put("100"); 182 assert(a.count == 3); 183 184 auto b = LPCounter(32); 185 b.put(100U); // intersection 186 b.put(200U); 187 b.put("LP"); 188 assert(b.count == 3); 189 190 auto c = LPCounter(32); 191 192 c += [a, b]; 193 assert(c.count == 5); 194 195 a += b; 196 assert(a.count == 5); 197 } 198 199 // Murmurhash mix 200 private: 201 202 uint fmix(uint h) pure nothrow @nogc 203 { 204 h ^= h >> 16; 205 h *= 0x85ebca6b; 206 h ^= h >> 13; 207 h *= 0xc2b2ae35; 208 h ^= h >> 16; 209 return h; 210 } 211 212 ulong fmix(ulong k) pure nothrow @nogc 213 { 214 k ^= k >> 33; 215 k *= 0xff51afd7ed558ccd; 216 k ^= k >> 33; 217 k *= 0xc4ceb9fe1a85ec53; 218 k ^= k >> 33; 219 return k; 220 }