1 module lincount;
2 
3 import std.traits;
4 
5 /++
6 Simple Linear Probabilistic Counter.
7 +/
8 struct LPCounter
9 {
10 	import mir.ndslice.allocation: slice;
11 	import mir.ndslice.slice;
12 	import mir.ndslice.field: BitwiseField;
13 	import mir.ndslice.iterator: FieldIterator;
14 	import mir.ndslice.topology: bitwise;
15 
16 	import std.uuid: UUID;
17 
18 	private Slice!(SliceKind.contiguous, [1], FieldIterator!(BitwiseField!(size_t*))) map;
19 	private size_t _length = 0;
20 
21 	@disable this();
22 
23 	invariant
24 	{
25 		assert(map.length);
26 		assert(map.length % (1024 * 8) == 0);
27 	}
28 
29 	private void set(size_t index) pure nothrow @nogc
30 	{
31 		if(map[index] == false)
32 		{
33 			map[index] = true;
34 			_length++;
35 		}
36 	}
37 
38 	private auto updateLength()
39 	{
40 		import mir.ndslice.algorithm: count;
41 		_length = map.count!"a";  // uses popcnt / llvm_ctpop
42 	}
43 
44 	/// Constructs counter with appropriate size.
45 	this(size_t kilobytes) pure nothrow
46 	{
47 		map = slice!size_t(1024 * kilobytes / size_t.sizeof).bitwise;
48 		_length = 0;
49 	}
50 
51 	/++
52 	Constructs counter with predefined dump.
53 	Params:
54 		dump = 8-byte aligned non-empty data. `dump` must be rounded to kilobytes: `dump.length % 1024 == 0`.
55 	+/
56 	this(void[] dump) pure
57 	{
58 		if (dump.length % 1024)
59 			throw new Exception("LPCounter: dump is broken.");
60 		map = sliced(cast(size_t[])dump).bitwise;
61 		updateLength;
62 	}
63 
64 	/++
65 	Puts integer to a counter.
66 	+/
67 	void put(uint data) pure nothrow @nogc
68 	{
69 		set(cast(size_t)(fmix(data) % map.length));
70 	}
71 
72 	/// ditto
73 	void put(ulong data) pure nothrow @nogc
74 	{
75 		set(cast(size_t)(fmix(data) % map.length));
76 	}
77 
78 	/// Puts `UUID` to a counter.
79 	void put(UUID data) pure nothrow @nogc
80 	{
81 		set(data.toHash % map.length);
82 	}
83 
84 	/// Puts raw data to a counter.
85 	void put(in void[] data) pure nothrow @nogc
86 	{
87 		//hashOf(data);
88 		static if (__VERSION__ >= 2072)
89 		{
90 			import std.digest.digest: digest;
91 			import std.digest.murmurhash: MurmurHash3;
92 			alias D = MurmurHash3!(128, 64);
93 		}
94 		else
95 		{
96 			import lincount.murmurhash;
97 			alias D = MurmurHash3_x64_128;
98 		}
99 		auto hashed = cast(ulong[2])digest!D(data);
100 		set((hashed[0] ^ hashed[1]) % map.length);
101 	}
102 
103 
104 	/++
105 	Merges a counter into this one.
106 	Params:
107 		counters = Counter with the same size.
108 	+/
109 	void opOpAssign(string op : "+")(const LPCounter counter)
110 	{
111 		import std.exception;
112 		enforce(counter.size == size, "The size of counters must be the same.");
113 		auto repr = cast(size_t[])dump;
114 		repr[] |= (cast(const(size_t)[])counter.dump)[];
115 		updateLength;
116 	}
117 
118 	/++
119 	Merges a set of $(LREF LPCounter)s into this one.
120 	Params:
121 		counters = An iterable set of counters. All counters must have the same sizes as this counter.
122 	+/
123 	void opOpAssign(string op : "+", Range)(Range counters)
124 		if (isIterable!Range && (is(ForeachType!Range : const(LPCounter)) || is(ForeachType!Range : const(LPCounter)*)))
125 	{
126 		import std.exception;
127 		auto repr = cast(size_t[])dump;
128 		foreach (counter; counters)
129 		{
130 			auto r = cast(const(size_t)[])counter.dump;
131 			enforce (r.length == repr.length, "All counters must have the same sizes.");
132 			repr[] |= r[];
133 		}
134 		updateLength;
135 	}
136 
137 	/// Returns: approximate number of elements.
138 	ulong count() nothrow @nogc
139 	{
140 		import std.math: log, lround;
141 		if(map.length > _length)
142 			return lround(map.length * -log(real(map.length - _length) / map.length));
143 		else
144 			return map.length;
145 	}
146 
147 	/// Returns: size of the counter in kilobytes.
148 	size_t size() const @property pure nothrow @nogc
149 	{
150 		return map.length() / (size_t.sizeof * 1024);
151 	}
152 
153 	/// Returns: raw representation of a counter.
154 	const(ubyte)[] dump() const pure nothrow @nogc
155 	out(res) {
156 		assert(res.length % 1024 == 0);
157 	}
158 	body {
159 		return cast(ubyte[]) map._iterator._field._field[0 .. map.length / (size_t.sizeof * 8)];
160 	}
161 }
162 
163 ///
164 unittest
165 {
166 	auto counter = LPCounter(32);
167 	counter.put(100U);
168 	counter.put(100UL);
169 	counter.put("100");
170 	assert(counter.count == 3);
171 	counter.put("101");
172 	assert(counter.count == 4);
173 }
174 
175 ///
176 unittest
177 {
178 	auto a = LPCounter(32);
179 	a.put(100U);
180 	a.put(100UL);
181 	a.put("100");
182 	assert(a.count == 3);
183 	
184 	auto b = LPCounter(32);
185 	b.put(100U); // intersection
186 	b.put(200U);
187 	b.put("LP");
188 	assert(b.count == 3);
189 
190 	auto c = LPCounter(32);
191 
192 	c += [a, b];
193 	assert(c.count == 5);
194 
195 	a += b;
196 	assert(a.count == 5);
197 }
198 
199 // Murmurhash mix
200 private:
201 
202 uint fmix(uint h) pure nothrow @nogc
203 {
204     h ^= h >> 16;
205     h *= 0x85ebca6b;
206     h ^= h >> 13;
207     h *= 0xc2b2ae35;
208     h ^= h >> 16;
209     return h;
210 }
211 
212 ulong fmix(ulong k) pure nothrow @nogc
213 {
214     k ^= k >> 33;
215     k *= 0xff51afd7ed558ccd;
216     k ^= k >> 33;
217     k *= 0xc4ceb9fe1a85ec53;
218     k ^= k >> 33;
219     return k;
220 }