1 /**
2 Copyright: Copyright (c) 2020, Joakim Brännström. All rights reserved.
3 License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0)
4 Author: Joakim Brännström (joakim.brannstrom@gmx.com)
5 
6 A convenient library for calculating the hash of bits of data.
7 */
8 module my.hash;
9 
10 import std.digest.crc : CRC64ISO;
11 import std.digest.murmurhash : MurmurHash3;
12 
13 import std.format : FormatSpec;
14 import std.format : formatValue, formattedWrite;
15 import std.range.primitives : put;
16 
17 import my.path : AbsolutePath;
18 
// Generic 64-bit checksum names. They forward to the CRC64-ISO hasher from
// std.digest.crc and to the Crc64Iso type/functions declared in this module.
alias BuildChecksum64 = CRC64ISO;
alias Checksum64 = Crc64Iso;
alias makeChecksum64 = makeCrc64Iso;
alias toChecksum64 = toCrc64Iso;

// Generic 128-bit checksum names. They forward to the MurmurHash3 hasher from
// std.digest.murmurhash and to the Murmur3 type/functions declared in this module.
alias BuildChecksum128 = MurmurHash3!(128, 64);
alias Checksum128 = Murmur3;
alias makeChecksum128 = makeMurmur3;
alias toChecksum128 = toMurmur3;
28 
/// Compute the checksum of the file at `p`.
///
/// The file is opened through `std.mmfile.MmFile` and its content is handed to
/// `checksumFn` as a `const(ubyte)[]` slice. Whatever `checksumFn` returns is
/// returned to the caller.
auto checksum(alias checksumFn)(AbsolutePath p) {
    import std.mmfile : MmFile;

    scope mapped = new MmFile(p.toString);
    const(ubyte)[] bytes = cast(const(ubyte)[]) mapped[];
    return checksumFn(bytes);
}
36 
@("shall calculate the checksum")
unittest {
    // Smoke test: only verifies that checksumming an existing file runs
    // without throwing; the resulting value is not inspected.
    auto target = AbsolutePath("/bin/true");
    auto digest = checksum!makeMurmur3(target);
}
41 
/// Copy a value into a fixed-size `ubyte` array that is `T.sizeof` bytes long.
/// Note: this is very slow. Prefer std.bitmanip.nativeToBigEndian.
auto toBytes(T)(T v) @trusted pure nothrow @nogc {
    import std.conv : emplace;

    ubyte[T.sizeof] storage;
    // Construct a copy of `v` directly inside the byte buffer.
    auto slot = cast(T*)&storage;
    cast(void) emplace!T(slot, v);
    return storage;
}
52 
/// Reinterpret the eight bytes of `v`, as they are laid out in memory, as a `long`.
long toLong(ubyte[8] v) @trusted pure nothrow @nogc {
    auto asLong = cast(long*) v.ptr;
    return *asLong;
}
56 
/// Reinterpret the eight bytes of `v`, as they are laid out in memory, as a `ulong`.
ulong toUlong(ubyte[8] v) @trusted pure nothrow @nogc {
    auto asUlong = cast(ulong*) v.ptr;
    return *asUlong;
}
60 
/// Convert to size_t for use in e.g. operator overload toHash.
///
/// When `size_t` is 4 bytes and `T` is 8 bytes the lower and upper 32-bit
/// halves of `v` are added together; otherwise `v` is returned as is.
size_t toSizeT(T)(T v) if (is(T : uint) || is(T : ulong)) {
    static if (size_t.sizeof == 4 && T.sizeof == 8) {
        const lower = cast(uint) v;
        const upper = cast(uint)(v >> 32);
        return lower + upper;
    } else {
        return v;
    }
}

/// ditto.
size_t toSizeT(const(ubyte)[4] v) @trusted pure nothrow @nogc {
    // Read the four bytes as one `uint`, exactly as they are laid out in memory.
    const word = *cast(const(uint)*) v.ptr;
    return toSizeT(word);
}

/// ditto.
size_t toSizeT(const(ubyte)[8] v) @trusted pure nothrow @nogc {
    // Read the eight bytes as one `ulong`, exactly as they are laid out in memory.
    const word = *cast(const(ulong)*) v.ptr;
    return toSizeT(word);
}
78 
/// Create a 128bit hash of the bytes in `p`.
Murmur3 makeMurmur3(const(ubyte)[] p) @safe nothrow {
    BuildChecksum128 digest;
    digest.put(p);
    return digest.toMurmur3();
}
84 
/// Convenient function to convert 16 raw bytes to a checksum type.
///
/// Bytes 0..8 become `c0` and bytes 8..16 become `c1`, each read as a `ulong`
/// exactly as laid out in memory.
Murmur3 toMurmur3(const(ubyte)[16] p) @trusted pure nothrow @nogc {
    const halves = cast(const(ulong)*) p.ptr;
    return Murmur3(halves[0], halves[1]);
}
91 
/// Call `finish` on the hasher `h` and convert the returned bytes to a checksum type.
Murmur3 toMurmur3(ref BuildChecksum128 h) @safe pure nothrow @nogc {
    const(ubyte)[16] digest = h.finish();
    return toMurmur3(digest);
}
95 
/// 128bit hash, stored as two 64-bit words.
struct Murmur3 {
    /// First 64-bit word of the hash.
    ulong c0;
    /// Second 64-bit word of the hash.
    ulong c1;

    /// Hash for use in e.g. associative arrays: the sum of both words reduced
    /// to `size_t`.
    size_t toHash() @safe nothrow const pure @nogc {
        return (c0 + c1).toSizeT;
    }

    /// Two hashes are equal when both words are equal.
    bool opEquals(const typeof(this) o) const nothrow @safe pure @nogc {
        return c0 == o.c0 && c1 == o.c1;
    }

    /// Order by `c0` first and by `c1` second.
    ///
    /// The operand is taken by value (like `opEquals`) so that temporaries,
    /// e.g. the results of two function calls, can be compared directly; a
    /// `ref` parameter would reject rvalues.
    ///
    /// Returns: -1 if `this` is less than `rhs`, 1 if it is bigger and 0 if
    /// they are equal.
    int opCmp(const typeof(this) rhs) @safe pure nothrow const @nogc {
        if (c0 < rhs.c0)
            return -1;
        if (c0 > rhs.c0)
            return 1;
        if (c1 < rhs.c1)
            return -1;
        if (c1 > rhs.c1)
            return 1;
        return 0;
    }

    /// Write the hash to `w` as `c0_c1`.
    ///
    /// With the format specifier `%x` both words are written in hexadecimal,
    /// with any other specifier they are written using `%s`.
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        if (fmt.spec == 'x')
            formattedWrite(w, "%x_%x", c0, c1);
        else
            formattedWrite(w, "%s_%s", c0, c1);
    }
}
129 
/// Create a 64bit hash of the bytes in `p`.
Crc64Iso makeCrc64Iso(const(ubyte)[] p) @trusted pure nothrow @nogc {
    BuildChecksum64 crc;
    crc.put(p);
    return crc.toCrc64Iso();
}
136 
/// Convenient function to convert 8 raw bytes to a checksum type.
///
/// The bytes are read as one `ulong`, exactly as they are laid out in memory.
Crc64Iso toCrc64Iso(const(ubyte)[8] p) @trusted pure nothrow @nogc {
    const value = *cast(const(ulong)*) p.ptr;
    return Crc64Iso(value);
}
141 
/// Read the current value of the hasher `h` via `peek` and convert it to a
/// checksum type.
Crc64Iso toCrc64Iso(ref BuildChecksum64 h) @trusted pure nothrow @nogc {
    const(ubyte)[8] digest = h.peek;
    const value = *cast(const(ulong)*) digest.ptr;
    return Crc64Iso(value);
}
146 
/** 64-bit checksum.
 *
 * It is intended to be generically used in Dextool when such a checksum is needed.
 *
 * CRC64 ISO is used because there exist implementations in other languages,
 * which makes it possible to calculate the checksum in e.g. Python and compare
 * it with the one from Dextool.
 *
 * TODO: check if Python has a 64ISO or 64ECMA implementation.
 */
struct Crc64Iso {
    /// The checksum value.
    ulong c0;

    /// Hash for use in e.g. associative arrays.
    ///
    /// The value is reduced with `toSizeT` rather than returned directly: a
    /// plain `return c0;` is an implicit `ulong` to `size_t` narrowing, which
    /// does not compile when `size_t` is 32 bits wide. When `size_t` is 64
    /// bits wide the result is `c0` unchanged.
    size_t toHash() @safe pure nothrow const @nogc scope {
        return c0.toSizeT;
    }

    /// Two checksums are equal when their values are equal.
    bool opEquals(const typeof(this) s) @safe pure nothrow const @nogc scope {
        return c0 == s.c0;
    }

    /// Write the checksum to `w`.
    ///
    /// With the format specifier `%x` the value is written in hexadecimal,
    /// with any other specifier it is written using `%s`.
    void toString(Writer, Char)(scope Writer w, FormatSpec!Char fmt) const {
        if (fmt.spec == 'x')
            formattedWrite(w, "%x", c0);
        else
            formattedWrite(w, "%s", c0);
    }
}