Consistent Hashing SHA2 512
Hashing algorithms work with byte values and as such to develop a consistent hashing algorithm across disparate code bases you need to be consistent with your byte encodings that are supplied to the hashing algorithm,
Given I have a class like this
public class FullName
{
public string FirstName { get; set; }
public string LastName { get; set; }
}
With a helper extension method like so to convert byte -> strings
public static class ByteExtensions
{
public static string AsText(this byte[] bytes)
{
return string.Join("", (bytes ?? new byte[] { }).Select(z => z.ToString("X2")));
}
}
The code to create a SHA2 512 hash is
public static string CodeHash(FullName name)
{
using var hasher = new SHA512Managed();
var jsonText = JsonConvert.SerializeObject(name);
var bytes = hasher.ComputeHash(System.Text.Encoding.UTF8.GetBytes(jsonText));
return bytes.AsText();
}
Serializes to this JSON
{"FirstName":"Rhade","LastName":"McDowell"}
Output is
6C5C7181B579B8B82BD57A99C7CFBA4E2563E6ED3098B3E16996CE5F8434C22E8EA0B33F0751268B4DBE0B0E6E652411A47A4E494FF8FBB265FC7409974CE3B3
Ok, this is going well now let's write the same code in SQL
SELECT HASHBYTES('SHA2_512', '{"FirstName":"Rhade","LastName":"McDowell"}')
Output is
6C5C7181B579B8B82BD57A99C7CFBA4E2563E6ED3098B3E16996CE5F8434C22E8EA0B33F0751268B4DBE0B0E6E652411A47A4E494FF8FBB265FC7409974CE3B3
This matches so great now convert to parameterized query to make it Dapper compatible by converting parameters into JSON path
DECLARE @firstName varchar(100) = 'Rhade'
DECLARE @LastName varchar(100) = 'McDowell'
DECLARE @jsonText varchar(MAX)
SELECT @jsonText = (SELECT @FirstName as FirstName, @LastName as LastName FOR JSON PATH, WITHOUT_ARRAY_WRAPPER)
SELECT HASHBYTES('SHA2_512', @jsonText)
Output still the same, now remove the intermediate @jsonText
SQL variable because want to make it parameterized without Dynamic SQL calls
DECLARE @firstName varchar(100) = 'Rhade'
DECLARE @LastName varchar(100) = 'McDowell'
SELECT HASHBYTES('SHA2_512',(SELECT @FirstName as FirstName, @LastName as LastName FOR JSON PATH, WITHOUT_ARRAY_WRAPPER))
Output
EA89569EE1527F7CFCC06424381D21505543D8696DCD078FBCFB5AF082DE7F1B87172199B54D5818B7EF7F8176D68A4083A81DF42803BFC568CA5C15665DF167
Uh oh what happened? The sql to json text is still the same, but the inferred type from the statement is different (SELECT @FirstName as FirstName, @LastName as LastName FOR JSON PATH, WITHOUT_ARRAY_WRAPPER)
After doing quite some digging found that it is nvarchar
not varchar
so that means the above code encoding should encode to Unicode
instead of UTF8
Change hash code to use Unicode encoder
public static string CodeHash(FullName name)
{
using var hasher = new SHA512Managed();
var jsonText = JsonConvert.SerializeObject(name);
var bytes = hasher.ComputeHash(System.Text.Encoding.Unicode.GetBytes(jsonText));
return bytes.AsText();
}
Output
EA89569EE1527F7CFCC06424381D21505543D8696DCD078FBCFB5AF082DE7F1B87172199B54D5818B7EF7F8176D68A4083A81DF42803BFC568CA5C15665DF167
Ok things are consistent now, but let's be sure on conversion in SQL therefore
SELECT HASHBYTES('SHA2_512', (SELECT CONVERT(nvarchar(MAX), (SELECT @firstName as FirstName, @LastName as LastName FOR JSON PATH, WITHOUT_ARRAY_WRAPPER))))
Now convert this into the Dapper query
public static string DbHash(FullName name)
{
using var conn = new SqlConnection(new SqlConnectionStringBuilder
{
InitialCatalog = "model",
DataSource
= "localhost",
IntegratedSecurity = true
}.ConnectionString);
var bytes = conn.QueryFirst<byte[]>(
@"SELECT HASHBYTES('SHA2_512', (SELECT CONVERT(nvarchar(MAX), (SELECT @firstName as FirstName, @LastName as LastName FOR JSON PATH, WITHOUT_ARRAY_WRAPPER))))",
new
{
name.FirstName,
name.LastName
});
return bytes.AsText();
}
And compare
static void Main(string[] args)
{
var name = new FullName
{
FirstName = "Rhade",
LastName = "McDowell"
};
var hashes = new
{
FromCode = CodeHash(name),
FromDb = DbHash(name)
};
Console.WriteLine($"Code Hash: {hashes.FromCode}");
Console.WriteLine($"DB Hash: {hashes.FromDb}");
Console.WriteLine(string.Equals(hashes.FromCode, hashes.FromDb, StringComparison.OrdinalIgnoreCase)
? "Hashes match"
: "Hashes don't match");
Console.ReadLine();
}
Great works consistently now
Complete Example Program
using System;
using System.Data.SqlClient;
using System.Linq;
using System.Security.Cryptography;
using Dapper;
using Newtonsoft.Json;
namespace ConsoleApp1
{
public class FullName
{
public string FirstName { get; set; }
public string LastName { get; set; }
}
public static class ByteExtensions
{
public static string AsText(this byte[] bytes)
{
return string.Join("", (bytes ?? new byte[] { }).Select(z => z.ToString("X2")));
}
}
class Program
{
public static string CodeHash(FullName name)
{
using var hasher = new SHA512Managed();
var jsonText = JsonConvert.SerializeObject(name);
var bytes = hasher.ComputeHash(System.Text.Encoding.Unicode.GetBytes(jsonText));
return bytes.AsText();
}
public static string DbHash(FullName name)
{
using var conn = new SqlConnection(new SqlConnectionStringBuilder
{
InitialCatalog = "model",
DataSource
= "localhost",
IntegratedSecurity = true
}.ConnectionString);
var bytes = conn.QueryFirst<byte[]>(
@"SELECT HASHBYTES('SHA2_512', (SELECT CONVERT(nvarchar(MAX), (SELECT @firstName as FirstName, @LastName as LastName FOR JSON PATH, WITHOUT_ARRAY_WRAPPER))))",
new
{
name.FirstName,
name.LastName
});
return bytes.AsText();
}
static void Main(string[] args)
{
var name = new FullName
{
FirstName = "Rhade",
LastName = "McDowell"
};
var hashes = new
{
FromCode = CodeHash(name),
FromDb = DbHash(name)
};
Console.WriteLine($"Code Hash: {hashes.FromCode}");
Console.WriteLine($"DB Hash: {hashes.FromDb}");
Console.WriteLine(string.Equals(hashes.FromCode, hashes.FromDb, StringComparison.OrdinalIgnoreCase)
? "Hashes match"
: "Hashes don't match");
Console.ReadLine();
}
}
}