// Code:
{Copyright: Hagen Reddmann Nachricht an: HaReddmann bei T-Online punkt de
All rights reserved,
der Autor übernimmt keinerlei Haftungen, wie gesehen so gekauft
Author: Hagen Reddmann
Version: Freeware,
Delphi 5, designed and tested under D5
Description: schnelle Dateivergleichsroutinen basierend auf dem MD4 Hash
Remarks: Einiges zur Geschwindigkeit. Testfall sind 160Mb große Dateien
auf einer eher veralteten HD auf Win2k P4 1.5GHz 512Mb RAM.
Man muß beim Vergleich zweier Dateien drei Fälle untersuchen.
a) die Dateien haben den gleichen Dateinamen oder unterschiedliche
Dateigrößen, das ist der BestCase und dauert nur wenige
Millisekunden um überprüft zu werden.
b) die Dateien sind absolut identisch nur der Dateiname unterscheidet sich
Dieser Fall ist der WorstCase der aber auf Grund der MD4 Prüfsummen
sehr sehr unwahrscheinlich ist -> 1/2^128.
Ohne Vorberechnung des MD4 Hashs einer der beiden Dateien dauert
der Vergleich ca. 37 Sekunden.
Mit Vorberechnung des MD4 Hashs einer der beiden Dateien dauert
der Vergleich ca. 26 Sekunden, also ca. 1,4-mal so schnell (rund 40% schneller). Dies ist
immer dann der Fall wenn man den Hash schon in einer DB gespeichert hatte.
c) die Dateien haben gleiche Größe, unterschiedliche Dateinamen,
sind aber nicht binär identisch. Dieser Fall tritt mit einer
Wahrscheinlichkeit von 1/2^16-1/2^63 auf.
Ohne vorberechneten Hash dauerte das ca. 12 Sekunden, mit
vorberechneten Hash dauerte dies ca. 6 Sekunden.
Man kann also theoretisch sagen, dass Pi*Daumen
2^16-1 von 2^16 Dateien in wenigen Millisekunden überprüft werden, auf
Grund ihrer unterschiedlichen Dateigrößen
2^128-1 von 2^128 Dateien sich in ihren Hashwerten unterscheiden und
innerhalb von 6 oder 12 Sekunden überprüft werden, je nach dem ob
der eine Hashwert schon vorberechnet wurde.
1 von 2^128 * 2^16 Dateien wirklich physikalisch verglichen werden
müssen, was in meinem Testfall 26 oder 37 Sekunden gedauert hat.
}
unit FileCompare;
interface
// Returns the size of the file FileName in bytes as an Int64.
function GetFileSize(const FileName: String): Int64;
// Computes the (truncated) MD4 hash of the file's content and returns it
// as a lowercase hexadecimal string.
function HashFile(const FileName: String): String;
// Compares the content of two files byte by byte.
function CompareFilePhysical(const FileName1, FileName2: String): Boolean;
// Compares two files in stages: file names, then file sizes, then MD4 hashes,
// and finally the content byte by byte.
function CompareFile(const FileName1, FileName2: String): Boolean; overload;
// Same staged comparison (names + sizes + MD4 hash + byte by byte), but the
// MD4 hash of the second file is supplied by the caller in FileHash2,
// e.g. read from a database, instead of being computed here.
function CompareFile(const FileName1, FileName2, FileHash2: String): Boolean; overload;
// Same staged comparison (names + sizes + MD4 hash + byte by byte), but both
// the MD4 hash (FileHash2) and the size (FileSize2) of the second file are
// supplied by the caller, e.g. read from a database.
function CompareFile(const FileName1, FileName2, FileHash2: String; const FileSize2: Int64): Boolean; overload;
implementation
uses SysUtils, Classes;
const
// Number of bytes read per chunk by HashFile and CompareFilePhysical.
BufferSize = 65536;
{ Returns the size in bytes of the file named FileName.

  The size is taken from the directory entry located with FindFirst, so the
  file itself is never opened. The 64-bit result is assembled from the high
  and low 32-bit halves stored in the Win32 find data.

  Raises the exception produced by RaiseLastWin32Error when FindFirst does
  not locate an entry for FileName. }
function GetFileSize(const FileName: String): Int64;
var
  Entry: TSearchRec;
begin
  // Guard clause: nothing was found, so there is no search handle to close.
  if FindFirst(FileName, faAnyFile, Entry) <> 0 then
    RaiseLastWin32Error;
  try
    Int64Rec(Result).Lo := Entry.FindData.nFileSizeLow;
    Int64Rec(Result).Hi := Entry.FindData.nFileSizeHigh;
  finally
    // Always release the search handle opened by the successful FindFirst.
    FindClose(Entry);
  end;
end;
{ Computes a truncated MD4 hash over the content of the file FileName and
  returns it as a string of 32 lowercase hexadecimal characters.

  The file is opened read-only (other writers are denied) and processed in
  chunks of BufferSize bytes. The hash is deliberately NOT a standard MD4:
  the compression function is MD4's, but no length padding is appended; a
  trailing partial 64-byte block is simply zero-filled. The values are
  therefore only comparable with values produced by this routine.

  Any exception raised by TFileStream.Create (e.g. the file cannot be
  opened) propagates to the caller. }
function HashFile(const FileName: String): String;
type
  PMD4Digest = ^TMD4Digest;
  TMD4Digest = array[0..3] of Cardinal;   // chaining values A, B, C, D
  PMD4Buffer = ^TMD4Buffer;
  TMD4Buffer = array[0..15] of Cardinal;  // one 64-byte input block
  PByte = ^Byte;

  // Loads the four MD4 initial chaining values into Digest.
  procedure MD4Init(var Digest: TMD4Digest);
  begin
    Digest[0] := $67452301;
    Digest[1] := $EFCDAB89;
    Digest[2] := $98BADCFE;
    Digest[3] := $10325476;
  end;

  // Feeds Size bytes starting at Data into Digest, 64 bytes at a time.
  // A trailing partial block is zero-padded and hashed as well.
  procedure MD4Update(var Digest: TMD4Digest; Data: Pointer; Size: LongInt);
  // ATTENTION: this is a truncated MD4, a real MD4 finalisation is not needed here

  // The assembler version below is 32-bit x86 code, so it is only selected
  // on that CPU; every other target uses the pure Pascal version.
  {$IFDEF CPU386}
    {$DEFINE UseASM}
  {$ENDIF}

  {$IFNDEF UseASM}
    // The Pascal version relies on 32-bit wrap-around arithmetic, so overflow
    // and range checks are switched off for it and restored afterwards.
    {$IFOPT Q+}{$DEFINE MD4RestoreQ}{$Q-}{$ENDIF}
    {$IFOPT R+}{$DEFINE MD4RestoreR}{$R-}{$ENDIF}

    // MD4 compression function: mixes one 64-byte block into Digest.
    procedure MD4Calc(var Digest: TMD4Digest; Buffer: PMD4Buffer);
    var
      A,B,C,D: Cardinal;
    begin
      A := Digest[0];
      B := Digest[1];
      C := Digest[2];
      D := Digest[3];
      // round 1: F(x,y,z) = (x and y) or (not x and z), rotations 3/7/11/19
      Inc(A, Buffer[ 0] + (B and C or not B and D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[ 1] + (A and B or not A and C)); D := D shl 7 or D shr 25;
      Inc(C, Buffer[ 2] + (D and A or not D and B)); C := C shl 11 or C shr 21;
      Inc(B, Buffer[ 3] + (C and D or not C and A)); B := B shl 19 or B shr 13;
      Inc(A, Buffer[ 4] + (B and C or not B and D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[ 5] + (A and B or not A and C)); D := D shl 7 or D shr 25;
      Inc(C, Buffer[ 6] + (D and A or not D and B)); C := C shl 11 or C shr 21;
      Inc(B, Buffer[ 7] + (C and D or not C and A)); B := B shl 19 or B shr 13;
      Inc(A, Buffer[ 8] + (B and C or not B and D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[ 9] + (A and B or not A and C)); D := D shl 7 or D shr 25;
      Inc(C, Buffer[10] + (D and A or not D and B)); C := C shl 11 or C shr 21;
      Inc(B, Buffer[11] + (C and D or not C and A)); B := B shl 19 or B shr 13;
      Inc(A, Buffer[12] + (B and C or not B and D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[13] + (A and B or not A and C)); D := D shl 7 or D shr 25;
      Inc(C, Buffer[14] + (D and A or not D and B)); C := C shl 11 or C shr 21;
      Inc(B, Buffer[15] + (C and D or not C and A)); B := B shl 19 or B shr 13;
      // round 2: G(x,y,z) = majority, constant $5A827999, rotations 3/5/9/13
      Inc(A, Buffer[ 0] + $5A827999 + (B and C or B and D or C and D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[ 4] + $5A827999 + (A and B or A and C or B and C)); D := D shl 5 or D shr 27;
      Inc(C, Buffer[ 8] + $5A827999 + (D and A or D and B or A and B)); C := C shl 9 or C shr 23;
      Inc(B, Buffer[12] + $5A827999 + (C and D or C and A or D and A)); B := B shl 13 or B shr 19;
      Inc(A, Buffer[ 1] + $5A827999 + (B and C or B and D or C and D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[ 5] + $5A827999 + (A and B or A and C or B and C)); D := D shl 5 or D shr 27;
      Inc(C, Buffer[ 9] + $5A827999 + (D and A or D and B or A and B)); C := C shl 9 or C shr 23;
      Inc(B, Buffer[13] + $5A827999 + (C and D or C and A or D and A)); B := B shl 13 or B shr 19;
      Inc(A, Buffer[ 2] + $5A827999 + (B and C or B and D or C and D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[ 6] + $5A827999 + (A and B or A and C or B and C)); D := D shl 5 or D shr 27;
      Inc(C, Buffer[10] + $5A827999 + (D and A or D and B or A and B)); C := C shl 9 or C shr 23;
      Inc(B, Buffer[14] + $5A827999 + (C and D or C and A or D and A)); B := B shl 13 or B shr 19;
      Inc(A, Buffer[ 3] + $5A827999 + (B and C or B and D or C and D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[ 7] + $5A827999 + (A and B or A and C or B and C)); D := D shl 5 or D shr 27;
      Inc(C, Buffer[11] + $5A827999 + (D and A or D and B or A and B)); C := C shl 9 or C shr 23;
      Inc(B, Buffer[15] + $5A827999 + (C and D or C and A or D and A)); B := B shl 13 or B shr 19;
      // round 3: H(x,y,z) = x xor y xor z, constant $6ED9EBA1, rotations 3/9/11/15
      Inc(A, Buffer[ 0] + $6ED9EBA1 + (B xor C xor D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[ 8] + $6ED9EBA1 + (A xor B xor C)); D := D shl 9 or D shr 23;
      Inc(C, Buffer[ 4] + $6ED9EBA1 + (D xor A xor B)); C := C shl 11 or C shr 21;
      Inc(B, Buffer[12] + $6ED9EBA1 + (C xor D xor A)); B := B shl 15 or B shr 17;
      Inc(A, Buffer[ 2] + $6ED9EBA1 + (B xor C xor D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[10] + $6ED9EBA1 + (A xor B xor C)); D := D shl 9 or D shr 23;
      Inc(C, Buffer[ 6] + $6ED9EBA1 + (D xor A xor B)); C := C shl 11 or C shr 21;
      Inc(B, Buffer[14] + $6ED9EBA1 + (C xor D xor A)); B := B shl 15 or B shr 17;
      Inc(A, Buffer[ 1] + $6ED9EBA1 + (B xor C xor D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[ 9] + $6ED9EBA1 + (A xor B xor C)); D := D shl 9 or D shr 23;
      Inc(C, Buffer[ 5] + $6ED9EBA1 + (D xor A xor B)); C := C shl 11 or C shr 21;
      Inc(B, Buffer[13] + $6ED9EBA1 + (C xor D xor A)); B := B shl 15 or B shr 17;
      Inc(A, Buffer[ 3] + $6ED9EBA1 + (B xor C xor D)); A := A shl 3 or A shr 29;
      Inc(D, Buffer[11] + $6ED9EBA1 + (A xor B xor C)); D := D shl 9 or D shr 23;
      Inc(C, Buffer[ 7] + $6ED9EBA1 + (D xor A xor B)); C := C shl 11 or C shr 21;
      Inc(B, Buffer[15] + $6ED9EBA1 + (C xor D xor A)); B := B shl 15 or B shr 17;
      // feed-forward: add the block result onto the previous chaining values
      Inc(Digest[0], A);
      Inc(Digest[1], B);
      Inc(Digest[2], C);
      Inc(Digest[3], D);
    end;

    {$IFDEF MD4RestoreR}{$R+}{$UNDEF MD4RestoreR}{$ENDIF}
    {$IFDEF MD4RestoreQ}{$Q+}{$UNDEF MD4RestoreQ}{$ENDIF}
  {$ELSE}
    // MD4 compression function, hand-scheduled 32-bit x86 version.
    // On entry eax = @Digest and edx = Buffer (register calling convention).
    // Register use in the rounds: eax = A, ebx = B, ecx = C, edx = D,
    // esi = Buffer, edi/ebp = scratch.
    procedure MD4Calc(var Digest: TMD4Digest; Buffer: PMD4Buffer);
    asm
      push ebx
      push esi
      push edi
      push ebp
      push eax // store Digest
      mov esi,edx // let esi points to Buffer
      mov edx,[eax + 12] // D
      mov ecx,[eax + 8] // C
      mov ebx,[eax + 4] // B
      mov eax,[eax + 0] // A
      // the original D, C, B, A are pushed (interleaved with the first step)
      // so they can be added back after the three rounds
      push edx
      mov edi, ecx
      push ecx
      xor edi, edx
      push ebx
      push eax
      // ---- round 1 ----
      and edi, ebx
      mov ebp, [esi]
      xor edi, edx
      add eax, ebp
      mov ebp, ebx
      add eax, edi
      rol eax, 3
      xor ebp, ecx
      mov edi, [esi+4]
      and ebp, eax
      add edx, edi
      xor ebp, ecx
      mov edi, eax
      add edx, ebp
      xor edi, ebx
      rol edx, 7
      and edi, edx
      mov ebp, [esi+8]
      xor edi, ebx
      add ecx, ebp
      mov ebp, edx
      add ecx, edi
      rol ecx, 0Bh
      xor ebp, eax
      mov edi, [esi+0Ch]
      and ebp, ecx
      add ebx, edi
      xor ebp, eax
      mov edi, ecx
      add ebx, ebp
      xor edi, edx
      rol ebx, 13h
      and edi, ebx
      mov ebp, [esi+10h]
      xor edi, edx
      add eax, ebp
      mov ebp, ebx
      add eax, edi
      rol eax, 3
      xor ebp, ecx
      mov edi, [esi+14h]
      and ebp, eax
      add edx, edi
      xor ebp, ecx
      mov edi, eax
      add edx, ebp
      xor edi, ebx
      rol edx, 7
      and edi, edx
      mov ebp, [esi+18h]
      xor edi, ebx
      add ecx, ebp
      mov ebp, edx
      add ecx, edi
      rol ecx, 0Bh
      xor ebp, eax
      mov edi, [esi+1Ch]
      and ebp, ecx
      add ebx, edi
      xor ebp, eax
      mov edi, ecx
      add ebx, ebp
      xor edi, edx
      rol ebx, 13h
      and edi, ebx
      mov ebp, [esi+20h]
      xor edi, edx
      add eax, ebp
      mov ebp, ebx
      add eax, edi
      rol eax, 3
      xor ebp, ecx
      mov edi, [esi+24h]
      and ebp, eax
      add edx, edi
      xor ebp, ecx
      mov edi, eax
      add edx, ebp
      xor edi, ebx
      rol edx, 7
      and edi, edx
      mov ebp, [esi+28h]
      xor edi, ebx
      add ecx, ebp
      mov ebp, edx
      add ecx, edi
      rol ecx, 0Bh
      xor ebp, eax
      mov edi, [esi+2Ch]
      and ebp, ecx
      add ebx, edi
      xor ebp, eax
      mov edi, ecx
      add ebx, ebp
      xor edi, edx
      rol ebx, 13h
      and edi, ebx
      mov ebp, [esi+30h]
      xor edi, edx
      add eax, ebp
      mov ebp, ebx
      add eax, edi
      rol eax, 3
      xor ebp, ecx
      mov edi, [esi+34h]
      and ebp, eax
      add edx, edi
      xor ebp, ecx
      mov edi, eax
      add edx, ebp
      xor edi, ebx
      rol edx, 7
      and edi, edx
      mov ebp, [esi+38h]
      xor edi, ebx
      add ecx, ebp
      mov ebp, edx
      add ecx, edi
      rol ecx, 0Bh
      xor ebp, eax
      mov edi, [esi+3Ch]
      and ebp, ecx
      add ebx, edi
      xor ebp, eax
      mov edi, edx
      add ebx, ebp
      mov ebp, edx
      rol ebx, 13h
      or edi, ecx
      and ebp, ecx
      and edi, ebx
      or ebp, edi
      // ---- round 2 (constant 5A827999h); ebp already holds the majority
      //      term for the first step, computed interleaved above ----
      mov edi, [esi]
      lea eax, [eax+edi+5A827999h]
      mov edi, ecx
      add eax, ebp
      mov ebp, ecx
      rol eax, 3
      or edi, ebx
      and ebp, ebx
      and edi, eax
      or ebp, edi
      mov edi, [esi+10h]
      lea edx, [edx+edi+5A827999h]
      mov edi, ebx
      add edx, ebp
      mov ebp, ebx
      rol edx, 5
      or edi, eax
      and ebp, eax
      and edi, edx
      or ebp, edi
      mov edi, [esi+20h]
      lea ecx, [ecx+edi+5A827999h]
      mov edi, eax
      add ecx, ebp
      mov ebp, eax
      rol ecx, 9
      or edi, edx
      and ebp, edx
      and edi, ecx
      or ebp, edi
      mov edi, [esi+30h]
      lea ebx, [ebx+edi+5A827999h]
      mov edi, edx
      add ebx, ebp
      mov ebp, edx
      rol ebx, 0Dh
      or edi, ecx
      and ebp, ecx
      and edi, ebx
      or ebp, edi
      mov edi, [esi+4]
      lea eax, [eax+edi+5A827999h]
      mov edi, ecx
      add eax, ebp
      mov ebp, ecx
      rol eax, 3
      or edi, ebx
      and ebp, ebx
      and edi, eax
      or ebp, edi
      mov edi, [esi+14h]
      lea edx, [edx+edi+5A827999h]
      mov edi, ebx
      add edx, ebp
      mov ebp, ebx
      rol edx, 5
      or edi, eax
      and ebp, eax
      and edi, edx
      or ebp, edi
      mov edi, [esi+24h]
      lea ecx, [ecx+edi+5A827999h]
      mov edi, eax
      add ecx, ebp
      mov ebp, eax
      rol ecx, 9
      or edi, edx
      and ebp, edx
      and edi, ecx
      or ebp, edi
      mov edi, [esi+34h]
      lea ebx, [ebx+edi+5A827999h]
      mov edi, edx
      add ebx, ebp
      mov ebp, edx
      rol ebx, 0Dh
      or edi, ecx
      and ebp, ecx
      and edi, ebx
      or ebp, edi
      mov edi, [esi+8]
      lea eax, [eax+edi+5A827999h]
      mov edi, ecx
      add eax, ebp
      mov ebp, ecx
      rol eax, 3
      or edi, ebx
      and ebp, ebx
      and edi, eax
      or ebp, edi
      mov edi, [esi+18h]
      lea edx, [edx+edi+5A827999h]
      mov edi, ebx
      add edx, ebp
      mov ebp, ebx
      rol edx, 5
      or edi, eax
      and ebp, eax
      and edi, edx
      or ebp, edi
      mov edi, [esi+28h]
      lea ecx, [ecx+edi+5A827999h]
      mov edi, eax
      add ecx, ebp
      mov ebp, eax
      rol ecx, 9
      or edi, edx
      and ebp, edx
      and edi, ecx
      or ebp, edi
      mov edi, [esi+38h]
      lea ebx, [ebx+edi+5A827999h]
      mov edi, edx
      add ebx, ebp
      mov ebp, edx
      rol ebx, 0Dh
      or edi, ecx
      and ebp, ecx
      and edi, ebx
      or ebp, edi
      mov edi, [esi+0Ch]
      lea eax, [eax+edi+5A827999h]
      mov edi, ecx
      add eax, ebp
      mov ebp, ecx
      rol eax, 3
      or edi, ebx
      and ebp, ebx
      and edi, eax
      or ebp, edi
      mov edi, [esi+1Ch]
      lea edx, [edx+edi+5A827999h]
      mov edi, ebx
      add edx, ebp
      mov ebp, ebx
      rol edx, 5
      or edi, eax
      and ebp, eax
      and edi, edx
      or ebp, edi
      mov edi, [esi+2Ch]
      lea ecx, [ecx+edi+5A827999h]
      mov edi, eax
      add ecx, ebp
      mov ebp, eax
      rol ecx, 9
      or edi, edx
      and ebp, edx
      and edi, ecx
      or ebp, edi
      mov edi, [esi+3Ch]
      lea ebx, [ebx+edi+5A827999h]
      mov edi, edx
      add ebx, ebp
      mov ebp, edx
      rol ebx, 0Dh
      // ---- round 3 (constant 6ED9EBA1h); edi already holds D ----
      xor edi, ecx
      add eax, [esi]
      xor edi, ebx
      mov ebp, ecx
      lea eax, [eax+edi+6ED9EBA1h]
      xor ebp, ebx
      rol eax, 3
      add edx, [esi+20h]
      xor ebp, eax
      mov edi, ebx
      lea edx, [edx+ebp+6ED9EBA1h]
      rol edx, 9
      xor edi, eax
      add ecx, [esi+10h]
      xor edi, edx
      mov ebp, eax
      lea ecx, [ecx+edi+6ED9EBA1h]
      xor ebp, edx
      rol ecx, 0Bh
      add ebx, [esi+30h]
      xor ebp, ecx
      mov edi, edx
      lea ebx, [ebx+ebp+6ED9EBA1h]
      rol ebx, 0Fh
      xor edi, ecx
      add eax, [esi+8]
      xor edi, ebx
      mov ebp, ecx
      lea eax, [eax+edi+6ED9EBA1h]
      xor ebp, ebx
      rol eax, 3
      add edx, [esi+28h]
      xor ebp, eax
      mov edi, ebx
      lea edx, [edx+ebp+6ED9EBA1h]
      rol edx, 9
      xor edi, eax
      add ecx, [esi+18h]
      xor edi, edx
      mov ebp, eax
      lea ecx, [ecx+edi+6ED9EBA1h]
      xor ebp, edx
      rol ecx, 0Bh
      add ebx, [esi+38h]
      xor ebp, ecx
      mov edi, edx
      lea ebx, [ebx+ebp+6ED9EBA1h]
      rol ebx, 0Fh
      xor edi, ecx
      add eax, [esi+4]
      xor edi, ebx
      mov ebp, ecx
      lea eax, [eax+edi+6ED9EBA1h]
      xor ebp, ebx
      rol eax, 3
      add edx, [esi+24h]
      xor ebp, eax
      mov edi, ebx
      lea edx, [edx+ebp+6ED9EBA1h]
      rol edx, 9
      xor edi, eax
      add ecx, [esi+14h]
      xor edi, edx
      mov ebp, eax
      lea ecx, [ecx+edi+6ED9EBA1h]
      xor ebp, edx
      rol ecx, 0Bh
      add ebx, [esi+34h]
      xor ebp, ecx
      mov edi, edx
      lea ebx, [ebx+ebp+6ED9EBA1h]
      rol ebx, 0Fh
      xor edi, ecx
      add eax, [esi+0Ch]
      xor edi, ebx
      mov ebp, ecx
      lea eax, [eax+edi+6ED9EBA1h]
      xor ebp, ebx
      rol eax, 3
      add edx, [esi+2Ch]
      xor ebp, eax
      mov edi, ebx
      lea edx, [edx+ebp+6ED9EBA1h]
      rol edx, 9
      xor edi, eax
      add ecx, [esi+1Ch]
      xor edi, edx
      mov ebp, eax
      lea ecx, [ecx+edi+6ED9EBA1h]
      xor ebp, edx
      rol ecx, 0Bh
      add ebx, [esi+3Ch]
      xor ebp, ecx
      lea ebx, [ebx+ebp+6ED9EBA1h]
      rol ebx, 0Fh
      // feed-forward: pop the saved A, B, C, D and add them to the result
      pop edi
      pop ebp
      add eax, edi
      add ebx, ebp
      pop edi
      pop ebp
      add ecx, edi
      add edx, ebp
      pop esi // restore digest
      mov [esi + 0], eax // A
      mov [esi + 4], ebx // B
      mov [esi + 8], ecx // C
      mov [esi + 12], edx // D
      pop ebp
      pop edi
      pop esi
      pop ebx
    end;
  {$ENDIF}

  var
    Buffer: TMD4Buffer;
  begin
    // hash all complete 64-byte blocks directly from the caller's memory
    while Size >= SizeOf(TMD4Buffer) do
    begin
      MD4Calc(Digest, Data);
      Dec(Size, SizeOf(TMD4Buffer));
      // PAnsiChar keeps the step at exactly one byte per unit; PChar would
      // advance two bytes per unit on compilers where Char is a WideChar.
      Inc(PAnsiChar(Data), SizeOf(TMD4Buffer));
    end;
    // Size is now the length of the trailing partial block (< 64 bytes):
    // copy it, zero-fill the rest and hash it as one more block
    if Size > 0 then
    begin
      Move(Data^, Buffer, Size);
      FillChar(PByteArray(@Buffer)[Size], SizeOf(Buffer) - Size, 0);
      MD4Calc(Digest, @Buffer);
    end;
  end;

  // Converts the 16 digest bytes into 32 lowercase hexadecimal characters.
  function MD4Done(const Digest: TMD4Digest): String;
  const
    sHEX: PChar = '0123456789abcdef';
  var
    I: Integer;
    D: PByte;
    R: PChar;
  begin
    SetLength(Result, SizeOf(Digest) * 2);
    R := Pointer(Result);
    D := @Digest;
    for I := 0 to SizeOf(Digest) -1 do
    begin
      R[0] := sHEX[D^ shr 4];   // high nibble
      R[1] := sHEX[D^ and $F];  // low nibble
      Inc(R, 2);
      Inc(D);
    end;
  end;

var
  Digest: TMD4Digest;
  Stream: TStream;
  CurSize: LongInt;
  Buffer: array of Byte;
begin
  MD4Init(Digest);
  Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    SetLength(Buffer, BufferSize);
    // read and hash chunk by chunk; a read of 0 bytes ends the loop
    // (MD4Update does nothing for an empty chunk)
    repeat
      CurSize := Stream.Read(Buffer[0], BufferSize);
      MD4Update(Digest, @Buffer[0], CurSize);
    until CurSize <= 0;
  finally
    Stream.Free;
  end;
  Result := MD4Done(Digest);
end;
{ Compares the content of two files byte by byte.

  Both files are opened read-only (other writers are denied) and read in
  chunks of BufferSize bytes. Returns True when every pair of chunks has the
  same length and identical bytes up to and including the end of the files;
  returns False at the first chunk that differs.

  Exceptions raised by TFileStream.Create propagate to the caller. }
function CompareFilePhysical(const FileName1, FileName2: String): Boolean;
var
  LeftStream, RightStream: TStream;
  LeftChunk, RightChunk: array of Byte;
  LeftCount, RightCount: LongInt;
begin
  LeftStream := TFileStream.Create(FileName1, fmOpenRead or fmShareDenyWrite);
  try
    RightStream := TFileStream.Create(FileName2, fmOpenRead or fmShareDenyWrite);
    try
      SetLength(LeftChunk, BufferSize);
      SetLength(RightChunk, BufferSize);
      Result := True;
      while Result do
      begin
        LeftCount := LeftStream.Read(LeftChunk[0], BufferSize);
        RightCount := RightStream.Read(RightChunk[0], BufferSize);
        // chunks of different length can never match; otherwise compare bytes
        if LeftCount <> RightCount then
          Result := False
        else
          Result := CompareMem(@LeftChunk[0], @RightChunk[0], LeftCount);
        // nothing more to read from the first file: keep the last verdict
        if LeftCount <= 0 then
          Break;
      end;
    finally
      RightStream.Free;
    end;
  finally
    LeftStream.Free;
  end;
end;
{ Decides whether two files are the same, going from the cheapest test to
  the most expensive one:
    1. names equal (AnsiCompareText) -> True without touching the files
    2. sizes differ                  -> False
    3. hashes differ                 -> False
    4. otherwise the result of the byte-by-byte comparison. }
function CompareFile(const FileName1, FileName2: String): Boolean; overload;
begin
  Result := True;
  if AnsiCompareText(FileName1, FileName2) = 0 then
    Exit;
  Result := False;
  if GetFileSize(FileName1) <> GetFileSize(FileName2) then
    Exit;
  if HashFile(FileName1) <> HashFile(FileName2) then
    Exit;
  Result := CompareFilePhysical(FileName1, FileName2);
end;
{ Decides whether two files are the same, using a hash of the second file
  that the caller already has (FileHash2), going from the cheapest test to
  the most expensive one:
    1. names equal (AnsiCompareText)            -> True without touching the files
    2. sizes differ                             -> False
    3. hash of FileName1 differs from FileHash2 -> False
    4. otherwise the result of the byte-by-byte comparison. }
function CompareFile(const FileName1, FileName2, FileHash2: String): Boolean; overload;
begin
  Result := True;
  if AnsiCompareText(FileName1, FileName2) = 0 then
    Exit;
  Result := False;
  if GetFileSize(FileName1) <> GetFileSize(FileName2) then
    Exit;
  if HashFile(FileName1) <> FileHash2 then
    Exit;
  Result := CompareFilePhysical(FileName1, FileName2);
end;
{ Decides whether two files are the same, using a hash (FileHash2) and a
  size (FileSize2) of the second file that the caller already has, going
  from the cheapest test to the most expensive one:
    1. names equal (AnsiCompareText)            -> True without touching the files
    2. size of FileName1 differs from FileSize2 -> False
    3. hash of FileName1 differs from FileHash2 -> False
    4. otherwise the result of the byte-by-byte comparison. }
function CompareFile(const FileName1, FileName2, FileHash2: String; const FileSize2: Int64): Boolean; overload;
begin
  Result := True;
  if AnsiCompareText(FileName1, FileName2) = 0 then
    Exit;
  Result := False;
  if GetFileSize(FileName1) <> FileSize2 then
    Exit;
  if HashFile(FileName1) <> FileHash2 then
    Exit;
  Result := CompareFilePhysical(FileName1, FileName2);
end;
end.