git: 90fa789cfab5 - main - sys/crypto: import md5c.c C rewrite

From: Robert Clausecker <fuz_at_FreeBSD.org>
Date: Fri, 24 Oct 2025 10:18:48 UTC
The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=90fa789cfab54294d4c3a0ac74e01747ce074386

commit 90fa789cfab54294d4c3a0ac74e01747ce074386
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2025-10-04 21:59:56 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2025-10-24 10:16:58 +0000

    sys/crypto: import md5c.c C rewrite
    
    The reimplementation is a bit cleaner than the original code,
    although it is also slightly slower. This shouldn't matter too
    much as we will have asm code for the major platforms.
    
    The code is unrolled when built for the kernel or user space, but
    not when built for libsa, so as to reduce the code size there.
    
    Differential Revision:  https://reviews.freebsd.org/D45670
    Reviewed by:    jrtc27, imp
    Approved by:    markj (mentor)
    MFC after:      1 month
---
 stand/libsa/Makefile |   1 +
 sys/crypto/md5c.c    | 542 ++++++++++++++++++++++++---------------------------
 2 files changed, 260 insertions(+), 283 deletions(-)

diff --git a/stand/libsa/Makefile b/stand/libsa/Makefile
index c1e03c7a5789..470c03032f61 100644
--- a/stand/libsa/Makefile
+++ b/stand/libsa/Makefile
@@ -191,6 +191,7 @@ SRCS+=		sha256c.c sha512c.c
 
 .PATH: ${SYSDIR}/crypto
 SRCS+=		md5c.c
+CFLAGS.md5c.c+= -DSTANDALONE_SMALL
 
 .if ${DO32:U0} == 0
 MAN=libsa.3
diff --git a/sys/crypto/md5c.c b/sys/crypto/md5c.c
index 0922d0f8cc61..f9ffb602afdb 100644
--- a/sys/crypto/md5c.c
+++ b/sys/crypto/md5c.c
@@ -1,331 +1,307 @@
 /*-
- * SPDX-License-Identifier: RSA-MD
+ * Copyright (c) 2024, 2025 Robert Clausecker <fuz@FreeBSD.org>
  *
- * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
- *
- * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
- * rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD5 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- *
- * This code is the same as the code published by RSA Inc.  It has been
- * edited for clarity and style only.
+ * SPDX-License-Identifier: BSD-2-Clause
  */
 
+#include <sys/endian.h>
 #include <sys/types.h>
+#include <sys/md5.h>
 
 #ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/stdint.h>
 #include <sys/systm.h>
+#define assert(expr) MPASS(expr)
 #else
+#include <assert.h>
+#include <stdint.h>
 #include <string.h>
-#endif
-
-#include <machine/endian.h>
-#include <sys/endian.h>
-#include <sys/md5.h>
-
-static void MD5Transform(uint32_t [4], const unsigned char [64]);
-
-#if (BYTE_ORDER == LITTLE_ENDIAN)
-#define Encode memcpy
-#define Decode memcpy
-#else 
-
-/*
- * Encodes input (uint32_t) into output (unsigned char). Assumes len is
- * a multiple of 4.
- */
-
-static void
-Encode (unsigned char *output, uint32_t *input, unsigned int len)
-{
-	unsigned int i;
-	uint32_t ip;
-
-	for (i = 0; i < len / 4; i++) {
-		ip = input[i];
-		*output++ = ip;
-		*output++ = ip >> 8;
-		*output++ = ip >> 16;
-		*output++ = ip >> 24;
-	}
-}
-
-/*
- * Decodes input (unsigned char) into output (uint32_t). Assumes len is
- * a multiple of 4.
- */
+#include <strings.h>
+#endif /* defined(_KERNEL) */
 
-static void
-Decode (uint32_t *output, const unsigned char *input, unsigned int len)
-{
-	unsigned int i;
-
-	for (i = 0; i < len; i += 4) { 
-		*output++ = input[i] | (input[i+1] << 8) | (input[i+2] << 16) |
-		    (input[i+3] << 24);
-	}
-}
+#define md5block _libmd_md5block
+#ifdef MD5_ASM
+extern void	md5block(MD5_CTX *, const void *, size_t);
+#else
+static void	md5block(MD5_CTX *, const void *, size_t);
 #endif
 
-static unsigned char PADDING[64] = {
-  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/* F, G, H and I are basic MD5 functions. */
-#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
-#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
-#define H(x, y, z) ((x) ^ (y) ^ (z))
-#define I(x, y, z) ((y) ^ ((x) | (~z)))
-
-/* ROTATE_LEFT rotates x left n bits. */
-#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
-
-/*
- * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
- * Rotation is separate from addition to prevent recomputation.
- */
-#define FF(a, b, c, d, x, s, ac) { \
-	(a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-	(a) = ROTATE_LEFT ((a), (s)); \
-	(a) += (b); \
-	}
-#define GG(a, b, c, d, x, s, ac) { \
-	(a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-	(a) = ROTATE_LEFT ((a), (s)); \
-	(a) += (b); \
-	}
-#define HH(a, b, c, d, x, s, ac) { \
-	(a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-	(a) = ROTATE_LEFT ((a), (s)); \
-	(a) += (b); \
-	}
-#define II(a, b, c, d, x, s, ac) { \
-	(a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-	(a) = ROTATE_LEFT ((a), (s)); \
-	(a) += (b); \
-	}
-
-/* MD5 initialization. Begins an MD5 operation, writing a new context. */
+/* don't unroll in bootloader */
+#ifdef STANDALONE_SMALL
+#define UNROLL
+#else
+#define UNROLL _Pragma("unroll")
+#endif
 
 void
-MD5Init(MD5_CTX *context)
+MD5Init(MD5_CTX *ctx)		/* begin a new MD5 computation */
 {
+	ctx->state[0] = 0x67452301;	/* magic initialization constants */
+	ctx->state[1] = 0xefcdab89;
+	ctx->state[2] = 0x98badcfe;
+	ctx->state[3] = 0x10325476;
 
-	context->count[0] = context->count[1] = 0;
-
-	/* Load magic initialization constants.  */
-	context->state[0] = 0x67452301;
-	context->state[1] = 0xefcdab89;
-	context->state[2] = 0x98badcfe;
-	context->state[3] = 0x10325476;
+	ctx->count[0] = 0;		/* no input consumed yet */
+	ctx->count[1] = 0;
 }
 
-/* 
- * MD5 block update operation. Continues an MD5 message-digest
- * operation, processing another message block, and updating the
- * context.
- */
-
 void
-MD5Update(MD5_CTX *context, const void *in, unsigned int inputLen)
+MD5Update(MD5_CTX *ctx, const void *data, unsigned int len)
 {
-	unsigned int i, index, partLen;
-	const unsigned char *input = in;
+	uint64_t nn;			/* combined 64-bit input length */
+	const char *p = data;		/* next unconsumed input byte */
+	unsigned num;			/* bytes already held in ctx->buffer */
 
-	/* Compute number of bytes mod 64 */
-	index = (unsigned int)((context->count[0] >> 3) & 0x3F);
+	num = ctx->count[0] % MD5_BLOCK_LENGTH;
+	nn = (uint64_t)ctx->count[0] | (uint64_t)ctx->count[1] << 32;
+	nn += len;			/* count is kept in bytes, not bits */
+	ctx->count[0] = (uint32_t)nn;
+	ctx->count[1] = (uint32_t)(nn >> 32);
 
-	/* Update number of bits */
-	if ((context->count[0] += ((uint32_t)inputLen << 3))
-	    < ((uint32_t)inputLen << 3))
-		context->count[1]++;
-	context->count[1] += ((uint32_t)inputLen >> 29);
+	if (num > 0) {			/* top up the partial block first */
+		unsigned int n = MD5_BLOCK_LENGTH - num;
 
-	partLen = 64 - index;
+		if (n > len)		/* not enough input to fill it */
+			n = len;
 
-	/* Transform as many times as possible. */
-	if (inputLen >= partLen) {
-		memcpy((void *)&context->buffer[index], (const void *)input,
-		    partLen);
-		MD5Transform (context->state, context->buffer);
+		memcpy((char *)ctx->buffer + num, p, n);
+		num += n;
+		if (num == MD5_BLOCK_LENGTH)	/* buffer is full: hash it */
+			md5block(ctx, (void *)ctx->buffer, MD5_BLOCK_LENGTH);
 
-		for (i = partLen; i + 63 < inputLen; i += 64)
-			MD5Transform (context->state, &input[i]);
+		p += n;
+		len -= n;
+	}
 
-		index = 0;
+	if (len >= MD5_BLOCK_LENGTH) {	/* whole blocks straight from input */
+		unsigned n = len & ~(unsigned)(MD5_BLOCK_LENGTH - 1);
+
+		md5block(ctx, p, n);
+		p += n;
+		len -= n;
 	}
-	else
-		i = 0;
 
-	/* Buffer remaining input */
-	memcpy ((void *)&context->buffer[index], (const void *)&input[i],
-	    inputLen-i);
+	if (len > 0)			/* keep the tail for the next call */
+		memcpy((void *)ctx->buffer, p, len);
 }
 
-/*
- * MD5 padding. Adds padding followed by original length.
- */
-
 static void
-MD5Pad(MD5_CTX *context)
+MD5Pad(MD5_CTX *ctx)		/* append padding and the length */
 {
-	unsigned char bits[8];
-	unsigned int index, padLen;
-
-	/* Save number of bits */
-	Encode (bits, context->count, 8);
-
-	/* Pad out to 56 mod 64. */
-	index = (unsigned int)((context->count[0] >> 3) & 0x3f);
-	padLen = (index < 56) ? (56 - index) : (120 - index);
-	MD5Update (context, PADDING, padLen);
-
-	/* Append length (before padding) */
-	MD5Update (context, bits, 8);
+	uint64_t len;			/* input length before padding */
+	unsigned t;			/* padding bytes preceding the length */
+	unsigned char tmp[MD5_BLOCK_LENGTH + sizeof(uint64_t)] = {0x80, 0};
+
+	len = (uint64_t)ctx->count[0] | (uint64_t)ctx->count[1] << 32;
+	t = 64 + 56 - ctx->count[0] % 64;	/* pad out to 56 mod 64 */
+	if (t > 64)			/* keeps t within 1..64 */
+		t -= 64;
+
+	/* the byte count becomes a bit count, stored little-endian */
+	len <<= 3;
+	le64enc(tmp + t, len);
+	MD5Update(ctx, tmp, t + 8);	/* padding and length in one call */
+	assert(ctx->count[0] % MD5_BLOCK_LENGTH == 0);
 }
 
-/*
- * MD5 finalization. Ends an MD5 message-digest operation, writing the
- * the message digest and zeroizing the context.
- */
-
 void
-MD5Final(unsigned char digest[static MD5_DIGEST_LENGTH], MD5_CTX *context)
+MD5Final(unsigned char md[16], MD5_CTX *ctx)	/* pad, emit digest, wipe ctx */
 {
-	/* Do padding. */
-	MD5Pad (context);
+	MD5Pad(ctx);
 
-	/* Store state in digest */
-	Encode (digest, context->state, MD5_DIGEST_LENGTH);
+	le32enc(md +  0, ctx->state[0]);	/* digest is the state, LE */
+	le32enc(md +  4, ctx->state[1]);
+	le32enc(md +  8, ctx->state[2]);
+	le32enc(md + 12, ctx->state[3]);
 
-	/* Zeroize sensitive information. */
-	explicit_bzero (context, sizeof (*context));
+	explicit_bzero(ctx, sizeof(*ctx));	/* whole struct, not the pointer */
 }
 
-/* MD5 basic transformation. Transforms state based on block. */
+#ifndef MD5_ASM
+static const uint32_t K[64] = {	/* additive constant for each step */
+	0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+	0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+	0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+	0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+	0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+	0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+	0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+	0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+	0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+	0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+	0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
+	0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+	0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+	0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+	0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+	0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+};
+
+static inline uint32_t
+rol32(uint32_t a, int b)	/* rotate a left by b bits; needs 0 < b < 32 */
+{
+	return (a << b | a >> (32 - b));
+}
 
 static void
-MD5Transform(uint32_t state[4], const unsigned char block[64])
+md5block(MD5_CTX *ctx, const void *data, size_t len)	/* hash whole blocks */
 {
-	uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];
-
-	Decode (x, block, 64);
-
-	/* Round 1 */
-#define S11 7
-#define S12 12
-#define S13 17
-#define S14 22
-	FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
-	FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
-	FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
-	FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
-	FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
-	FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
-	FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
-	FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
-	FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
-	FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
-	FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
-	FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
-	FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
-	FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
-	FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
-	FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
-
-	/* Round 2 */
-#define S21 5
-#define S22 9
-#define S23 14
-#define S24 20
-	GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
-	GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
-	GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
-	GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
-	GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
-	GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
-	GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
-	GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
-	GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
-	GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
-	GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
-	GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
-	GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
-	GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
-	GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
-	GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
-
-	/* Round 3 */
-#define S31 4
-#define S32 11
-#define S33 16
-#define S34 23
-	HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
-	HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
-	HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
-	HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
-	HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
-	HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
-	HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
-	HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
-	HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
-	HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
-	HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
-	HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
-	HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
-	HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
-	HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
-	HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
-
-	/* Round 4 */
-#define S41 6
-#define S42 10
-#define S43 15
-#define S44 21
-	II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
-	II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
-	II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
-	II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
-	II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
-	II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
-	II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
-	II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
-	II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
-	II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
-	II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
-	II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
-	II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
-	II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
-	II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
-	II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
-
-	state[0] += a;
-	state[1] += b;
-	state[2] += c;
-	state[3] += d;
-
-	/* Zeroize sensitive information. */
-	memset ((void *)x, 0, sizeof (x));
+	uint32_t m[16], a0, b0, c0, d0;
+	const char *p = data;
+
+	a0 = ctx->state[0];
+	b0 = ctx->state[1];
+	c0 = ctx->state[2];
+	d0 = ctx->state[3];
+
+	while (len >= MD5_BLOCK_LENGTH) {	/* trailing partial block is ignored */
+		size_t i;
+		uint32_t a = a0, b = b0, c = c0, d = d0, f, tmp;
+
+		UNROLL
+		for (i = 0; i < 16; i++)	/* load block as LE words */
+			m[i] = le32dec(p + 4*i);
+
+		UNROLL
+		for (i = 0; i < 16; i += 4) {	/* round 1: F(b, c, d) */
+			f = d ^ (b & (c ^ d));
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i] + m[i], 7);
+			a = tmp;
+
+			f = d ^ (b & (c ^ d));
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 1] + m[i + 1], 12);
+			a = tmp;
+
+			f = d ^ (b & (c ^ d));
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 2] + m[i + 2], 17);
+			a = tmp;
+
+			f = d ^ (b & (c ^ d));
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 3] + m[i + 3], 22);
+			a = tmp;
+		}
+
+		UNROLL
+		for (; i < 32; i += 4) {	/* round 2: G(b, c, d) */
+			f = c ^ (d & (b ^ c));
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i] + m[(5*i + 1) % 16], 5);
+			a = tmp;
+
+			f = c ^ (d & (b ^ c));
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 1] + m[(5*i + 6) % 16], 9);
+			a = tmp;
+
+			f = c ^ (d & (b ^ c));
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 2] + m[(5*i + 11) % 16], 14);
+			a = tmp;
+
+			f = c ^ (d & (b ^ c));
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 3] + m[5*i % 16], 20);
+			a = tmp;
+		}
+
+		UNROLL
+		for (; i < 48; i += 4) {	/* round 3: H(b, c, d) */
+			f = b ^ c ^ d;
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i] + m[(3*i + 5) % 16], 4);
+			a = tmp;
+
+			f = b ^ c ^ d;
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 1] + m[(3*i + 8) % 16], 11);
+			a = tmp;
+
+			f = b ^ c ^ d;
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 2] + m[(3*i + 11) % 16], 16);
+			a = tmp;
+
+			f = b ^ c ^ d;
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 3] + m[(3*i + 14) % 16], 23);
+			a = tmp;
+		}
+
+		UNROLL
+		for (; i < 64; i += 4) {	/* round 4: I(b, c, d) */
+			f = c ^ (b | ~d);
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i] + m[7*i % 16], 6);
+			a = tmp;
+
+			f = c ^ (b | ~d);
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 1] + m[(7*i + 7) % 16], 10);
+			a = tmp;
+
+			f = c ^ (b | ~d);
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 2] + m[(7*i + 14) % 16], 15);
+			a = tmp;
+
+			f = c ^ (b | ~d);
+			tmp = d;
+			d = c;
+			c = b;
+			b += rol32(a + f + K[i + 3] + m[(7*i + 5) % 16], 21);
+			a = tmp;
+		}
+
+		a0 += a;		/* add block result to running state */
+		b0 += b;
+		c0 += c;
+		d0 += d;
+
+		p += MD5_BLOCK_LENGTH;
+		len -= MD5_BLOCK_LENGTH;
+	}
+
+	ctx->state[0] = a0;	/* store back after the last block */
+	ctx->state[1] = b0;
+	ctx->state[2] = c0;
+	ctx->state[3] = d0;
 }
+#endif /* !defined(MD5_ASM) */
 
 #ifdef WEAK_REFS
 /* When building libmd, provide weak references. Note: this is not