From 667b99dc3a4271dcf3ec2cc2a16ad5fcbae04ef8 Mon Sep 17 00:00:00 2001 From: Nate Rosenblum Date: Tue, 16 Mar 2021 12:22:46 -0600 Subject: [PATCH] Use strtoul instead of sscanf for numeric prefix sscanf consumes the entire string and is much slower than strtoul when reading the block size at the prefix of digest strings. This is the dominant cost of digest comparisons with incomparable block sizes (~ 80% slower than using strtoul), and is still a significant contribution (> 15%) to the cost of full digest comparisons. --- fuzzy.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fuzzy.c b/fuzzy.c index 9ef2666..301b41e 100644 --- a/fuzzy.c +++ b/fuzzy.c @@ -864,14 +864,24 @@ int fuzzy_compare(const char *str1, const char *str2) size_t s1b1len, s1b2len, s2b1len, s2b2len; char s1b1[SPAMSUM_LENGTH], s1b2[SPAMSUM_LENGTH]; char s2b1[SPAMSUM_LENGTH], s2b2[SPAMSUM_LENGTH]; - char *s1p, *s2p, *tmp; + char *s1p, *s2p, *tmp, *endptr = NULL; if (NULL == str1 || NULL == str2) return -1; // each spamsum is prefixed by its block size - if (sscanf(str1, "%lu:", &block_size1) != 1 || - sscanf(str2, "%lu:", &block_size2) != 1) { + block_size1 = strtoul(str1, &endptr, 10); + if (endptr == str1 || *endptr != ':') { + return -1; + } + if (block_size1 == ULONG_MAX && errno == ERANGE) { + return -1; + } + block_size2 = strtoul(str2, &endptr, 10); + if (endptr == str2 || *endptr != ':') { + return -1; + } + if (block_size2 == ULONG_MAX && errno == ERANGE) { return -1; }