refactor: optimize key comparator for indicates key sort (#6555)

#### What type of PR is this?
/kind improvement
/area core
/milestone 2.19.x

#### What this PR does / why we need it:
重构 KeyComparator 并通过更多的测试用例来确保排序功能的正确性

同时修复了可能存在溢出导致比较结果不正确的问题,目前:
1. 字符串长度比较:在 compareStrings 方法中,字符串的长度比较使用 Integer.compare,这部分代码不会产生整数溢出问题。
2. 数字部分的比较:在 compareNumbers 方法中,数字的比较是基于字符比较的(即逐位比较每个数字字符),没有涉及到将数3. 字字符串转化为 int 或 long 类型的操作,所以不会存在整数溢出问题。
4. 处理小数部分的比较:在 compareDecimalNumbers 方法中,类似地,比较操作也是基于字符的,不涉及到数值转换,因此也不存在整数溢出问题

#### Which issue(s) this PR fixes:
Fixes #6466

#### Does this PR introduce a user-facing change?
```release-note
修复由于索引比较时可能出现整数溢出导致文章偶尔无法访问的问题
```
pull/6563/head
guqing 2024-08-30 17:39:28 +08:00 committed by GitHub
parent 856d61537d
commit ba49dcab35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 577 additions and 21 deletions

View File

@ -7,7 +7,7 @@ public class KeyComparator implements Comparator<String> {
public static final KeyComparator INSTANCE = new KeyComparator();
@Override
public int compare(@Nullable String a, @Nullable String b) {
public int compare(@Nullable String a, @Nullable String b) {
if (a == null && b == null) {
return 0;
} else if (a == null) {
@ -17,31 +17,169 @@ public class KeyComparator implements Comparator<String> {
// null less than everything
return -1;
}
return compareStrings(a, b);
}
private int compareStrings(String a, String b) {
int i = 0;
int j = 0;
while (i < a.length() && j < b.length()) {
if (Character.isDigit(a.charAt(i)) && Character.isDigit(b.charAt(j))) {
// handle number part
int num1 = 0;
int num2 = 0;
while (i < a.length() && Character.isDigit(a.charAt(i))) {
num1 = num1 * 10 + (a.charAt(i++) - '0');
char charA = a.charAt(i);
char charB = b.charAt(j);
if (Character.isDigit(charA) && Character.isDigit(charB)) {
// Both characters are digits, compare as numbers
int compareResult = compareNumbers(a, b, i, j);
if (compareResult != 0) {
return compareResult;
}
while (j < b.length() && Character.isDigit(b.charAt(j))) {
num2 = num2 * 10 + (b.charAt(j++) - '0');
}
if (num1 != num2) {
return num1 - num2;
}
} else if (a.charAt(i) != b.charAt(j)) {
// handle non-number part
return a.charAt(i) - b.charAt(j);
} else {
// Move indices past the compared number segments
i = moveIndexToNextNonDigit(a, i);
j = moveIndexToNextNonDigit(b, j);
} else if (charA == charB) {
// Characters are the same, continue
i++;
j++;
} else if (Character.isDigit(charA)) {
// If charA is digit and charB is not, digit comes first
return -1;
} else if (Character.isDigit(charB)) {
// If charB is digit and charA is not, digit comes first
return 1;
} else {
// Both are non-digits, compare directly
return Character.compare(charA, charB);
}
}
return a.length() - b.length();
return Integer.compare(a.length(), b.length());
}
private int compareNumbers(String a, String b, int startA, int startB) {
int i = startA;
int j = startB;
// Skip leading zeros for both numbers
while (i < a.length() && a.charAt(i) == '0') {
i++;
}
while (j < b.length() && b.charAt(j) == '0') {
j++;
}
// Compare lengths of remaining digits
int lengthA = countDigits(a, i);
int lengthB = countDigits(b, j);
if (lengthA != lengthB) {
return Integer.compare(lengthA, lengthB);
}
// Compare digits one by one
for (int k = 0; k < lengthA && i < a.length() && j < b.length(); k++, i++, j++) {
char charA = a.charAt(i);
char charB = b.charAt(j);
if (charA != charB) {
return Character.compare(charA, charB);
}
}
// If both numbers have decimal points, compare decimal parts
boolean hasDecimalA = i < a.length() && a.charAt(i) == '.';
boolean hasDecimalB = j < b.length() && b.charAt(j) == '.';
if (hasDecimalA || hasDecimalB) {
return compareDecimalNumbers(a, b, i, j);
}
return 0;
}
private int compareDecimalNumbers(String a, String b, int startA, int startB) {
// Find decimal point positions
int pointA = a.indexOf('.', startA);
int pointB = b.indexOf('.', startB);
// Compare integer parts before the decimal point
int integerComparison = compareIntegerPart(a, b, startA, startB, pointA, pointB);
if (integerComparison != 0) {
return integerComparison;
}
// Compare fractional parts after the decimal point
return compareFractionalPart(a, b, pointA + 1, pointB + 1);
}
private int compareIntegerPart(String a, String b, int startA, int startB, int pointA,
int pointB) {
int i = startA;
int j = startB;
while (i < pointA && a.charAt(i) == '0') {
i++;
}
while (j < pointB && b.charAt(j) == '0') {
j++;
}
int lengthA = pointA - i;
int lengthB = pointB - j;
if (lengthA != lengthB) {
return Integer.compare(lengthA, lengthB);
}
while (i < pointA && j < pointB) {
char charA = a.charAt(i);
char charB = b.charAt(j);
if (charA != charB) {
return Character.compare(charA, charB);
}
i++;
j++;
}
return 0;
}
private int compareFractionalPart(String a, String b, int i, int j) {
while (i < a.length() && j < b.length()
&& Character.isDigit(a.charAt(i)) && Character.isDigit(b.charAt(j))) {
if (a.charAt(i) != b.charAt(j)) {
return Character.compare(a.charAt(i), b.charAt(j));
}
i++;
j++;
}
while (i < a.length() && Character.isDigit(a.charAt(i))) {
if (a.charAt(i) != '0') {
return 1;
}
i++;
}
while (j < b.length() && Character.isDigit(b.charAt(j))) {
if (b.charAt(j) != '0') {
return -1;
}
j++;
}
return 0;
}
private int countDigits(String s, int start) {
int count = 0;
while (start < s.length() && Character.isDigit(s.charAt(start))) {
count++;
start++;
}
return count;
}
private int moveIndexToNextNonDigit(String s, int index) {
while (index < s.length() && (Character.isDigit(s.charAt(index))
|| s.charAt(index) == '.')) {
index++;
}
return index;
}
}

View File

@ -4,6 +4,11 @@ import static org.assertj.core.api.Assertions.assertThat;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.TreeMap;
import java.util.TreeSet;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.RepeatedTest;
import org.junit.jupiter.api.Test;
/**
@ -13,10 +18,10 @@ import org.junit.jupiter.api.Test;
* @since 2.12.0
*/
class KeyComparatorTest {
private final KeyComparator comparator = KeyComparator.INSTANCE;
@Test
void keyComparator() {
var comparator = KeyComparator.INSTANCE;
String[] strings = {"103", "101", "102", "1011", "1013", "1021", "1022", "1012", "1023"};
Arrays.sort(strings, comparator);
assertThat(strings).isEqualTo(
@ -34,7 +39,6 @@ class KeyComparatorTest {
@Test
void keyComparator2() {
var comparator = KeyComparator.INSTANCE;
String[] strings =
{"moment-101", "moment-102", "moment-103", "moment-1011", "moment-1013", "moment-1021",
"moment-1022", "moment-1012", "moment-1023"};
@ -76,4 +80,418 @@ class KeyComparatorTest {
Arrays.sort(strings, comparator);
assertThat(strings).isEqualTo(new String[] {"123", "abc", "xyz", null});
}
}
@Test
void complexStringTest() {
var strings = new String[] {
"1719560085223",
"1719564195757",
"AJHQ9JKT",
"1719565849173",
"5InykKCe",
"123123123",
"adJhTqEo",
"123123",
"Ahvcq7Wn",
"asda",
"b5jHcxfe"
};
Arrays.sort(strings, comparator);
assertThat(strings).containsExactly(
"5InykKCe",
"123123",
"123123123",
"1719560085223",
"1719564195757",
"1719565849173",
"AJHQ9JKT",
"Ahvcq7Wn",
"adJhTqEo",
"asda",
"b5jHcxfe"
);
}
@Test
void complexButSkewedStringTest() {
var strings = new String[] {
"chu-shi-hua-gong-neng-you-hua-halo-2.9.0-fa-bu",
"cxcc",
"d",
"dddd",
"ddddd",
"de-dao",
"dENMr6tX",
"dian-shang-ke-fu",
"dong-tai-she-ji-shi-xi-25jie",
"eeeeeeee",
"ejqRrTp4",
"Fh8Jd09T",
"g5gZaGvS",
};
Arrays.sort(strings, comparator);
assertThat(strings).containsExactly(
"Fh8Jd09T",
"chu-shi-hua-gong-neng-you-hua-halo-2.9.0-fa-bu",
"cxcc",
"d",
"dENMr6tX",
"dddd",
"ddddd",
"de-dao",
"dian-shang-ke-fu",
"dong-tai-she-ji-shi-xi-25jie",
"eeeeeeee",
"ejqRrTp4",
"g5gZaGvS"
);
}
@Test
void mixLetterCaseStringTest() {
var strings = new String[] {
"VpLBxBJ7", "AJHQ9JKT", "asda", "Tq5EgH2V", "Fh8Jd09T", "J7KMLQeK", "adJhTqEo",
"Ahvcq7Wn",
};
Arrays.sort(strings, comparator);
assertThat(strings).containsExactly(
"AJHQ9JKT", "Ahvcq7Wn", "Fh8Jd09T", "J7KMLQeK", "Tq5EgH2V", "VpLBxBJ7", "adJhTqEo",
"asda"
);
}
@Test
void mixLetterCaseAndNumberTest() {
var strings = new String[] {
"1719565849173", "1719564195757", "1703040584263",
"AJHQ9JKT", "Ahvcq7Wn", "Fh8Jd09T", "adJhTqEo",
"asda", "1703053590063", "1702955288482",
"zhi-chi-bei-fen-hui-fu-halo-2.8.0-fa-bu",
"zhi-chi-ge-ren-zhong-xin-halo-2.11.0-fa-bu",
"J7KMLQeK", "Tq5EgH2V", "VpLBxBJ7",
"b5jHcxfe", "cao-ni-ma-a-huang-jian-ming", "chu-ji-ying-jian-kai-fa",
"ddddd", "de-dao", "dian-shang-ke-fu", "eeeeeeee", "ejqRrTp4",
"halo-maintainer-2023-nian-du-bang-dan", "hello-halo", "hello-world",
"dong-tai-she-ji-shi-xi-25jie", "halo-nuan-dong-li-yu-quan-chang-qi-zhe-qi", "hello",
"kai-fang-gong-gong-api-halo-2.5.0-fa-bu",
"xing-neng-you-hua-yu-gong-neng-gai-jin-halo-2.13-fa-bu", "ye-wu-tuo-zhan-jing-li",
"ying-qu-jing-mei-zhou-bian-halo-ying-yong-shi-chang-zhu-ti-you-jiang-zheng-ji",
"zhi-chi-bao-chi-deng-lu-hui-hua-halo-2.16.0-fa-bu",
};
Arrays.sort(strings, comparator);
assertThat(strings).containsExactly(
"1702955288482",
"1703040584263",
"1703053590063",
"1719564195757",
"1719565849173",
"AJHQ9JKT",
"Ahvcq7Wn",
"Fh8Jd09T",
"J7KMLQeK",
"Tq5EgH2V",
"VpLBxBJ7",
"adJhTqEo",
"asda",
"b5jHcxfe",
"cao-ni-ma-a-huang-jian-ming",
"chu-ji-ying-jian-kai-fa",
"ddddd",
"de-dao",
"dian-shang-ke-fu",
"dong-tai-she-ji-shi-xi-25jie",
"eeeeeeee",
"ejqRrTp4",
"halo-maintainer-2023-nian-du-bang-dan",
"halo-nuan-dong-li-yu-quan-chang-qi-zhe-qi",
"hello",
"hello-halo",
"hello-world",
"kai-fang-gong-gong-api-halo-2.5.0-fa-bu",
"xing-neng-you-hua-yu-gong-neng-gai-jin-halo-2.13-fa-bu",
"ye-wu-tuo-zhan-jing-li",
"ying-qu-jing-mei-zhou-bian-halo-ying-yong-shi-chang-zhu-ti-you-jiang-zheng-ji",
"zhi-chi-bao-chi-deng-lu-hui-hua-halo-2.16.0-fa-bu",
"zhi-chi-bei-fen-hui-fu-halo-2.8.0-fa-bu",
"zhi-chi-ge-ren-zhong-xin-halo-2.11.0-fa-bu");
}
@Test
public void sortingWithComplexStringsTest() {
List<String> strings = Arrays.asList("abc10", "abc2", "abc1", "abc20", "abc100");
strings.sort(comparator);
assertThat(strings).containsExactly("abc1", "abc2", "abc10", "abc20", "abc100");
}
@Test
public void sortingWithDecimalStringsTest() {
List<String> strings =
Arrays.asList("1.2", "1.10", "1.1", "1.20", "1.02", "1.22", "1.001", "1.002");
strings.sort(comparator);
assertThat(strings).containsExactly("1.001", "1.002", "1.02", "1.1", "1.10", "1.2", "1.20",
"1.22");
}
@Test
public void treeSetWithComparatorTest() {
TreeSet<String> set = new TreeSet<>(comparator);
set.add("abc123");
set.add("abc1");
set.add("abc12");
set.add("abc2");
assertThat(set).containsExactly("abc1", "abc2", "abc12", "abc123");
}
@Test
public void testTreeMap_WithComparator() {
TreeMap<String, String> map = new TreeMap<>(comparator);
map.put("2024-08-29", "date1");
map.put("2024-08-28", "date2");
map.put("2024-08-30", "date3");
assertThat(map.keySet()).containsExactly("2024-08-28", "2024-08-29", "2024-08-30");
assertThat(map.get("2024-08-29")).isEqualTo("date1");
}
@Test
public void integerPartDifferentTest() {
// Create strings with different integer parts to cover the compareIntegerPart code
// block
String[] strings = {"abc10", "abc2", "abc1", "abc20", "abc10022229"};
Arrays.sort(strings, comparator);
String[] expectedOrder = {"abc1", "abc2", "abc10", "abc20", "abc10022229"};
assertThat(strings).containsExactly(expectedOrder);
}
@Test
public void integerPartDifferentWithDecimalTest() {
// To specifically test integer part comparison
String str1 = "abc12.5";
String str2 = "abc11.5";
// Compare should return a positive number since "12" > "11"
assertThat(comparator.compare(str1, str2)).isPositive();
String str3 = "abc11.5";
String str4 = "abc12.5";
// Compare should return a negative number since "11" < "12"
assertThat(comparator.compare(str3, str4)).isNegative();
// Test for multiple decimal points
assertThat(comparator.compare("1.23.4", "1.23")).isGreaterThan(0);
assertThat(comparator.compare("1.23", "1.23.4")).isLessThan(0);
assertThat(comparator.compare("1..23", "1.23")).isLessThan(0);
assertThat(comparator.compare("1.23..", "1.23")).isGreaterThan(0);
assertThat(comparator.compare("", "1.23")).isLessThan(0);
assertThat(comparator.compare("1.23", "")).isGreaterThan(0);
assertThat(comparator.compare("1.23", "1.23")).isZero();
}
@Nested
class ComparatorCharacteristicTest {
@Test
public void reflexiveTest() {
// Reflexive: a == a should always return 0
assertThat(comparator.compare("test", "test")).isZero();
assertThat(comparator.compare("", "")).isZero();
assertThat(comparator.compare("123", "123")).isZero();
assertThat(comparator.compare(null, null)).isZero();
}
@Test
public void symmetricTest() {
// Symmetric: a > b implies b < a
assertThat(comparator.compare("123", "test")).isNegative();
assertThat(comparator.compare("test", "123")).isPositive();
assertThat(comparator.compare("1.023", "1.23")).isNegative();
assertThat(comparator.compare("1.23", "1.023")).isPositive();
}
@Test
public void transitiveTest() {
// Transitive: a > b and b > c implies a > c
assertThat(comparator.compare("test2", "test1")).isPositive();
assertThat(comparator.compare("test1", "test0")).isPositive();
assertThat(comparator.compare("test2", "test0")).isPositive();
}
@RepeatedTest(50)
public void consistencyTest() {
// Consistency: a == b should always return 0 if not changed
assertThat(comparator.compare("123abc", "123abc")).isZero();
assertThat(comparator.compare("test", "test")).isZero();
assertThat(comparator.compare("123abc", "123abc"))
.isEqualTo(comparator.compare("123abc", "123abc"));
}
@Test
public void withNumbersTest() {
// Numbers should be compared numerically
assertThat(comparator.compare("item2", "item10")).isNegative();
assertThat(comparator.compare("item10", "item2")).isPositive();
assertThat(comparator.compare("item10", "item10")).isZero();
}
@Test
public void mixedContentTest() {
// Mixed content comparison
assertThat(comparator.compare("abc123", "abc124")).isNegative();
assertThat(comparator.compare("abc124", "abc123")).isPositive();
assertThat(comparator.compare("abc123", "abc123")).isZero();
}
@Test
public void nullHandlingTest() {
// Null handling
assertThat(comparator.compare(null, "test")).isPositive();
assertThat(comparator.compare("test", null)).isNegative();
assertThat(comparator.compare(null, null)).isZero();
}
@Test
public void lengthDifferenceTest() {
// Length difference should affect comparison
assertThat(comparator.compare("test", "testa")).isNegative();
assertThat(comparator.compare("testa", "test")).isPositive();
}
@Test
public void specialCharactersTest() {
// Special character comparison
assertThat(comparator.compare("a#1", "a#2")).isNegative();
assertThat(comparator.compare("a#2", "a#1")).isPositive();
assertThat(comparator.compare("a#1", "a#1")).isZero();
}
@Test
public void emptyStringsTest() {
// Empty string comparison
assertThat(comparator.compare("", "test")).isNegative();
assertThat(comparator.compare("test", "")).isPositive();
assertThat(comparator.compare("", "")).isZero();
}
}
@Nested
class ComparatorEdgeTest {
@Test
public void pureNumbersTest() {
assertThat(comparator.compare("123", "123")).isEqualTo(0);
assertThat(comparator.compare("123", "124")).isLessThan(0);
assertThat(comparator.compare("124", "123")).isGreaterThan(0);
// Leading zeros
assertThat(comparator.compare("00123", "123") > 0).isTrue();
}
@Test
public void mumbersWithOverflowTest() {
// Max long value
String largeNumber1 = "9223372036854775807";
// One more than max long value
String largeNumber2 = "9223372036854775808";
assertThat(comparator.compare(largeNumber1, largeNumber2)).isLessThan(0);
assertThat(comparator.compare(largeNumber2, largeNumber1)).isGreaterThan(0);
// large number str comparison
assertThat(comparator.compare("123456789012345678901234567890",
"123456789012345678901234567891")).isLessThan(0);
assertThat(comparator.compare("123456789012345678901234567890",
"123456789012345678901234567891")).isNotPositive();
assertThat(comparator.compare("999999999999999999999999999999",
"999999999999999999999999999998")).isGreaterThan(0);
assertThat(comparator.compare("999999999999999999999999999999",
"999999999999999999999999999998")).isNotNegative();
assertThat(comparator.compare("9999999999999999999999999999999999999999999999",
"9999999999999999999999999999999999999999999998")).isGreaterThan(0);
assertThat(comparator.compare("100000000000000000000000000000",
"100000000000000000000000000000")).isEqualTo(0);
// This specific case is to test the overflow for a real-world scenario
assertThat(comparator.compare("5InykKCe", "1710683457874") < 0).isTrue();
assertThat(comparator.compare("5InykKce", "1717477435943") > 0).isFalse();
assertThat(comparator.compare("0",
"9999999999999999999999999999999999999999999998")).isLessThan(0);
assertThat(comparator.compare("9999999999999999999999999999999999999999999998",
"0")).isGreaterThan(0);
}
@Test
public void decimalStringsTest() {
assertThat(comparator.compare("123.45", "123.45")).isEqualTo(0);
assertThat(comparator.compare("123.45", "123.46")).isLessThan(0);
assertThat(comparator.compare("123.46", "123.45")).isGreaterThan(0);
// Decimal equivalence
assertThat(comparator.compare("123.5", "123.50")).isLessThan(0);
}
@Test
public void lettersAndNumbersTest() {
assertThat(comparator.compare("abc123", "abc123")).isEqualTo(0);
assertThat(comparator.compare("abc123", "abc124")).isLessThan(0);
assertThat(comparator.compare("abc124", "abc123")).isGreaterThan(0);
assertThat(comparator.compare("abc123", "abcd123")).isLessThan(0);
}
@Test
public void pureLettersTest() {
assertThat(comparator.compare("abc", "abc")).isEqualTo(0);
assertThat(comparator.compare("abc", "abcd")).isLessThan(0);
assertThat(comparator.compare("abcd", "abc")).isGreaterThan(0);
// Case sensitivity
assertThat(comparator.compare("ABC", "abc")).isLessThan(0);
}
@Test
public void dateStringsTest() {
assertThat(comparator.compare("2024-08-29", "2024-08-29")).isEqualTo(0);
assertThat(comparator.compare("2024-08-29", "2024-08-30")).isLessThan(0);
assertThat(comparator.compare("2024-08-30", "2024-08-29")).isGreaterThan(0);
// Time comparison
assertThat(comparator.compare("2024-08-29T12:00:00.000Z", "2024-08-29T12:00:00.001Z"))
.isLessThan(0);
assertThat(comparator.compare("2024-08-29T12:00:00.001Z", "2024-08-29T12:00:00.000Z"))
.isGreaterThan(0);
assertThat(comparator.compare("2024-08-29T12:00:00.000Z", "2024-08-29T12:00:01.000Z"))
.isLessThan(0);
assertThat(comparator.compare("2024-08-29T12:00:01.000Z", "2024-08-29T12:00:00.000Z"))
.isGreaterThan(0);
assertThat(comparator.compare("2024-08-29T12:00:00.000Z", "2024-08-29T12:01:00.000Z"))
.isLessThan(0);
assertThat(comparator.compare("2024-08-29T12:01:00.000Z", "2024-08-29T12:00:00.000Z"))
.isGreaterThan(0);
assertThat(comparator.compare("2024-08-29T12:00:00.000Z", "2024-08-29T13:00:00.000Z"))
.isLessThan(0);
assertThat(comparator.compare("2024-08-29T13:00:00.000Z", "2024-08-29T12:00:00.000Z"))
.isGreaterThan(0);
assertThat(comparator.compare("2024-08-29T12:00:00.000Z", "2024-08-30T12:00:00.000Z"))
.isLessThan(0);
assertThat(comparator.compare("2024-08-30T12:00:00.000Z", "2024-08-29T12:00:00.000Z"))
.isGreaterThan(0);
}
@Test
public void booleanStringsTest() {
assertThat(comparator.compare("true", "false")).isGreaterThan(0);
assertThat(comparator.compare("false", "true")).isLessThan(0);
assertThat(comparator.compare("true", "true")).isEqualTo(0);
assertThat(comparator.compare("false", "false")).isEqualTo(0);
}
@Test
public void complexMixedStringsTest() {
assertThat(comparator.compare("abc123xyz456", "abc123xyz456")).isEqualTo(0);
assertThat(comparator.compare("abc123xyz456", "abc124xyz456")).isLessThan(0);
assertThat(comparator.compare("abc124xyz456", "abc123xyz456")).isGreaterThan(0);
assertThat(comparator.compare("abc123xyz456", "abc123xyz457")).isLessThan(0);
}
}
}