From 5eb9b682096cb6bc8d917588723d60350a0432a8 Mon Sep 17 00:00:00 2001
From: guqing <38999863+guqing@users.noreply.github.com>
Date: Thu, 20 Jul 2023 16:55:56 +0800
Subject: [PATCH] refactor: optimizing regex pattern for html meta matching
(#4235)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
#### What type of PR is this?
/kind improvement
/area core
/milestone 2.8.x
#### What this PR does / why we need it:
优化去除 Html Meta 重复标签的正则表达式
see #4234 for more details.
#### Which issue(s) this PR fixes:
Fixes #4234
#### Does this PR introduce a user-facing change?
```release-note
优化去除 Html Meta 重复标签的正则表达式
```
---
.../dialect/DuplicateMetaTagProcessor.java | 17 ++++---
.../DuplicateMetaTagProcessorTest.java | 51 +++++++++++++++++++
.../ThemeMessageResolverIntegrationTest.java | 30 ++++++++---
3 files changed, 84 insertions(+), 14 deletions(-)
create mode 100644 application/src/test/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessorTest.java
diff --git a/application/src/main/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessor.java b/application/src/main/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessor.java
index 609041372..b9dac2f4d 100644
--- a/application/src/main/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessor.java
+++ b/application/src/main/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessor.java
@@ -8,7 +8,6 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.AllArgsConstructor;
-import org.apache.commons.lang3.StringUtils;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import org.thymeleaf.context.ITemplateContext;
@@ -30,7 +29,7 @@ import reactor.core.publisher.Mono;
@Component
@AllArgsConstructor
public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
- static final Pattern META_PATTERN = Pattern.compile("");
+ static final Pattern META_PATTERN = Pattern.compile("<meta[^>]+?name=\"([^\"]+)\"[^>]*>\\n*");
@Override
public Mono process(ITemplateContext context, IModel model,
@@ -49,15 +48,17 @@ public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
while (matcher.find()) {
String tagLine = matcher.group(0);
String nameAttribute = matcher.group(1);
- IText metaTagNode = context.getModelFactory().createText(tagLine);
+ // create a new text node to replace the original text node
+ // replace multiple line breaks with one line break
+ IText metaTagNode = context.getModelFactory()
+ .createText(tagLine.replaceAll("\\n+", "\n"));
uniqueMetaTags.put(nameAttribute, new IndexedModel(i, metaTagNode));
text = text.replace(tagLine, "");
}
- if (StringUtils.isNotBlank(text)) {
- IText otherText = context.getModelFactory()
- .createText(text);
- otherModel.add(new IndexedModel(i, otherText));
- }
+ // put the rest of the text into the other model
+ IText otherText = context.getModelFactory()
+ .createText(text);
+ otherModel.add(new IndexedModel(i, otherText));
} else {
otherModel.add(new IndexedModel(i, templateEvent));
}
diff --git a/application/src/test/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessorTest.java b/application/src/test/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessorTest.java
new file mode 100644
index 000000000..d48e65861
--- /dev/null
+++ b/application/src/test/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessorTest.java
@@ -0,0 +1,51 @@
+package run.halo.app.theme.dialect;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.util.regex.Matcher;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests for {@link DuplicateMetaTagProcessor}.
+ *
+ * @author guqing
+ * @since 2.8.0
+ */
+class DuplicateMetaTagProcessorTest {
+
+ @Test
+ void extractMetaTag() {
+ // normal
+ String text = "<meta name=\"description\" content=\"x\"/>";
+ Matcher matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
+ assertThat(matcher.find()).isTrue();
+ assertThat(matcher.group(1)).isEqualTo("description");
+
+ // name and content are not in the general order
+ text = "<meta content=\"x\" name=\"keywords\"/>";
+ matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
+ assertThat(matcher.find()).isTrue();
+ assertThat(matcher.group(1)).isEqualTo("keywords");
+
+ // no closing slash
+ text = "<meta name=\"keywords\" content=\"x\">";
+ matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
+ assertThat(matcher.find()).isTrue();
+ assertThat(matcher.group(1)).isEqualTo("keywords");
+
+ // multiple line breaks and other stuff
+ text = """
+
+
+
+ """;
+ matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
+ assertThat(matcher.find()).isTrue();
+ assertThat(matcher.group(1)).isEqualTo("description");
+ }
+}
diff --git a/application/src/test/java/run/halo/app/theme/message/ThemeMessageResolverIntegrationTest.java b/application/src/test/java/run/halo/app/theme/message/ThemeMessageResolverIntegrationTest.java
index aed217a10..2ce586c91 100644
--- a/application/src/test/java/run/halo/app/theme/message/ThemeMessageResolverIntegrationTest.java
+++ b/application/src/test/java/run/halo/app/theme/message/ThemeMessageResolverIntegrationTest.java
@@ -72,7 +72,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
- Title
+
+
+ Title
+
index
zh
@@ -93,7 +96,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
- Title
+
+
+ Title
+
index
en
@@ -114,7 +120,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
- Title
+
+
+ Title
+
index
foo
@@ -135,7 +144,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
- Title
+
+
+ Title
+
index
zh
@@ -154,7 +166,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
- Other theme title
+
+
+ Other theme title
+
Other 首页
@@ -167,7 +182,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
- Other theme title
+
+
+ Other theme title
+
other index