From 5eb9b682096cb6bc8d917588723d60350a0432a8 Mon Sep 17 00:00:00 2001 From: guqing <38999863+guqing@users.noreply.github.com> Date: Thu, 20 Jul 2023 16:55:56 +0800 Subject: [PATCH] refactor: optimizing regex pattern for html meta matching (#4235) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #### What type of PR is this? /kind improvement /area core /milestone 2.8.x #### What this PR does / why we need it: 优化去除 Html Meta 重复标签的正则表达式 see #4234 for more details. #### Which issue(s) this PR fixes: Fixes #4234 #### Does this PR introduce a user-facing change? ```release-note 优化去除 Html Meta 重复标签的正则表达式 ``` --- .../dialect/DuplicateMetaTagProcessor.java | 17 ++++--- .../DuplicateMetaTagProcessorTest.java | 51 +++++++++++++++++++ .../ThemeMessageResolverIntegrationTest.java | 30 ++++++++--- 3 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 application/src/test/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessorTest.java diff --git a/application/src/main/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessor.java b/application/src/main/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessor.java index 609041372..b9dac2f4d 100644 --- a/application/src/main/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessor.java +++ b/application/src/main/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessor.java @@ -8,7 +8,6 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import lombok.AllArgsConstructor; -import org.apache.commons.lang3.StringUtils; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; import org.thymeleaf.context.ITemplateContext; @@ -30,7 +29,7 @@ import reactor.core.publisher.Mono; @Component @AllArgsConstructor public class DuplicateMetaTagProcessor implements TemplateHeadProcessor { - static final Pattern META_PATTERN = Pattern.compile(""); + static final Pattern META_PATTERN = 
Pattern.compile("<meta[^>]+?name=\"([^\"]+)\"[^>]*>\\n*"); @Override public Mono<Void> process(ITemplateContext context, IModel model, @@ -49,15 +48,17 @@ public class DuplicateMetaTagProcessor implements TemplateHeadProcessor { while (matcher.find()) { String tagLine = matcher.group(0); String nameAttribute = matcher.group(1); - IText metaTagNode = context.getModelFactory().createText(tagLine); + // create a new text node to replace the original text node + // replace multiple line breaks with one line break + IText metaTagNode = context.getModelFactory() + .createText(tagLine.replaceAll("\\n+", "\n")); uniqueMetaTags.put(nameAttribute, new IndexedModel(i, metaTagNode)); text = text.replace(tagLine, ""); } - if (StringUtils.isNotBlank(text)) { - IText otherText = context.getModelFactory() - .createText(text); - otherModel.add(new IndexedModel(i, otherText)); - } + // put the rest of the text into the other model + IText otherText = context.getModelFactory() + .createText(text); + otherModel.add(new IndexedModel(i, otherText)); } else { otherModel.add(new IndexedModel(i, templateEvent)); } diff --git a/application/src/test/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessorTest.java b/application/src/test/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessorTest.java new file mode 100644 index 000000000..d48e65861 --- /dev/null +++ b/application/src/test/java/run/halo/app/theme/dialect/DuplicateMetaTagProcessorTest.java @@ -0,0 +1,51 @@ +package run.halo.app.theme.dialect; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.regex.Matcher; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link DuplicateMetaTagProcessor}. 
+ * + * @author guqing + * @since 2.8.0 + */ +class DuplicateMetaTagProcessorTest { + + @Test + void extractMetaTag() { + // normal + String text = ""; + Matcher matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text); + assertThat(matcher.find()).isTrue(); + assertThat(matcher.group(1)).isEqualTo("description"); + + // name and content are not in the general order + text = ""; + matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text); + assertThat(matcher.find()).isTrue(); + assertThat(matcher.group(1)).isEqualTo("keywords"); + + // no closing slash + text = ""; + matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text); + assertThat(matcher.find()).isTrue(); + assertThat(matcher.group(1)).isEqualTo("keywords"); + + // multiple line breaks and other stuff + text = """ + + + + """; + matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text); + assertThat(matcher.find()).isTrue(); + assertThat(matcher.group(1)).isEqualTo("description"); + } +} diff --git a/application/src/test/java/run/halo/app/theme/message/ThemeMessageResolverIntegrationTest.java b/application/src/test/java/run/halo/app/theme/message/ThemeMessageResolverIntegrationTest.java index aed217a10..2ce586c91 100644 --- a/application/src/test/java/run/halo/app/theme/message/ThemeMessageResolverIntegrationTest.java +++ b/application/src/test/java/run/halo/app/theme/message/ThemeMessageResolverIntegrationTest.java @@ -72,7 +72,10 @@ public class ThemeMessageResolverIntegrationTest { .isEqualTo(""" - Title + + + Title + index
zh
@@ -93,7 +96,10 @@ public class ThemeMessageResolverIntegrationTest { .isEqualTo(""" - Title + + + Title + index
en
@@ -114,7 +120,10 @@ public class ThemeMessageResolverIntegrationTest { .isEqualTo(""" - Title + + + Title + index
foo
@@ -135,7 +144,10 @@ public class ThemeMessageResolverIntegrationTest { .isEqualTo(""" - Title + + + Title + index
zh
@@ -154,7 +166,10 @@ public class ThemeMessageResolverIntegrationTest { .isEqualTo(""" - Other theme title + + + Other theme title +

Other 首页

@@ -167,7 +182,10 @@ public class ThemeMessageResolverIntegrationTest { .isEqualTo(""" - Other theme title + + + Other theme title +

other index