refactor: optimize regex pattern for HTML meta matching (#4235)

#### What type of PR is this?
/kind improvement
/area core
/milestone 2.8.x

#### What this PR does / why we need it:
优化去除 Html Meta 重复标签的正则表达式

see #4234 for more details.

#### Which issue(s) this PR fixes:

Fixes #4234

#### Does this PR introduce a user-facing change?

```release-note
优化去除 Html Meta 重复标签的正则表达式
```
pull/4270/head
guqing 2023-07-20 16:55:56 +08:00 committed by GitHub
parent 6b70296956
commit 5eb9b68209
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 84 additions and 14 deletions

View File

@ -8,7 +8,6 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.AllArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import org.thymeleaf.context.ITemplateContext;
@ -30,7 +29,7 @@ import reactor.core.publisher.Mono;
@Component
@AllArgsConstructor
public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
static final Pattern META_PATTERN = Pattern.compile("<meta\\s+name=\"(\\w+)\"(.*?)>");
static final Pattern META_PATTERN = Pattern.compile("<meta[^>]+?name=\"([^\"]+)\"[^>]*>\\n*");
@Override
public Mono<Void> process(ITemplateContext context, IModel model,
@ -49,15 +48,17 @@ public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
while (matcher.find()) {
String tagLine = matcher.group(0);
String nameAttribute = matcher.group(1);
IText metaTagNode = context.getModelFactory().createText(tagLine);
// create a new text node to replace the original text node
// replace multiple line breaks with one line break
IText metaTagNode = context.getModelFactory()
.createText(tagLine.replaceAll("\\n+", "\n"));
uniqueMetaTags.put(nameAttribute, new IndexedModel(i, metaTagNode));
text = text.replace(tagLine, "");
}
if (StringUtils.isNotBlank(text)) {
IText otherText = context.getModelFactory()
.createText(text);
otherModel.add(new IndexedModel(i, otherText));
}
// put the rest of the text into the other model
IText otherText = context.getModelFactory()
.createText(text);
otherModel.add(new IndexedModel(i, otherText));
} else {
otherModel.add(new IndexedModel(i, templateEvent));
}

View File

@ -0,0 +1,51 @@
package run.halo.app.theme.dialect;
import static org.assertj.core.api.Assertions.assertThat;
import java.util.regex.Matcher;
import org.junit.jupiter.api.Test;
/**
 * Unit tests covering the meta tag regular expression exposed by
 * {@link DuplicateMetaTagProcessor}.
 *
 * @author guqing
 * @since 2.8.0
 */
class DuplicateMetaTagProcessorTest {

    /**
     * Checks that {@code META_PATTERN} finds a meta tag in several markup variants and
     * captures the value of its {@code name} attribute as group 1.
     */
    @Test
    void extractMetaTag() {
        // normal
        assertCapturedName("<meta name=\"description\" content=\"a description\"/>",
            "description");

        // name and content are not in the general order
        assertCapturedName("<meta content=\"K1,K2\" name=\"keywords\"/>", "keywords");

        // no closing slash
        assertCapturedName("<meta content=\"K1,K2\" name=\"keywords\">", "keywords");

        // multiple line breaks and other stuff
        String headFragment = """
            <meta content="全局 Head description" name="description" />
            <style>
            .moment .momemt-content pre.notranslate {
            background: #f3f3f3;
            color: #444;
            }
            </style>
            """;
        assertCapturedName(headFragment, "description");
    }

    /**
     * Runs {@code META_PATTERN} against the given markup and asserts that a match is found
     * whose first capturing group equals the expected name.
     *
     * @param html markup to search
     * @param expectedName expected content of capturing group 1
     */
    private static void assertCapturedName(String html, String expectedName) {
        Matcher metaMatcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(html);
        assertThat(metaMatcher.find()).isTrue();
        assertThat(metaMatcher.group(1)).isEqualTo(expectedName);
    }
}

View File

@ -72,7 +72,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Title</title></head>
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
index
<div>zh</div>
@ -93,7 +96,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Title</title></head>
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
index
<div>en</div>
@ -114,7 +120,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Title</title></head>
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
index
<div>foo</div>
@ -135,7 +144,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Title</title></head>
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
index
<div>zh</div>
@ -154,7 +166,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Other theme title</title></head>
<head>
<meta charset="UTF-8">
<title>Other theme title</title>
</head>
<body>
<p>Other </p>
</body>
@ -167,7 +182,10 @@ public class ThemeMessageResolverIntegrationTest {
.isEqualTo("""
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Other theme title</title></head>
<head>
<meta charset="UTF-8">
<title>Other theme title</title>
</head>
<body>
<p>other index</p>
</body>