mirror of https://github.com/halo-dev/halo
refactor: optimizing regex pattern for html meta matching (#4235)
#### What type of PR is this?

/kind improvement
/area core
/milestone 2.8.x

#### What this PR does / why we need it:

优化去除 Html Meta 重复标签的正则表达式

See #4234 for more details.

#### Which issue(s) this PR fixes:

Fixes #4234

#### Does this PR introduce a user-facing change?

```release-note
优化去除 Html Meta 重复标签的正则表达式
```

pull/4270/head
parent
6b70296956
commit
5eb9b68209
|
@ -8,7 +8,6 @@ import java.util.Map;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.thymeleaf.context.ITemplateContext;
|
||||
|
@ -30,7 +29,7 @@ import reactor.core.publisher.Mono;
|
|||
@Component
|
||||
@AllArgsConstructor
|
||||
public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
|
||||
static final Pattern META_PATTERN = Pattern.compile("<meta\\s+name=\"(\\w+)\"(.*?)>");
|
||||
static final Pattern META_PATTERN = Pattern.compile("<meta[^>]+?name=\"([^\"]+)\"[^>]*>\\n*");
|
||||
|
||||
@Override
|
||||
public Mono<Void> process(ITemplateContext context, IModel model,
|
||||
|
@ -49,15 +48,17 @@ public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
|
|||
while (matcher.find()) {
|
||||
String tagLine = matcher.group(0);
|
||||
String nameAttribute = matcher.group(1);
|
||||
IText metaTagNode = context.getModelFactory().createText(tagLine);
|
||||
// create a new text node to replace the original text node
|
||||
// replace multiple line breaks with one line break
|
||||
IText metaTagNode = context.getModelFactory()
|
||||
.createText(tagLine.replaceAll("\\n+", "\n"));
|
||||
uniqueMetaTags.put(nameAttribute, new IndexedModel(i, metaTagNode));
|
||||
text = text.replace(tagLine, "");
|
||||
}
|
||||
if (StringUtils.isNotBlank(text)) {
|
||||
IText otherText = context.getModelFactory()
|
||||
.createText(text);
|
||||
otherModel.add(new IndexedModel(i, otherText));
|
||||
}
|
||||
// put the rest of the text into the other model
|
||||
IText otherText = context.getModelFactory()
|
||||
.createText(text);
|
||||
otherModel.add(new IndexedModel(i, otherText));
|
||||
} else {
|
||||
otherModel.add(new IndexedModel(i, templateEvent));
|
||||
}
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
package run.halo.app.theme.dialect;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Tests that {@link DuplicateMetaTagProcessor#META_PATTERN} finds a meta tag and captures its name attribute in group 1.
|
||||
*
|
||||
* @author guqing
|
||||
* @since 2.8.0
|
||||
*/
|
||||
class DuplicateMetaTagProcessorTest {
|
||||
|
||||
@Test
|
||||
void extractMetaTag() {
|
||||
// name attribute comes first in a self-closing tag: group 1 holds the name value
|
||||
String text = "<meta name=\"description\" content=\"a description\"/>";
|
||||
Matcher matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
|
||||
assertThat(matcher.find()).isTrue();
|
||||
assertThat(matcher.group(1)).isEqualTo("description");
|
||||
|
||||
// content attribute precedes name: the name value must still be captured
|
||||
text = "<meta content=\"K1,K2\" name=\"keywords\"/>";
|
||||
matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
|
||||
assertThat(matcher.find()).isTrue();
|
||||
assertThat(matcher.group(1)).isEqualTo("keywords");
|
||||
|
||||
// same attribute order, but the tag is not self-closed (no trailing slash)
|
||||
text = "<meta content=\"K1,K2\" name=\"keywords\">";
|
||||
matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
|
||||
assertThat(matcher.find()).isTrue();
|
||||
assertThat(matcher.group(1)).isEqualTo("keywords");
|
||||
|
||||
// meta tag followed by blank lines and an unrelated style element in the same text
|
||||
text = """
|
||||
<meta content="全局 Head description" name="description" />
|
||||
|
||||
<style>
|
||||
.moment .momemt-content pre.notranslate {
|
||||
background: #f3f3f3;
|
||||
color: #444;
|
||||
}
|
||||
</style>
|
||||
""";
|
||||
matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
|
||||
assertThat(matcher.find()).isTrue();
|
||||
assertThat(matcher.group(1)).isEqualTo("description");
|
||||
}
|
||||
}
|
|
@ -72,7 +72,10 @@ public class ThemeMessageResolverIntegrationTest {
|
|||
.isEqualTo("""
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head><meta charset="UTF-8"><title>Title</title></head>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Title</title>
|
||||
</head>
|
||||
<body>
|
||||
index
|
||||
<div>zh</div>
|
||||
|
@ -93,7 +96,10 @@ public class ThemeMessageResolverIntegrationTest {
|
|||
.isEqualTo("""
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head><meta charset="UTF-8"><title>Title</title></head>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Title</title>
|
||||
</head>
|
||||
<body>
|
||||
index
|
||||
<div>en</div>
|
||||
|
@ -114,7 +120,10 @@ public class ThemeMessageResolverIntegrationTest {
|
|||
.isEqualTo("""
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head><meta charset="UTF-8"><title>Title</title></head>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Title</title>
|
||||
</head>
|
||||
<body>
|
||||
index
|
||||
<div>foo</div>
|
||||
|
@ -135,7 +144,10 @@ public class ThemeMessageResolverIntegrationTest {
|
|||
.isEqualTo("""
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head><meta charset="UTF-8"><title>Title</title></head>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Title</title>
|
||||
</head>
|
||||
<body>
|
||||
index
|
||||
<div>zh</div>
|
||||
|
@ -154,7 +166,10 @@ public class ThemeMessageResolverIntegrationTest {
|
|||
.isEqualTo("""
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head><meta charset="UTF-8"><title>Other theme title</title></head>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Other theme title</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Other 首页</p>
|
||||
</body>
|
||||
|
@ -167,7 +182,10 @@ public class ThemeMessageResolverIntegrationTest {
|
|||
.isEqualTo("""
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head><meta charset="UTF-8"><title>Other theme title</title></head>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Other theme title</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>other index</p>
|
||||
</body>
|
||||
|
|
Loading…
Reference in New Issue