mirror of https://github.com/halo-dev/halo
refactor: optimizing regex pattern for html meta matching (#4235)
#### What type of PR is this?
/kind improvement
/area core
/milestone 2.8.x

#### What this PR does / why we need it:
优化去除 Html Meta 重复标签的正则表达式

See #4234 for more details.

#### Which issue(s) this PR fixes:
Fixes #4234

#### Does this PR introduce a user-facing change?
```release-note
优化去除 Html Meta 重复标签的正则表达式
```

pull/4270/head
parent
6b70296956
commit
5eb9b68209
|
@ -8,7 +8,6 @@ import java.util.Map;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.springframework.core.annotation.Order;
|
import org.springframework.core.annotation.Order;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import org.thymeleaf.context.ITemplateContext;
|
import org.thymeleaf.context.ITemplateContext;
|
||||||
|
@ -30,7 +29,7 @@ import reactor.core.publisher.Mono;
|
||||||
@Component
|
@Component
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
|
public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
|
||||||
static final Pattern META_PATTERN = Pattern.compile("<meta\\s+name=\"(\\w+)\"(.*?)>");
|
static final Pattern META_PATTERN = Pattern.compile("<meta[^>]+?name=\"([^\"]+)\"[^>]*>\\n*");
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Mono<Void> process(ITemplateContext context, IModel model,
|
public Mono<Void> process(ITemplateContext context, IModel model,
|
||||||
|
@ -49,15 +48,17 @@ public class DuplicateMetaTagProcessor implements TemplateHeadProcessor {
|
||||||
while (matcher.find()) {
|
while (matcher.find()) {
|
||||||
String tagLine = matcher.group(0);
|
String tagLine = matcher.group(0);
|
||||||
String nameAttribute = matcher.group(1);
|
String nameAttribute = matcher.group(1);
|
||||||
IText metaTagNode = context.getModelFactory().createText(tagLine);
|
// create a new text node to replace the original text node
|
||||||
|
// replace multiple line breaks with one line break
|
||||||
|
IText metaTagNode = context.getModelFactory()
|
||||||
|
.createText(tagLine.replaceAll("\\n+", "\n"));
|
||||||
uniqueMetaTags.put(nameAttribute, new IndexedModel(i, metaTagNode));
|
uniqueMetaTags.put(nameAttribute, new IndexedModel(i, metaTagNode));
|
||||||
text = text.replace(tagLine, "");
|
text = text.replace(tagLine, "");
|
||||||
}
|
}
|
||||||
if (StringUtils.isNotBlank(text)) {
|
// put the rest of the text into the other model
|
||||||
IText otherText = context.getModelFactory()
|
IText otherText = context.getModelFactory()
|
||||||
.createText(text);
|
.createText(text);
|
||||||
otherModel.add(new IndexedModel(i, otherText));
|
otherModel.add(new IndexedModel(i, otherText));
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
otherModel.add(new IndexedModel(i, templateEvent));
|
otherModel.add(new IndexedModel(i, templateEvent));
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
package run.halo.app.theme.dialect;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for {@link DuplicateMetaTagProcessor}.
|
||||||
|
*
|
||||||
|
* @author guqing
|
||||||
|
* @since 2.8.0
|
||||||
|
*/
|
||||||
|
class DuplicateMetaTagProcessorTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void extractMetaTag() {
|
||||||
|
// normal
|
||||||
|
String text = "<meta name=\"description\" content=\"a description\"/>";
|
||||||
|
Matcher matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
|
||||||
|
assertThat(matcher.find()).isTrue();
|
||||||
|
assertThat(matcher.group(1)).isEqualTo("description");
|
||||||
|
|
||||||
|
// name and content are not in the general order
|
||||||
|
text = "<meta content=\"K1,K2\" name=\"keywords\"/>";
|
||||||
|
matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
|
||||||
|
assertThat(matcher.find()).isTrue();
|
||||||
|
assertThat(matcher.group(1)).isEqualTo("keywords");
|
||||||
|
|
||||||
|
// no closing slash
|
||||||
|
text = "<meta content=\"K1,K2\" name=\"keywords\">";
|
||||||
|
matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
|
||||||
|
assertThat(matcher.find()).isTrue();
|
||||||
|
assertThat(matcher.group(1)).isEqualTo("keywords");
|
||||||
|
|
||||||
|
// multiple line breaks and other stuff
|
||||||
|
text = """
|
||||||
|
<meta content="全局 Head description" name="description" />
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.moment .momemt-content pre.notranslate {
|
||||||
|
background: #f3f3f3;
|
||||||
|
color: #444;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
""";
|
||||||
|
matcher = DuplicateMetaTagProcessor.META_PATTERN.matcher(text);
|
||||||
|
assertThat(matcher.find()).isTrue();
|
||||||
|
assertThat(matcher.group(1)).isEqualTo("description");
|
||||||
|
}
|
||||||
|
}
|
|
@ -72,7 +72,10 @@ public class ThemeMessageResolverIntegrationTest {
|
||||||
.isEqualTo("""
|
.isEqualTo("""
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head><meta charset="UTF-8"><title>Title</title></head>
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Title</title>
|
||||||
|
</head>
|
||||||
<body>
|
<body>
|
||||||
index
|
index
|
||||||
<div>zh</div>
|
<div>zh</div>
|
||||||
|
@ -93,7 +96,10 @@ public class ThemeMessageResolverIntegrationTest {
|
||||||
.isEqualTo("""
|
.isEqualTo("""
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head><meta charset="UTF-8"><title>Title</title></head>
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Title</title>
|
||||||
|
</head>
|
||||||
<body>
|
<body>
|
||||||
index
|
index
|
||||||
<div>en</div>
|
<div>en</div>
|
||||||
|
@ -114,7 +120,10 @@ public class ThemeMessageResolverIntegrationTest {
|
||||||
.isEqualTo("""
|
.isEqualTo("""
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head><meta charset="UTF-8"><title>Title</title></head>
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Title</title>
|
||||||
|
</head>
|
||||||
<body>
|
<body>
|
||||||
index
|
index
|
||||||
<div>foo</div>
|
<div>foo</div>
|
||||||
|
@ -135,7 +144,10 @@ public class ThemeMessageResolverIntegrationTest {
|
||||||
.isEqualTo("""
|
.isEqualTo("""
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head><meta charset="UTF-8"><title>Title</title></head>
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Title</title>
|
||||||
|
</head>
|
||||||
<body>
|
<body>
|
||||||
index
|
index
|
||||||
<div>zh</div>
|
<div>zh</div>
|
||||||
|
@ -154,7 +166,10 @@ public class ThemeMessageResolverIntegrationTest {
|
||||||
.isEqualTo("""
|
.isEqualTo("""
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head><meta charset="UTF-8"><title>Other theme title</title></head>
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Other theme title</title>
|
||||||
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<p>Other 首页</p>
|
<p>Other 首页</p>
|
||||||
</body>
|
</body>
|
||||||
|
@ -167,7 +182,10 @@ public class ThemeMessageResolverIntegrationTest {
|
||||||
.isEqualTo("""
|
.isEqualTo("""
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head><meta charset="UTF-8"><title>Other theme title</title></head>
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Other theme title</title>
|
||||||
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<p>other index</p>
|
<p>other index</p>
|
||||||
</body>
|
</body>
|
||||||
|
|
Loading…
Reference in New Issue