feat: add extension point for excerpt generation (#6348)

#### What type of PR is this?
/kind feature
/area core
/milestone 2.18.x

#### What this PR does / why we need it:
新增文章摘要生成扩展点用于扩展自动生成摘要的方式

#### Does this PR introduce a user-facing change?
```release-note
新增文章摘要生成扩展点用于扩展自动生成摘要的方式
```
pull/6390/head^2
guqing 2024-07-31 17:22:04 +08:00 committed by GitHub
parent 0110438854
commit 39ff455178
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 176 additions and 29 deletions

View File

@ -0,0 +1,32 @@
package run.halo.app.content;
import java.util.Set;
import lombok.Data;
import lombok.experimental.Accessors;
import org.pf4j.ExtensionPoint;
import reactor.core.publisher.Mono;
/**
* Extension point for producing an excerpt from a piece of content.
*
* <p>Implementations receive a {@link Context} describing the content and return the
* excerpt asynchronously.
*/
public interface ExcerptGenerator extends ExtensionPoint {
/**
* Generates an excerpt for the content described by the given context.
*
* @param context the content and generation hints
* @return a {@link Mono} emitting the generated excerpt
*/
Mono<String> generate(ExcerptGenerator.Context context);
/**
* Input for {@link #generate(Context)}. Setters are chainable (see {@code @Accessors}).
*/
@Data
@Accessors(chain = true)
class Context {
/**
* Raw content; presumably the source text in the format named by {@link #rawType}
* - TODO confirm against ContentWrapper.
*/
private String raw;
/**
* HTML content.
*/
private String content;
/**
* Type of {@link #raw} (for example {@code markdown}).
*/
private String rawType;
/**
* Keywords related to the content that help make excerpt generation more accurate.
*/
private Set<String> keywords;
/**
* Max length of the generated excerpt.
*/
private int maxLength;
}
}

View File

@ -10,6 +10,7 @@ import static run.halo.app.extension.ExtensionUtil.removeFinalizers;
import static run.halo.app.extension.MetadataUtil.nullSafeAnnotations;
import static run.halo.app.extension.MetadataUtil.nullSafeLabels;
import static run.halo.app.extension.index.query.QueryFactory.equal;
import static run.halo.app.extension.index.query.QueryFactory.in;
import com.google.common.hash.Hashing;
import java.time.Duration;
@ -20,7 +21,9 @@ import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
@ -28,8 +31,10 @@ import org.springframework.context.ApplicationEvent;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component;
import reactor.core.publisher.Mono;
import run.halo.app.content.CategoryService;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.PostService;
import run.halo.app.content.comment.CommentService;
@ -39,6 +44,7 @@ import run.halo.app.core.extension.content.Post;
import run.halo.app.core.extension.content.Post.PostPhase;
import run.halo.app.core.extension.content.Post.VisibleEnum;
import run.halo.app.core.extension.content.Snapshot;
import run.halo.app.core.extension.content.Tag;
import run.halo.app.core.extension.notification.Subscription;
import run.halo.app.event.post.PostDeletedEvent;
import run.halo.app.event.post.PostPublishedEvent;
@ -62,6 +68,7 @@ import run.halo.app.infra.utils.HaloUtils;
import run.halo.app.metrics.CounterService;
import run.halo.app.metrics.MeterUtils;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
/**
* <p>Reconciler for {@link Post}.</p>
@ -75,6 +82,7 @@ import run.halo.app.notification.NotificationCenter;
* @author guqing
* @since 2.0.0
*/
@Slf4j
@AllArgsConstructor
@Component
public class PostReconciler implements Reconciler<Reconciler.Request> {
@ -85,6 +93,7 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
private final CounterService counterService;
private final CommentService commentService;
private final CategoryService categoryService;
private final ExtensionGetter extensionGetter;
private final ApplicationEventPublisher eventPublisher;
private final NotificationCenter notificationCenter;
@ -155,14 +164,7 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
}
var isAutoGenerate = defaultIfNull(excerpt.getAutoGenerate(), true);
if (isAutoGenerate) {
Optional<ContentWrapper> contentWrapper =
postService.getContent(post.getSpec().getReleaseSnapshot(),
post.getSpec().getBaseSnapshot())
.blockOptional();
if (contentWrapper.isPresent()) {
String contentRevised = contentWrapper.get().getContent();
status.setExcerpt(getExcerpt(contentRevised));
}
status.setExcerpt(getExcerpt(post));
} else {
status.setExcerpt(excerpt.getRaw());
}
@ -375,11 +377,57 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
.block();
}
private String getExcerpt(String htmlContent) {
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500);
String text = Jsoup.parse(shortHtmlContent).text();
// TODO The default capture 150 words as excerpt
return StringUtils.substring(text, 0, 150);
/**
 * Builds the auto-generated excerpt for a post.
 *
 * <p>The content is resolved from the post's release snapshot against its base snapshot.
 * The post title and the display names of its tags are passed as keywords. The enabled
 * {@link ExcerptGenerator} is used, or {@link DefaultExcerptGenerator} when none is enabled.
 *
 * @param post the post to generate an excerpt for
 * @return the excerpt, or an empty string when no content is available, the generator
 *     emits nothing, or generation fails (the failure is logged)
 */
private String getExcerpt(Post post) {
    var spec = post.getSpec();
    ContentWrapper released =
        postService.getContent(spec.getReleaseSnapshot(), spec.getBaseSnapshot())
            .blockOptional()
            .orElse(null);
    if (released == null) {
        return StringUtils.EMPTY;
    }

    Set<String> hints = new HashSet<>(listTagDisplayNames(post));
    hints.add(spec.getTitle());

    var excerptContext = new ExcerptGenerator.Context();
    excerptContext.setRaw(released.getRaw());
    excerptContext.setContent(released.getContent());
    excerptContext.setRawType(released.getRawType());
    excerptContext.setKeywords(hints);
    excerptContext.setMaxLength(160);

    Mono<String> excerpt = extensionGetter.getEnabledExtension(ExcerptGenerator.class)
        .defaultIfEmpty(new DefaultExcerptGenerator())
        .flatMap(generator -> generator.generate(excerptContext))
        .onErrorResume(Throwable.class, e -> {
            // Excerpt generation is best-effort: log and continue with an empty excerpt.
            log.error("Failed to generate excerpt for post [{}]",
                post.getMetadata().getName(), e);
            return Mono.empty();
        });
    return excerpt.blockOptional().orElse(StringUtils.EMPTY);
}
/**
 * Collects the display names of the tags referenced by the given post.
 *
 * @param post the post whose {@code spec.tags} are looked up by {@code metadata.name}
 * @return the distinct display names; empty when the post references no tags
 */
private Set<String> listTagDisplayNames(Post post) {
    Set<String> displayNames = new HashSet<>();
    var tagNames = post.getSpec().getTags();
    if (tagNames == null) {
        return displayNames;
    }
    var options = ListOptions.builder()
        .fieldQuery(in("metadata.name", tagNames))
        .build();
    List<Tag> matched = client.listAll(Tag.class, options, Sort.unsorted());
    if (matched != null) {
        for (Tag tag : matched) {
            displayNames.add(tag.getSpec().getDisplayName());
        }
    }
    return displayNames;
}
/**
 * Fallback {@link ExcerptGenerator} used when no enabled extension is available.
 *
 * <p>It parses a bounded prefix of the HTML content, extracts its text and truncates the
 * text to the requested maximum length.
 */
static class DefaultExcerptGenerator implements ExcerptGenerator {
    /** Number of leading HTML characters that are parsed to extract text. */
    private static final int HTML_PREFIX_LENGTH = 500;
    /** Excerpt length used when the context does not carry a positive max length. */
    private static final int FALLBACK_MAX_LENGTH = 150;

    /**
     * Generates a plain-text excerpt from {@link Context#getContent()}.
     *
     * @param context the generation context; only the HTML content and max length are used
     * @return a {@link Mono} emitting the truncated text
     */
    @Override
    public Mono<String> generate(Context context) {
        String shortHtmlContent =
            StringUtils.substring(context.getContent(), 0, HTML_PREFIX_LENGTH);
        String text = Jsoup.parse(shortHtmlContent).text();
        // Honor the caller-provided limit instead of a hard-coded one; an unset (zero) or
        // negative value keeps the previous 150-character behavior.
        int maxLength = context.getMaxLength() > 0
            ? context.getMaxLength()
            : FALLBACK_MAX_LENGTH;
        return Mono.just(StringUtils.substring(text, 0, maxLength));
    }
}
List<Snapshot> listSnapshots(Ref ref) {

View File

@ -17,6 +17,9 @@ import org.jsoup.Jsoup;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component;
import org.springframework.util.Assert;
import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.SinglePageService;
import run.halo.app.content.comment.CommentService;
@ -43,6 +46,7 @@ import run.halo.app.infra.utils.JsonUtils;
import run.halo.app.metrics.CounterService;
import run.halo.app.metrics.MeterUtils;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
/**
* <p>Reconciler for {@link SinglePage}.</p>
@ -65,6 +69,7 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
private final SinglePageService singlePageService;
private final CounterService counterService;
private final CommentService commentService;
private final ExtensionGetter extensionGetter;
private final ExternalUrlSupplier externalUrlSupplier;
@ -318,12 +323,7 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
}
if (excerpt.getAutoGenerate()) {
singlePageService.getContent(spec.getHeadSnapshot(), spec.getBaseSnapshot())
.blockOptional()
.ifPresent(content -> {
String contentRevised = content.getContent();
status.setExcerpt(getExcerpt(contentRevised));
});
status.setExcerpt(getExcerpt(singlePage));
} else {
status.setExcerpt(excerpt.getRaw());
}
@ -363,11 +363,40 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
});
}
private String getExcerpt(String htmlContent) {
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500);
String text = Jsoup.parse(shortHtmlContent).text();
// TODO The default capture 150 words as excerpt
return StringUtils.substring(text, 0, 150);
/**
 * Builds the auto-generated excerpt for a single page.
 *
 * <p>The content is resolved from the page's release snapshot against its base snapshot
 * and handed to the enabled {@link ExcerptGenerator}, or to
 * {@link DefaultExcerptGenerator} when none is enabled. No keywords are supplied.
 *
 * @param singlePage the page to generate an excerpt for
 * @return the excerpt, or an empty string when no content is available, the generator
 *     emits nothing, or generation fails (the failure is logged)
 */
private String getExcerpt(SinglePage singlePage) {
    Optional<ContentWrapper> contentWrapper =
        singlePageService.getContent(singlePage.getSpec().getReleaseSnapshot(),
                singlePage.getSpec().getBaseSnapshot())
            .blockOptional();
    if (contentWrapper.isEmpty()) {
        return StringUtils.EMPTY;
    }
    var content = contentWrapper.get();
    var context = new ExcerptGenerator.Context()
        .setRaw(content.getRaw())
        .setContent(content.getContent())
        // The raw type has its own field; it must not be written through setRaw, which
        // would replace the raw source set above.
        .setRawType(content.getRawType())
        .setKeywords(Set.of())
        .setMaxLength(160);
    return extensionGetter.getEnabledExtension(ExcerptGenerator.class)
        .defaultIfEmpty(new DefaultExcerptGenerator())
        .flatMap(generator -> generator.generate(context))
        .onErrorResume(Throwable.class, e -> {
            // Excerpt generation is best-effort: log and continue with an empty excerpt.
            log.error("Failed to generate excerpt for single page [{}]",
                singlePage.getMetadata().getName(), e);
            return Mono.empty();
        })
        .blockOptional()
        .orElse(StringUtils.EMPTY);
}
/**
 * Fallback {@link ExcerptGenerator} used when no enabled extension is available.
 *
 * <p>It parses a bounded prefix of the HTML content, extracts its text and truncates the
 * text to the requested maximum length.
 */
static class DefaultExcerptGenerator implements ExcerptGenerator {
    /** Number of leading HTML characters that are parsed to extract text. */
    private static final int HTML_PREFIX_LENGTH = 500;
    /** Excerpt length used when the context does not carry a positive max length. */
    private static final int FALLBACK_MAX_LENGTH = 150;

    /**
     * Generates a plain-text excerpt from {@link Context#getContent()}.
     *
     * @param context the generation context; only the HTML content and max length are used
     * @return a {@link Mono} emitting the truncated text
     */
    @Override
    public Mono<String> generate(Context context) {
        String shortHtmlContent =
            StringUtils.substring(context.getContent(), 0, HTML_PREFIX_LENGTH);
        String text = Jsoup.parse(shortHtmlContent).text();
        // Honor the caller-provided limit instead of a hard-coded one; an unset (zero) or
        // negative value keeps the previous 150-character behavior.
        int maxLength = context.getMaxLength() > 0
            ? context.getMaxLength()
            : FALLBACK_MAX_LENGTH;
        return Mono.just(StringUtils.substring(text, 0, maxLength));
    }
}
private boolean isDeleted(SinglePage singlePage) {

View File

@ -87,4 +87,13 @@ spec:
displayName: 页脚标签内容处理器
type: MULTI_INSTANCE
description: "提供用于扩展 <halo:footer/> 标签内容的扩展方式。"
---
# Declares the ExcerptGenerator extension point (singleton) used for automatic excerpt generation.
apiVersion: plugin.halo.run/v1alpha1
kind: ExtensionPointDefinition
metadata:
name: excerpt-generator
spec:
className: run.halo.app.content.ExcerptGenerator
displayName: 摘要生成器
type: SINGLETON
description: "提供自动生成摘要的方式扩展,如使用算法提取或使用 AI 生成。"

View File

@ -25,6 +25,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.context.ApplicationEventPublisher;
import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.PostService;
import run.halo.app.content.TestPost;
@ -36,6 +37,7 @@ import run.halo.app.event.post.PostPublishedEvent;
import run.halo.app.extension.ExtensionClient;
import run.halo.app.extension.controller.Reconciler;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
/**
* Tests for {@link PostReconciler}.
@ -61,6 +63,9 @@ class PostReconcilerTest {
@Mock
private NotificationCenter notificationCenter;
@Mock
private ExtensionGetter extensionGetter;
@InjectMocks
private PostReconciler postReconciler;
@ -96,7 +101,7 @@ class PostReconcilerTest {
verify(postPermalinkPolicy, times(1)).permalink(any());
Post value = captor.getValue();
assertThat(value.getStatus().getExcerpt()).isNull();
assertThat(value.getStatus().getExcerpt()).isEmpty();
assertThat(value.getStatus().getContributors()).isEqualTo(List.of("guqing", "zhangsan"));
}
@ -126,6 +131,9 @@ class PostReconcilerTest {
Snapshot snapshotV1 = TestPost.snapshotV1();
snapshotV1.getSpec().setContributors(Set.of("guqing"));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of(snapshotV1, snapshotV2));
@ -162,6 +170,9 @@ class PostReconcilerTest {
when(client.fetch(eq(Snapshot.class), eq(post.getSpec().getReleaseSnapshot())))
.thenReturn(Optional.of(snapshotV2));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());
@ -191,6 +202,9 @@ class PostReconcilerTest {
.rawType("markdown")
.build()));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());

View File

@ -26,6 +26,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.context.ApplicationContext;
import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.SinglePageService;
import run.halo.app.content.TestPost;
@ -39,6 +40,7 @@ import run.halo.app.extension.controller.Reconciler;
import run.halo.app.infra.ExternalUrlSupplier;
import run.halo.app.metrics.CounterService;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
/**
* Tests for {@link SinglePageReconciler}.
@ -66,6 +68,9 @@ class SinglePageReconcilerTest {
@Mock
NotificationCenter notificationCenter;
@Mock
ExtensionGetter extensionGetter;
@InjectMocks
private SinglePageReconciler singlePageReconciler;
@ -79,9 +84,10 @@ class SinglePageReconcilerTest {
String name = "page-A";
SinglePage page = pageV1();
page.getSpec().setHeadSnapshot("page-A-head-snapshot");
page.getSpec().setReleaseSnapshot(page.getSpec().getHeadSnapshot());
when(client.fetch(eq(SinglePage.class), eq(name)))
.thenReturn(Optional.of(page));
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()),
when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
eq(page.getSpec().getBaseSnapshot())))
.thenReturn(Mono.just(ContentWrapper.builder()
.snapshotName(page.getSpec().getHeadSnapshot())
@ -99,6 +105,9 @@ class SinglePageReconcilerTest {
.thenReturn(List.of(snapshotV1, snapshotV2));
when(externalUrlSupplier.get()).thenReturn(URI.create(""));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
ArgumentCaptor<SinglePage> captor = ArgumentCaptor.forClass(SinglePage.class);
singlePageReconciler.reconcile(new Reconciler.Request(name));
@ -141,7 +150,7 @@ class SinglePageReconcilerTest {
page.getSpec().setReleaseSnapshot("page-fake-released-snapshot");
when(client.fetch(eq(SinglePage.class), eq(name)))
.thenReturn(Optional.of(page));
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()),
when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
eq(page.getSpec().getBaseSnapshot())))
.thenReturn(Mono.just(ContentWrapper.builder()
.snapshotName(page.getSpec().getHeadSnapshot())
@ -156,6 +165,9 @@ class SinglePageReconcilerTest {
when(client.fetch(eq(Snapshot.class), eq(page.getSpec().getReleaseSnapshot())))
.thenReturn(Optional.of(snapshotV2));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());
@ -176,7 +188,7 @@ class SinglePageReconcilerTest {
page.getSpec().setPublish(false);
when(client.fetch(eq(SinglePage.class), eq(name)))
.thenReturn(Optional.of(page));
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()),
when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
eq(page.getSpec().getBaseSnapshot())))
.thenReturn(Mono.just(ContentWrapper.builder()
.snapshotName(page.getSpec().getHeadSnapshot())
@ -186,6 +198,9 @@ class SinglePageReconcilerTest {
.build())
);
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());