feat: add extension point for excerpt generation (#6348)

#### What type of PR is this?
/kind feature
/area core
/milestone 2.18.x

#### What this PR does / why we need it:
新增文章摘要生成扩展点用于扩展自动生成摘要的方式

#### Does this PR introduce a user-facing change?
```release-note
新增文章摘要生成扩展点用于扩展自动生成摘要的方式
```
pull/6390/head^2
guqing 2024-07-31 17:22:04 +08:00 committed by GitHub
parent 0110438854
commit 39ff455178
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 176 additions and 29 deletions

View File

@ -0,0 +1,32 @@
package run.halo.app.content;
import java.util.Set;
import lombok.Data;
import lombok.experimental.Accessors;
import org.pf4j.ExtensionPoint;
import reactor.core.publisher.Mono;
/**
 * Extension point for generating the excerpt of a piece of content.
 *
 * <p>An implementation receives a {@link Context} describing the content and returns the
 * generated excerpt asynchronously.</p>
 */
public interface ExcerptGenerator extends ExtensionPoint {
/**
 * Generates an excerpt for the content described by the given context.
 *
 * @param context input data for the generation, see {@link Context}
 * @return a {@link Mono} emitting the generated excerpt
 */
Mono<String> generate(ExcerptGenerator.Context context);
/**
 * Input data for excerpt generation.
 *
 * <p>Setters are chainable because of {@code @Accessors(chain = true)}.</p>
 */
@Data
@Accessors(chain = true)
class Context {
/**
 * Raw content.
 * NOTE(review): presumably the source as authored, before it is rendered to HTML - confirm.
 */
private String raw;
/**
 * HTML content.
 */
private String content;
/**
 * Type of the raw content.
 * NOTE(review): presumably a format identifier such as "markdown" - confirm.
 */
private String rawType;
/**
 * Keywords in the content that help make the excerpt generation more accurate.
 */
private Set<String> keywords;
/**
 * Max length of the generated excerpt.
 */
private int maxLength;
}
}

View File

@ -10,6 +10,7 @@ import static run.halo.app.extension.ExtensionUtil.removeFinalizers;
import static run.halo.app.extension.MetadataUtil.nullSafeAnnotations; import static run.halo.app.extension.MetadataUtil.nullSafeAnnotations;
import static run.halo.app.extension.MetadataUtil.nullSafeLabels; import static run.halo.app.extension.MetadataUtil.nullSafeLabels;
import static run.halo.app.extension.index.query.QueryFactory.equal; import static run.halo.app.extension.index.query.QueryFactory.equal;
import static run.halo.app.extension.index.query.QueryFactory.in;
import com.google.common.hash.Hashing; import com.google.common.hash.Hashing;
import java.time.Duration; import java.time.Duration;
@ -20,7 +21,9 @@ import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
@ -28,8 +31,10 @@ import org.springframework.context.ApplicationEvent;
import org.springframework.context.ApplicationEventPublisher; import org.springframework.context.ApplicationEventPublisher;
import org.springframework.data.domain.Sort; import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import reactor.core.publisher.Mono;
import run.halo.app.content.CategoryService; import run.halo.app.content.CategoryService;
import run.halo.app.content.ContentWrapper; import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst; import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.PostService; import run.halo.app.content.PostService;
import run.halo.app.content.comment.CommentService; import run.halo.app.content.comment.CommentService;
@ -39,6 +44,7 @@ import run.halo.app.core.extension.content.Post;
import run.halo.app.core.extension.content.Post.PostPhase; import run.halo.app.core.extension.content.Post.PostPhase;
import run.halo.app.core.extension.content.Post.VisibleEnum; import run.halo.app.core.extension.content.Post.VisibleEnum;
import run.halo.app.core.extension.content.Snapshot; import run.halo.app.core.extension.content.Snapshot;
import run.halo.app.core.extension.content.Tag;
import run.halo.app.core.extension.notification.Subscription; import run.halo.app.core.extension.notification.Subscription;
import run.halo.app.event.post.PostDeletedEvent; import run.halo.app.event.post.PostDeletedEvent;
import run.halo.app.event.post.PostPublishedEvent; import run.halo.app.event.post.PostPublishedEvent;
@ -62,6 +68,7 @@ import run.halo.app.infra.utils.HaloUtils;
import run.halo.app.metrics.CounterService; import run.halo.app.metrics.CounterService;
import run.halo.app.metrics.MeterUtils; import run.halo.app.metrics.MeterUtils;
import run.halo.app.notification.NotificationCenter; import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
/** /**
* <p>Reconciler for {@link Post}.</p> * <p>Reconciler for {@link Post}.</p>
@ -75,6 +82,7 @@ import run.halo.app.notification.NotificationCenter;
* @author guqing * @author guqing
* @since 2.0.0 * @since 2.0.0
*/ */
@Slf4j
@AllArgsConstructor @AllArgsConstructor
@Component @Component
public class PostReconciler implements Reconciler<Reconciler.Request> { public class PostReconciler implements Reconciler<Reconciler.Request> {
@ -85,6 +93,7 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
private final CounterService counterService; private final CounterService counterService;
private final CommentService commentService; private final CommentService commentService;
private final CategoryService categoryService; private final CategoryService categoryService;
private final ExtensionGetter extensionGetter;
private final ApplicationEventPublisher eventPublisher; private final ApplicationEventPublisher eventPublisher;
private final NotificationCenter notificationCenter; private final NotificationCenter notificationCenter;
@ -155,14 +164,7 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
} }
var isAutoGenerate = defaultIfNull(excerpt.getAutoGenerate(), true); var isAutoGenerate = defaultIfNull(excerpt.getAutoGenerate(), true);
if (isAutoGenerate) { if (isAutoGenerate) {
Optional<ContentWrapper> contentWrapper = status.setExcerpt(getExcerpt(post));
postService.getContent(post.getSpec().getReleaseSnapshot(),
post.getSpec().getBaseSnapshot())
.blockOptional();
if (contentWrapper.isPresent()) {
String contentRevised = contentWrapper.get().getContent();
status.setExcerpt(getExcerpt(contentRevised));
}
} else { } else {
status.setExcerpt(excerpt.getRaw()); status.setExcerpt(excerpt.getRaw());
} }
@ -375,11 +377,57 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
.block(); .block();
} }
private String getExcerpt(String htmlContent) { private String getExcerpt(Post post) {
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500); Optional<ContentWrapper> contentWrapper =
postService.getContent(post.getSpec().getReleaseSnapshot(),
post.getSpec().getBaseSnapshot())
.blockOptional();
if (contentWrapper.isEmpty()) {
return StringUtils.EMPTY;
}
var content = contentWrapper.get();
var tags = listTagDisplayNames(post);
var keywords = new HashSet<>(tags);
keywords.add(post.getSpec().getTitle());
var context = new ExcerptGenerator.Context()
.setRaw(content.getRaw())
.setContent(content.getContent())
.setRawType(content.getRawType())
.setKeywords(keywords)
.setMaxLength(160);
return extensionGetter.getEnabledExtension(ExcerptGenerator.class)
.defaultIfEmpty(new DefaultExcerptGenerator())
.flatMap(generator -> generator.generate(context))
.onErrorResume(Throwable.class, e -> {
log.error("Failed to generate excerpt for post [{}]",
post.getMetadata().getName(), e);
return Mono.empty();
})
.blockOptional()
.orElse(StringUtils.EMPTY);
}
/**
 * Collects the display names of the tags referenced by the given post.
 *
 * @param post post whose {@code spec.tags} are looked up by {@code metadata.name}
 * @return a set of tag display names; empty when the post has no tag list
 */
private Set<String> listTagDisplayNames(Post post) {
    Set<String> displayNames = new HashSet<>();
    var tagNames = post.getSpec().getTags();
    if (tagNames == null) {
        return displayNames;
    }
    var listOptions = ListOptions.builder()
        .fieldQuery(in("metadata.name", tagNames))
        .build();
    var tags = client.listAll(Tag.class, listOptions, Sort.unsorted());
    if (tags == null) {
        return displayNames;
    }
    for (var tag : tags) {
        displayNames.add(tag.getSpec().getDisplayName());
    }
    return displayNames;
}
static class DefaultExcerptGenerator implements ExcerptGenerator {
@Override
public Mono<String> generate(Context context) {
String shortHtmlContent = StringUtils.substring(context.getContent(), 0, 500);
String text = Jsoup.parse(shortHtmlContent).text(); String text = Jsoup.parse(shortHtmlContent).text();
// TODO The default capture 150 words as excerpt return Mono.just(StringUtils.substring(text, 0, 150));
return StringUtils.substring(text, 0, 150); }
} }
List<Snapshot> listSnapshots(Ref ref) { List<Snapshot> listSnapshots(Ref ref) {

View File

@ -17,6 +17,9 @@ import org.jsoup.Jsoup;
import org.springframework.data.domain.Sort; import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import org.springframework.util.Assert; import org.springframework.util.Assert;
import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst; import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.SinglePageService; import run.halo.app.content.SinglePageService;
import run.halo.app.content.comment.CommentService; import run.halo.app.content.comment.CommentService;
@ -43,6 +46,7 @@ import run.halo.app.infra.utils.JsonUtils;
import run.halo.app.metrics.CounterService; import run.halo.app.metrics.CounterService;
import run.halo.app.metrics.MeterUtils; import run.halo.app.metrics.MeterUtils;
import run.halo.app.notification.NotificationCenter; import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
/** /**
* <p>Reconciler for {@link SinglePage}.</p> * <p>Reconciler for {@link SinglePage}.</p>
@ -65,6 +69,7 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
private final SinglePageService singlePageService; private final SinglePageService singlePageService;
private final CounterService counterService; private final CounterService counterService;
private final CommentService commentService; private final CommentService commentService;
private final ExtensionGetter extensionGetter;
private final ExternalUrlSupplier externalUrlSupplier; private final ExternalUrlSupplier externalUrlSupplier;
@ -318,12 +323,7 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
} }
if (excerpt.getAutoGenerate()) { if (excerpt.getAutoGenerate()) {
singlePageService.getContent(spec.getHeadSnapshot(), spec.getBaseSnapshot()) status.setExcerpt(getExcerpt(singlePage));
.blockOptional()
.ifPresent(content -> {
String contentRevised = content.getContent();
status.setExcerpt(getExcerpt(contentRevised));
});
} else { } else {
status.setExcerpt(excerpt.getRaw()); status.setExcerpt(excerpt.getRaw());
} }
@ -363,11 +363,40 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
}); });
} }
private String getExcerpt(String htmlContent) { private String getExcerpt(SinglePage singlePage) {
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500); Optional<ContentWrapper> contentWrapper =
singlePageService.getContent(singlePage.getSpec().getReleaseSnapshot(),
singlePage.getSpec().getBaseSnapshot())
.blockOptional();
if (contentWrapper.isEmpty()) {
return StringUtils.EMPTY;
}
var content = contentWrapper.get();
var context = new ExcerptGenerator.Context()
.setRaw(content.getRaw())
.setContent(content.getContent())
.setRaw(content.getRawType())
.setKeywords(Set.of())
.setMaxLength(160);
return extensionGetter.getEnabledExtension(ExcerptGenerator.class)
.defaultIfEmpty(new DefaultExcerptGenerator())
.flatMap(generator -> generator.generate(context))
.onErrorResume(Throwable.class, e -> {
log.error("Failed to generate excerpt for single page [{}]",
singlePage.getMetadata().getName(), e);
return Mono.empty();
})
.blockOptional()
.orElse(StringUtils.EMPTY);
}
static class DefaultExcerptGenerator implements ExcerptGenerator {
@Override
public Mono<String> generate(Context context) {
String shortHtmlContent = StringUtils.substring(context.getContent(), 0, 500);
String text = Jsoup.parse(shortHtmlContent).text(); String text = Jsoup.parse(shortHtmlContent).text();
// TODO The default capture 150 words as excerpt return Mono.just(StringUtils.substring(text, 0, 150));
return StringUtils.substring(text, 0, 150); }
} }
private boolean isDeleted(SinglePage singlePage) { private boolean isDeleted(SinglePage singlePage) {

View File

@ -87,4 +87,13 @@ spec:
displayName: 页脚标签内容处理器 displayName: 页脚标签内容处理器
type: MULTI_INSTANCE type: MULTI_INSTANCE
description: "提供用于扩展 <halo:footer/> 标签内容的扩展方式。" description: "提供用于扩展 <halo:footer/> 标签内容的扩展方式。"
---
apiVersion: plugin.halo.run/v1alpha1
kind: ExtensionPointDefinition
metadata:
name: excerpt-generator
spec:
className: run.halo.app.content.ExcerptGenerator
displayName: 摘要生成器
type: SINGLETON
description: "提供自动生成摘要的方式扩展,如使用算法提取或使用 AI 生成。"

View File

@ -25,6 +25,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.context.ApplicationEventPublisher; import org.springframework.context.ApplicationEventPublisher;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper; import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst; import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.PostService; import run.halo.app.content.PostService;
import run.halo.app.content.TestPost; import run.halo.app.content.TestPost;
@ -36,6 +37,7 @@ import run.halo.app.event.post.PostPublishedEvent;
import run.halo.app.extension.ExtensionClient; import run.halo.app.extension.ExtensionClient;
import run.halo.app.extension.controller.Reconciler; import run.halo.app.extension.controller.Reconciler;
import run.halo.app.notification.NotificationCenter; import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
/** /**
* Tests for {@link PostReconciler}. * Tests for {@link PostReconciler}.
@ -61,6 +63,9 @@ class PostReconcilerTest {
@Mock @Mock
private NotificationCenter notificationCenter; private NotificationCenter notificationCenter;
@Mock
private ExtensionGetter extensionGetter;
@InjectMocks @InjectMocks
private PostReconciler postReconciler; private PostReconciler postReconciler;
@ -96,7 +101,7 @@ class PostReconcilerTest {
verify(postPermalinkPolicy, times(1)).permalink(any()); verify(postPermalinkPolicy, times(1)).permalink(any());
Post value = captor.getValue(); Post value = captor.getValue();
assertThat(value.getStatus().getExcerpt()).isNull(); assertThat(value.getStatus().getExcerpt()).isEmpty();
assertThat(value.getStatus().getContributors()).isEqualTo(List.of("guqing", "zhangsan")); assertThat(value.getStatus().getContributors()).isEqualTo(List.of("guqing", "zhangsan"));
} }
@ -126,6 +131,9 @@ class PostReconcilerTest {
Snapshot snapshotV1 = TestPost.snapshotV1(); Snapshot snapshotV1 = TestPost.snapshotV1();
snapshotV1.getSpec().setContributors(Set.of("guqing")); snapshotV1.getSpec().setContributors(Set.of("guqing"));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any())) when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of(snapshotV1, snapshotV2)); .thenReturn(List.of(snapshotV1, snapshotV2));
@ -162,6 +170,9 @@ class PostReconcilerTest {
when(client.fetch(eq(Snapshot.class), eq(post.getSpec().getReleaseSnapshot()))) when(client.fetch(eq(Snapshot.class), eq(post.getSpec().getReleaseSnapshot())))
.thenReturn(Optional.of(snapshotV2)); .thenReturn(Optional.of(snapshotV2));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any())) when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of()); .thenReturn(List.of());
@ -191,6 +202,9 @@ class PostReconcilerTest {
.rawType("markdown") .rawType("markdown")
.build())); .build()));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any())) when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of()); .thenReturn(List.of());

View File

@ -26,6 +26,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.context.ApplicationContext; import org.springframework.context.ApplicationContext;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper; import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst; import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.SinglePageService; import run.halo.app.content.SinglePageService;
import run.halo.app.content.TestPost; import run.halo.app.content.TestPost;
@ -39,6 +40,7 @@ import run.halo.app.extension.controller.Reconciler;
import run.halo.app.infra.ExternalUrlSupplier; import run.halo.app.infra.ExternalUrlSupplier;
import run.halo.app.metrics.CounterService; import run.halo.app.metrics.CounterService;
import run.halo.app.notification.NotificationCenter; import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
/** /**
* Tests for {@link SinglePageReconciler}. * Tests for {@link SinglePageReconciler}.
@ -66,6 +68,9 @@ class SinglePageReconcilerTest {
@Mock @Mock
NotificationCenter notificationCenter; NotificationCenter notificationCenter;
@Mock
ExtensionGetter extensionGetter;
@InjectMocks @InjectMocks
private SinglePageReconciler singlePageReconciler; private SinglePageReconciler singlePageReconciler;
@ -79,9 +84,10 @@ class SinglePageReconcilerTest {
String name = "page-A"; String name = "page-A";
SinglePage page = pageV1(); SinglePage page = pageV1();
page.getSpec().setHeadSnapshot("page-A-head-snapshot"); page.getSpec().setHeadSnapshot("page-A-head-snapshot");
page.getSpec().setReleaseSnapshot(page.getSpec().getHeadSnapshot());
when(client.fetch(eq(SinglePage.class), eq(name))) when(client.fetch(eq(SinglePage.class), eq(name)))
.thenReturn(Optional.of(page)); .thenReturn(Optional.of(page));
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()), when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
eq(page.getSpec().getBaseSnapshot()))) eq(page.getSpec().getBaseSnapshot())))
.thenReturn(Mono.just(ContentWrapper.builder() .thenReturn(Mono.just(ContentWrapper.builder()
.snapshotName(page.getSpec().getHeadSnapshot()) .snapshotName(page.getSpec().getHeadSnapshot())
@ -99,6 +105,9 @@ class SinglePageReconcilerTest {
.thenReturn(List.of(snapshotV1, snapshotV2)); .thenReturn(List.of(snapshotV1, snapshotV2));
when(externalUrlSupplier.get()).thenReturn(URI.create("")); when(externalUrlSupplier.get()).thenReturn(URI.create(""));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
ArgumentCaptor<SinglePage> captor = ArgumentCaptor.forClass(SinglePage.class); ArgumentCaptor<SinglePage> captor = ArgumentCaptor.forClass(SinglePage.class);
singlePageReconciler.reconcile(new Reconciler.Request(name)); singlePageReconciler.reconcile(new Reconciler.Request(name));
@ -141,7 +150,7 @@ class SinglePageReconcilerTest {
page.getSpec().setReleaseSnapshot("page-fake-released-snapshot"); page.getSpec().setReleaseSnapshot("page-fake-released-snapshot");
when(client.fetch(eq(SinglePage.class), eq(name))) when(client.fetch(eq(SinglePage.class), eq(name)))
.thenReturn(Optional.of(page)); .thenReturn(Optional.of(page));
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()), when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
eq(page.getSpec().getBaseSnapshot()))) eq(page.getSpec().getBaseSnapshot())))
.thenReturn(Mono.just(ContentWrapper.builder() .thenReturn(Mono.just(ContentWrapper.builder()
.snapshotName(page.getSpec().getHeadSnapshot()) .snapshotName(page.getSpec().getHeadSnapshot())
@ -156,6 +165,9 @@ class SinglePageReconcilerTest {
when(client.fetch(eq(Snapshot.class), eq(page.getSpec().getReleaseSnapshot()))) when(client.fetch(eq(Snapshot.class), eq(page.getSpec().getReleaseSnapshot())))
.thenReturn(Optional.of(snapshotV2)); .thenReturn(Optional.of(snapshotV2));
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any())) when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of()); .thenReturn(List.of());
@ -176,7 +188,7 @@ class SinglePageReconcilerTest {
page.getSpec().setPublish(false); page.getSpec().setPublish(false);
when(client.fetch(eq(SinglePage.class), eq(name))) when(client.fetch(eq(SinglePage.class), eq(name)))
.thenReturn(Optional.of(page)); .thenReturn(Optional.of(page));
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()), when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
eq(page.getSpec().getBaseSnapshot()))) eq(page.getSpec().getBaseSnapshot())))
.thenReturn(Mono.just(ContentWrapper.builder() .thenReturn(Mono.just(ContentWrapper.builder()
.snapshotName(page.getSpec().getHeadSnapshot()) .snapshotName(page.getSpec().getHeadSnapshot())
@ -186,6 +198,9 @@ class SinglePageReconcilerTest {
.build()) .build())
); );
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());
when(client.listAll(eq(Snapshot.class), any(), any())) when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of()); .thenReturn(List.of());