mirror of https://github.com/halo-dev/halo
feat: add extension point for excerpt generation (#6348)
#### What type of PR is this?

/kind feature
/area core
/milestone 2.18.x

#### What this PR does / why we need it:

新增文章摘要生成扩展点用于扩展自动生成摘要的方式

#### Does this PR introduce a user-facing change?

```release-note
新增文章摘要生成扩展点用于扩展自动生成摘要的方式
```

pull/6390/head^2
parent
0110438854
commit
39ff455178
|
@ -0,0 +1,32 @@
|
|||
package run.halo.app.content;
|
||||
|
||||
import java.util.Set;
|
||||
import lombok.Data;
|
||||
import lombok.experimental.Accessors;
|
||||
import org.pf4j.ExtensionPoint;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
public interface ExcerptGenerator extends ExtensionPoint {
|
||||
|
||||
Mono<String> generate(ExcerptGenerator.Context context);
|
||||
|
||||
@Data
|
||||
@Accessors(chain = true)
|
||||
class Context {
|
||||
private String raw;
|
||||
/**
|
||||
* html content.
|
||||
*/
|
||||
private String content;
|
||||
|
||||
private String rawType;
|
||||
/**
|
||||
* keywords in the content to help the excerpt generation more accurate.
|
||||
*/
|
||||
private Set<String> keywords;
|
||||
/**
|
||||
* Max length of the generated excerpt.
|
||||
*/
|
||||
private int maxLength;
|
||||
}
|
||||
}
|
|
@ -10,6 +10,7 @@ import static run.halo.app.extension.ExtensionUtil.removeFinalizers;
|
|||
import static run.halo.app.extension.MetadataUtil.nullSafeAnnotations;
|
||||
import static run.halo.app.extension.MetadataUtil.nullSafeLabels;
|
||||
import static run.halo.app.extension.index.query.QueryFactory.equal;
|
||||
import static run.halo.app.extension.index.query.QueryFactory.in;
|
||||
|
||||
import com.google.common.hash.Hashing;
|
||||
import java.time.Duration;
|
||||
|
@ -20,7 +21,9 @@ import java.util.List;
|
|||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.BooleanUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
|
@ -28,8 +31,10 @@ import org.springframework.context.ApplicationEvent;
|
|||
import org.springframework.context.ApplicationEventPublisher;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.stereotype.Component;
|
||||
import reactor.core.publisher.Mono;
|
||||
import run.halo.app.content.CategoryService;
|
||||
import run.halo.app.content.ContentWrapper;
|
||||
import run.halo.app.content.ExcerptGenerator;
|
||||
import run.halo.app.content.NotificationReasonConst;
|
||||
import run.halo.app.content.PostService;
|
||||
import run.halo.app.content.comment.CommentService;
|
||||
|
@ -39,6 +44,7 @@ import run.halo.app.core.extension.content.Post;
|
|||
import run.halo.app.core.extension.content.Post.PostPhase;
|
||||
import run.halo.app.core.extension.content.Post.VisibleEnum;
|
||||
import run.halo.app.core.extension.content.Snapshot;
|
||||
import run.halo.app.core.extension.content.Tag;
|
||||
import run.halo.app.core.extension.notification.Subscription;
|
||||
import run.halo.app.event.post.PostDeletedEvent;
|
||||
import run.halo.app.event.post.PostPublishedEvent;
|
||||
|
@ -62,6 +68,7 @@ import run.halo.app.infra.utils.HaloUtils;
|
|||
import run.halo.app.metrics.CounterService;
|
||||
import run.halo.app.metrics.MeterUtils;
|
||||
import run.halo.app.notification.NotificationCenter;
|
||||
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
|
||||
|
||||
/**
|
||||
* <p>Reconciler for {@link Post}.</p>
|
||||
|
@ -75,6 +82,7 @@ import run.halo.app.notification.NotificationCenter;
|
|||
* @author guqing
|
||||
* @since 2.0.0
|
||||
*/
|
||||
@Slf4j
|
||||
@AllArgsConstructor
|
||||
@Component
|
||||
public class PostReconciler implements Reconciler<Reconciler.Request> {
|
||||
|
@ -85,6 +93,7 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
|
|||
private final CounterService counterService;
|
||||
private final CommentService commentService;
|
||||
private final CategoryService categoryService;
|
||||
private final ExtensionGetter extensionGetter;
|
||||
|
||||
private final ApplicationEventPublisher eventPublisher;
|
||||
private final NotificationCenter notificationCenter;
|
||||
|
@ -155,14 +164,7 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
|
|||
}
|
||||
var isAutoGenerate = defaultIfNull(excerpt.getAutoGenerate(), true);
|
||||
if (isAutoGenerate) {
|
||||
Optional<ContentWrapper> contentWrapper =
|
||||
postService.getContent(post.getSpec().getReleaseSnapshot(),
|
||||
post.getSpec().getBaseSnapshot())
|
||||
.blockOptional();
|
||||
if (contentWrapper.isPresent()) {
|
||||
String contentRevised = contentWrapper.get().getContent();
|
||||
status.setExcerpt(getExcerpt(contentRevised));
|
||||
}
|
||||
status.setExcerpt(getExcerpt(post));
|
||||
} else {
|
||||
status.setExcerpt(excerpt.getRaw());
|
||||
}
|
||||
|
@ -375,11 +377,57 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
|
|||
.block();
|
||||
}
|
||||
|
||||
private String getExcerpt(String htmlContent) {
|
||||
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500);
|
||||
String text = Jsoup.parse(shortHtmlContent).text();
|
||||
// TODO The default capture 150 words as excerpt
|
||||
return StringUtils.substring(text, 0, 150);
|
||||
private String getExcerpt(Post post) {
|
||||
Optional<ContentWrapper> contentWrapper =
|
||||
postService.getContent(post.getSpec().getReleaseSnapshot(),
|
||||
post.getSpec().getBaseSnapshot())
|
||||
.blockOptional();
|
||||
if (contentWrapper.isEmpty()) {
|
||||
return StringUtils.EMPTY;
|
||||
}
|
||||
var content = contentWrapper.get();
|
||||
var tags = listTagDisplayNames(post);
|
||||
|
||||
var keywords = new HashSet<>(tags);
|
||||
keywords.add(post.getSpec().getTitle());
|
||||
|
||||
var context = new ExcerptGenerator.Context()
|
||||
.setRaw(content.getRaw())
|
||||
.setContent(content.getContent())
|
||||
.setRawType(content.getRawType())
|
||||
.setKeywords(keywords)
|
||||
.setMaxLength(160);
|
||||
return extensionGetter.getEnabledExtension(ExcerptGenerator.class)
|
||||
.defaultIfEmpty(new DefaultExcerptGenerator())
|
||||
.flatMap(generator -> generator.generate(context))
|
||||
.onErrorResume(Throwable.class, e -> {
|
||||
log.error("Failed to generate excerpt for post [{}]",
|
||||
post.getMetadata().getName(), e);
|
||||
return Mono.empty();
|
||||
})
|
||||
.blockOptional()
|
||||
.orElse(StringUtils.EMPTY);
|
||||
}
|
||||
|
||||
private Set<String> listTagDisplayNames(Post post) {
|
||||
return Optional.ofNullable(post.getSpec().getTags())
|
||||
.map(tags -> client.listAll(Tag.class, ListOptions.builder()
|
||||
.fieldQuery(in("metadata.name", tags))
|
||||
.build(), Sort.unsorted())
|
||||
)
|
||||
.stream()
|
||||
.flatMap(List::stream)
|
||||
.map(tag -> tag.getSpec().getDisplayName())
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
static class DefaultExcerptGenerator implements ExcerptGenerator {
|
||||
@Override
|
||||
public Mono<String> generate(Context context) {
|
||||
String shortHtmlContent = StringUtils.substring(context.getContent(), 0, 500);
|
||||
String text = Jsoup.parse(shortHtmlContent).text();
|
||||
return Mono.just(StringUtils.substring(text, 0, 150));
|
||||
}
|
||||
}
|
||||
|
||||
List<Snapshot> listSnapshots(Ref ref) {
|
||||
|
|
|
@ -17,6 +17,9 @@ import org.jsoup.Jsoup;
|
|||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.util.Assert;
|
||||
import reactor.core.publisher.Mono;
|
||||
import run.halo.app.content.ContentWrapper;
|
||||
import run.halo.app.content.ExcerptGenerator;
|
||||
import run.halo.app.content.NotificationReasonConst;
|
||||
import run.halo.app.content.SinglePageService;
|
||||
import run.halo.app.content.comment.CommentService;
|
||||
|
@ -43,6 +46,7 @@ import run.halo.app.infra.utils.JsonUtils;
|
|||
import run.halo.app.metrics.CounterService;
|
||||
import run.halo.app.metrics.MeterUtils;
|
||||
import run.halo.app.notification.NotificationCenter;
|
||||
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
|
||||
|
||||
/**
|
||||
* <p>Reconciler for {@link SinglePage}.</p>
|
||||
|
@ -65,6 +69,7 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
|
|||
private final SinglePageService singlePageService;
|
||||
private final CounterService counterService;
|
||||
private final CommentService commentService;
|
||||
private final ExtensionGetter extensionGetter;
|
||||
|
||||
private final ExternalUrlSupplier externalUrlSupplier;
|
||||
|
||||
|
@ -318,12 +323,7 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
|
|||
}
|
||||
|
||||
if (excerpt.getAutoGenerate()) {
|
||||
singlePageService.getContent(spec.getHeadSnapshot(), spec.getBaseSnapshot())
|
||||
.blockOptional()
|
||||
.ifPresent(content -> {
|
||||
String contentRevised = content.getContent();
|
||||
status.setExcerpt(getExcerpt(contentRevised));
|
||||
});
|
||||
status.setExcerpt(getExcerpt(singlePage));
|
||||
} else {
|
||||
status.setExcerpt(excerpt.getRaw());
|
||||
}
|
||||
|
@ -363,11 +363,40 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
|
|||
});
|
||||
}
|
||||
|
||||
private String getExcerpt(String htmlContent) {
|
||||
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500);
|
||||
String text = Jsoup.parse(shortHtmlContent).text();
|
||||
// TODO The default capture 150 words as excerpt
|
||||
return StringUtils.substring(text, 0, 150);
|
||||
private String getExcerpt(SinglePage singlePage) {
|
||||
Optional<ContentWrapper> contentWrapper =
|
||||
singlePageService.getContent(singlePage.getSpec().getReleaseSnapshot(),
|
||||
singlePage.getSpec().getBaseSnapshot())
|
||||
.blockOptional();
|
||||
if (contentWrapper.isEmpty()) {
|
||||
return StringUtils.EMPTY;
|
||||
}
|
||||
var content = contentWrapper.get();
|
||||
var context = new ExcerptGenerator.Context()
|
||||
.setRaw(content.getRaw())
|
||||
.setContent(content.getContent())
|
||||
.setRaw(content.getRawType())
|
||||
.setKeywords(Set.of())
|
||||
.setMaxLength(160);
|
||||
return extensionGetter.getEnabledExtension(ExcerptGenerator.class)
|
||||
.defaultIfEmpty(new DefaultExcerptGenerator())
|
||||
.flatMap(generator -> generator.generate(context))
|
||||
.onErrorResume(Throwable.class, e -> {
|
||||
log.error("Failed to generate excerpt for single page [{}]",
|
||||
singlePage.getMetadata().getName(), e);
|
||||
return Mono.empty();
|
||||
})
|
||||
.blockOptional()
|
||||
.orElse(StringUtils.EMPTY);
|
||||
}
|
||||
|
||||
static class DefaultExcerptGenerator implements ExcerptGenerator {
|
||||
@Override
|
||||
public Mono<String> generate(Context context) {
|
||||
String shortHtmlContent = StringUtils.substring(context.getContent(), 0, 500);
|
||||
String text = Jsoup.parse(shortHtmlContent).text();
|
||||
return Mono.just(StringUtils.substring(text, 0, 150));
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isDeleted(SinglePage singlePage) {
|
||||
|
|
|
@ -87,4 +87,13 @@ spec:
|
|||
displayName: 页脚标签内容处理器
|
||||
type: MULTI_INSTANCE
|
||||
description: "提供用于扩展 <halo:footer/> 标签内容的扩展方式。"
|
||||
|
||||
---
|
||||
apiVersion: plugin.halo.run/v1alpha1
|
||||
kind: ExtensionPointDefinition
|
||||
metadata:
|
||||
name: excerpt-generator
|
||||
spec:
|
||||
className: run.halo.app.content.ExcerptGenerator
|
||||
displayName: 摘要生成器
|
||||
type: SINGLETON
|
||||
description: "提供自动生成摘要的方式扩展,如使用算法提取或使用 AI 生成。"
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
|||
import org.springframework.context.ApplicationEventPublisher;
|
||||
import reactor.core.publisher.Mono;
|
||||
import run.halo.app.content.ContentWrapper;
|
||||
import run.halo.app.content.ExcerptGenerator;
|
||||
import run.halo.app.content.NotificationReasonConst;
|
||||
import run.halo.app.content.PostService;
|
||||
import run.halo.app.content.TestPost;
|
||||
|
@ -36,6 +37,7 @@ import run.halo.app.event.post.PostPublishedEvent;
|
|||
import run.halo.app.extension.ExtensionClient;
|
||||
import run.halo.app.extension.controller.Reconciler;
|
||||
import run.halo.app.notification.NotificationCenter;
|
||||
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
|
||||
|
||||
/**
|
||||
* Tests for {@link PostReconciler}.
|
||||
|
@ -61,6 +63,9 @@ class PostReconcilerTest {
|
|||
@Mock
|
||||
private NotificationCenter notificationCenter;
|
||||
|
||||
@Mock
|
||||
private ExtensionGetter extensionGetter;
|
||||
|
||||
@InjectMocks
|
||||
private PostReconciler postReconciler;
|
||||
|
||||
|
@ -96,7 +101,7 @@ class PostReconcilerTest {
|
|||
verify(postPermalinkPolicy, times(1)).permalink(any());
|
||||
|
||||
Post value = captor.getValue();
|
||||
assertThat(value.getStatus().getExcerpt()).isNull();
|
||||
assertThat(value.getStatus().getExcerpt()).isEmpty();
|
||||
assertThat(value.getStatus().getContributors()).isEqualTo(List.of("guqing", "zhangsan"));
|
||||
}
|
||||
|
||||
|
@ -126,6 +131,9 @@ class PostReconcilerTest {
|
|||
Snapshot snapshotV1 = TestPost.snapshotV1();
|
||||
snapshotV1.getSpec().setContributors(Set.of("guqing"));
|
||||
|
||||
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
|
||||
.thenReturn(Mono.empty());
|
||||
|
||||
when(client.listAll(eq(Snapshot.class), any(), any()))
|
||||
.thenReturn(List.of(snapshotV1, snapshotV2));
|
||||
|
||||
|
@ -162,6 +170,9 @@ class PostReconcilerTest {
|
|||
when(client.fetch(eq(Snapshot.class), eq(post.getSpec().getReleaseSnapshot())))
|
||||
.thenReturn(Optional.of(snapshotV2));
|
||||
|
||||
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
|
||||
.thenReturn(Mono.empty());
|
||||
|
||||
when(client.listAll(eq(Snapshot.class), any(), any()))
|
||||
.thenReturn(List.of());
|
||||
|
||||
|
@ -191,6 +202,9 @@ class PostReconcilerTest {
|
|||
.rawType("markdown")
|
||||
.build()));
|
||||
|
||||
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
|
||||
.thenReturn(Mono.empty());
|
||||
|
||||
when(client.listAll(eq(Snapshot.class), any(), any()))
|
||||
.thenReturn(List.of());
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
|||
import org.springframework.context.ApplicationContext;
|
||||
import reactor.core.publisher.Mono;
|
||||
import run.halo.app.content.ContentWrapper;
|
||||
import run.halo.app.content.ExcerptGenerator;
|
||||
import run.halo.app.content.NotificationReasonConst;
|
||||
import run.halo.app.content.SinglePageService;
|
||||
import run.halo.app.content.TestPost;
|
||||
|
@ -39,6 +40,7 @@ import run.halo.app.extension.controller.Reconciler;
|
|||
import run.halo.app.infra.ExternalUrlSupplier;
|
||||
import run.halo.app.metrics.CounterService;
|
||||
import run.halo.app.notification.NotificationCenter;
|
||||
import run.halo.app.plugin.extensionpoint.ExtensionGetter;
|
||||
|
||||
/**
|
||||
* Tests for {@link SinglePageReconciler}.
|
||||
|
@ -66,6 +68,9 @@ class SinglePageReconcilerTest {
|
|||
@Mock
|
||||
NotificationCenter notificationCenter;
|
||||
|
||||
@Mock
|
||||
ExtensionGetter extensionGetter;
|
||||
|
||||
@InjectMocks
|
||||
private SinglePageReconciler singlePageReconciler;
|
||||
|
||||
|
@ -79,9 +84,10 @@ class SinglePageReconcilerTest {
|
|||
String name = "page-A";
|
||||
SinglePage page = pageV1();
|
||||
page.getSpec().setHeadSnapshot("page-A-head-snapshot");
|
||||
page.getSpec().setReleaseSnapshot(page.getSpec().getHeadSnapshot());
|
||||
when(client.fetch(eq(SinglePage.class), eq(name)))
|
||||
.thenReturn(Optional.of(page));
|
||||
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()),
|
||||
when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
|
||||
eq(page.getSpec().getBaseSnapshot())))
|
||||
.thenReturn(Mono.just(ContentWrapper.builder()
|
||||
.snapshotName(page.getSpec().getHeadSnapshot())
|
||||
|
@ -99,6 +105,9 @@ class SinglePageReconcilerTest {
|
|||
.thenReturn(List.of(snapshotV1, snapshotV2));
|
||||
when(externalUrlSupplier.get()).thenReturn(URI.create(""));
|
||||
|
||||
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
|
||||
.thenReturn(Mono.empty());
|
||||
|
||||
ArgumentCaptor<SinglePage> captor = ArgumentCaptor.forClass(SinglePage.class);
|
||||
singlePageReconciler.reconcile(new Reconciler.Request(name));
|
||||
|
||||
|
@ -141,7 +150,7 @@ class SinglePageReconcilerTest {
|
|||
page.getSpec().setReleaseSnapshot("page-fake-released-snapshot");
|
||||
when(client.fetch(eq(SinglePage.class), eq(name)))
|
||||
.thenReturn(Optional.of(page));
|
||||
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()),
|
||||
when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
|
||||
eq(page.getSpec().getBaseSnapshot())))
|
||||
.thenReturn(Mono.just(ContentWrapper.builder()
|
||||
.snapshotName(page.getSpec().getHeadSnapshot())
|
||||
|
@ -156,6 +165,9 @@ class SinglePageReconcilerTest {
|
|||
when(client.fetch(eq(Snapshot.class), eq(page.getSpec().getReleaseSnapshot())))
|
||||
.thenReturn(Optional.of(snapshotV2));
|
||||
|
||||
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
|
||||
.thenReturn(Mono.empty());
|
||||
|
||||
when(client.listAll(eq(Snapshot.class), any(), any()))
|
||||
.thenReturn(List.of());
|
||||
|
||||
|
@ -176,7 +188,7 @@ class SinglePageReconcilerTest {
|
|||
page.getSpec().setPublish(false);
|
||||
when(client.fetch(eq(SinglePage.class), eq(name)))
|
||||
.thenReturn(Optional.of(page));
|
||||
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()),
|
||||
when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
|
||||
eq(page.getSpec().getBaseSnapshot())))
|
||||
.thenReturn(Mono.just(ContentWrapper.builder()
|
||||
.snapshotName(page.getSpec().getHeadSnapshot())
|
||||
|
@ -186,6 +198,9 @@ class SinglePageReconcilerTest {
|
|||
.build())
|
||||
);
|
||||
|
||||
when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
|
||||
.thenReturn(Mono.empty());
|
||||
|
||||
when(client.listAll(eq(Snapshot.class), any(), any()))
|
||||
.thenReturn(List.of());
|
||||
|
||||
|
|
Loading…
Reference in New Issue