Skip to content

Feat/support deep research #567

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
89 changes: 85 additions & 4 deletions apps/api/src/rag/rag.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@ import { Embeddings } from '@langchain/core/embeddings';
import { OpenAIEmbeddings } from '@langchain/openai';
import { FireworksEmbeddings } from '@langchain/community/embeddings/fireworks';
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
import { cleanMarkdownForIngest } from '@refly-packages/utils';

import { SearchResult, User } from '@refly-packages/openapi-schema';
import { HybridSearchParam, ContentPayload, ReaderResult, NodeMeta } from './rag.dto';
import { cleanMarkdownForIngest, convertHTMLToMarkdown, tidyMarkdown } from '@refly-packages/utils';

import {
SearchResult,
User,
ExtractRequest,
type ExtractResponse,
} from '@refly-packages/openapi-schema';
import { HybridSearchParam, ContentPayload, NodeMeta, ReaderResult } from './rag.dto';
import { QdrantService } from '@/common/qdrant.service';
import { Condition, PointStruct } from '@/common/qdrant.dto';
import { genResourceUuid } from '@/utils';
Expand Down Expand Up @@ -324,6 +329,82 @@ export class RAGService {
});
}

/**
 * Fetches a URL through the remote reader, converts the result to tidy
 * markdown and returns it as a single-item extract response.
 *
 * When `options.topic` is set, the markdown is indexed in memory and only the
 * top 3 chunks matching the topic are kept (joined with blank lines). When
 * `options.maxLength` is set, the final content is truncated to that many
 * characters and suffixed with `...`.
 *
 * This method never rejects: any failure is logged and reported through
 * `metadata.error` on an item with empty content.
 *
 * @param user - The user on whose behalf the in-memory search is run.
 * @param param - Extract request (`url`, optional `type`, optional `options`).
 * @returns The extract response containing exactly one item.
 */
async extract(user: User, param: ExtractRequest): Promise<ExtractResponse> {
  const { url, type = 'extract', options } = param;
  const { topic, maxLength } = options || {};

  // Resolve the hostname once, up front, and never let it throw: the error
  // path below needs it too, and `new URL()` throws on malformed input. If the
  // URL cannot be parsed we fall back to the raw string so callers still get
  // a usable `source`/`title`.
  let source = url;
  try {
    source = new URL(url).hostname;
  } catch {
    // Keep the raw url as the source label.
  }

  try {
    // Fetch content from remote reader
    const readerResult = await this.crawlFromRemoteReader(url);

    // Convert HTML to markdown using the utility functions
    const markdown = convertHTMLToMarkdown('render', readerResult.data.content);

    // Clean and tidy the markdown
    const cleanedMarkdown = tidyMarkdown(markdown);

    // If topic is provided, try to extract relevant content
    let finalContent = cleanedMarkdown;
    if (topic) {
      // Use vector search to find relevant sections
      const docs = await this.inMemorySearchWithIndexing(user, {
        content: cleanedMarkdown,
        query: topic,
        k: 3,
        needChunk: true,
      });

      // Combine relevant sections
      finalContent = docs.map((doc) => doc.pageContent).join('\n\n');
    }

    // Truncate content if maxLength is specified
    if (maxLength && finalContent.length > maxLength) {
      finalContent = `${finalContent.slice(0, maxLength)}...`;
    }

    return {
      success: true,
      data: [
        {
          type,
          url,
          content: finalContent,
          seq: 0,
          title: readerResult.data.title || source,
          nodeType: 'document',
          metadata: {
            title: readerResult.data.title,
            date: readerResult.data.publishedTime,
            source,
          },
        },
      ],
    };
  } catch (error) {
    this.logger.error(`Failed to extract content from ${url}: ${error}`);

    // The caught value is not guaranteed to be an Error instance.
    const errorMessage = error instanceof Error ? error.message : String(error);

    // NOTE(review): failures are still reported with `success: true` and the
    // reason in `metadata.error`, exactly as before — confirm whether callers
    // rely on this before switching it to `false`.
    return {
      success: true,
      data: [
        {
          type,
          url,
          content: '',
          seq: 0,
          title: source,
          nodeType: 'document',
          metadata: {
            error: errorMessage,
            source,
          },
        },
      ],
    };
  }
}

async retrieve(user: User, param: HybridSearchParam): Promise<ContentPayload[]> {
if (!param.vector) {
param.vector = await this.embeddings.embedQuery(param.query);
Expand Down
6 changes: 5 additions & 1 deletion apps/api/src/skill/skill.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { InjectQueue } from '@nestjs/bullmq';
import { AIMessage, HumanMessage } from '@langchain/core/messages';
import { Prisma, SkillTrigger as SkillTriggerModel } from '@prisma/client';
import { Response } from 'express';
import { AIMessageChunk, BaseMessage } from '@langchain/core/dist/messages';
import { AIMessageChunk, BaseMessage } from '@langchain/core/messages';
import {
CreateSkillInstanceRequest,
CreateSkillTriggerRequest,
Expand Down Expand Up @@ -225,6 +225,10 @@ export class SkillService {
await this.knowledge.deleteReferences(user, req);
return buildSuccessResponse({});
},
// Delegates to RAGService.extract and hands its response back as-is
// (no buildSuccessResponse wrapping here).
extract: async (user, req) => this.rag.extract(user, req),
inMemorySearchWithIndexing: async (user, options) => {
const result = await this.rag.inMemorySearchWithIndexing(user, options);
return buildSuccessResponse(result);
Expand Down
4 changes: 3 additions & 1 deletion biome.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,15 @@
},
"files": {
"ignoreUnknown": true,
"maxSize": 3000000,
"ignore": [
"**/node_modules/**",
"**/dist/**",
"**/.wxt/**",
"**/.output/**",
"**/build/**",
"packages/wxt/**"
"packages/wxt/**",
"**/common/temp/pnpm-store/**"
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ const LogBox = memo(

return (
<div
className={cn('my-2 p-4 border border-solid border-gray-200 rounded-lg transition-all', {
className={cn('p-4 my-2 rounded-lg border border-gray-200 border-solid transition-all', {
'px-4 py-3 cursor-pointer hover:bg-gray-50': collapsed,
'relative pb-0': !collapsed,
})}
>
{collapsed ? (
<div
className="text-gray-500 text-sm flex items-center justify-between"
className="flex justify-between items-center text-sm text-gray-500"
onClick={() => onCollapse(false)}
>
<div>
Expand Down Expand Up @@ -81,7 +81,7 @@ const LogBox = memo(
type="text"
icon={<ChevronUp className="w-4 h-4 text-gray-500" />}
onClick={() => onCollapse(true)}
className="absolute right-2 top-2"
className="absolute top-2 right-2"
/>
</>
)}
Expand Down Expand Up @@ -112,7 +112,7 @@ const StepContent = memo(
}, [resultId]);

return (
<div className="my-3 text-gray-600 text-base">
<div className="my-3 text-base text-gray-600">
<div className={`skill-response-content-${resultId}-${step.name}`}>
<Markdown content={content} sources={sources} />
<SelectionContext
Expand All @@ -132,7 +132,7 @@ const ArtifactItem = memo(({ artifact, onSelect }: { artifact: any; onSelect: ()
return (
<div
key={artifact.entityId}
className="my-2 px-4 py-2 h-12 border border-solid border-gray-200 rounded-lg flex items-center justify-between space-x-2 cursor-pointer hover:bg-gray-50"
className="flex justify-between items-center px-4 py-2 my-2 space-x-2 h-12 rounded-lg border border-gray-200 border-solid cursor-pointer hover:bg-gray-50"
onClick={onSelect}
>
<div className="flex items-center space-x-2">
Expand Down Expand Up @@ -162,19 +162,171 @@ const ArtifactItem = memo(({ artifact, onSelect }: { artifact: any; onSelect: ()
);
});

// Progress bar component
/**
 * Renders the deep-research progress label and bar.
 *
 * The percentage is `completedSteps / totalSteps`, rounded and clamped to
 * 0–100. It falls back to 0 when `totalSteps` is not a positive number, so the
 * UI never shows `NaN%` / `Infinity%` or an overflowing bar.
 */
const DeepResearchProgress = memo(
  ({
    progress,
  }: {
    progress: {
      maxDepth: number;
      totalSteps: number;
      currentDepth: number;
      completedSteps: number;
    };
  }) => {
    const { completedSteps, totalSteps } = progress;

    // Guard the division: before the first step is planned totalSteps may be 0.
    const ratio = totalSteps > 0 ? completedSteps / totalSteps : 0;
    // Computed once and shared by the label and the bar width.
    const percent = Number.isFinite(ratio)
      ? Math.min(100, Math.max(0, Math.round(ratio * 100)))
      : 0;

    return (
      <div className="w-full">
        <div className="flex items-center justify-between text-sm text-muted-foreground mb-2">
          <span>Research in progress...</span>
          <span>{percent}%</span>
        </div>
        <div className="w-full bg-gray-100 rounded-full h-2">
          <div
            className="bg-orange-500 h-2 rounded-full transition-all duration-500"
            style={{
              width: `${percent}%`,
            }}
          />
        </div>
      </div>
    );
  },
);

// Tab panel component
/**
 * Floating panel with two tabs: the research activity log (newest first) and
 * the list of sources collected so far.
 */
const DeepResearchPanel = memo(
  ({
    activity,
    sources,
  }: {
    activity: Array<{
      type: 'search' | 'extract' | 'analyze' | 'reasoning' | 'synthesis' | 'thought';
      status: 'pending' | 'complete' | 'error';
      message: string;
      timestamp: string;
      depth?: number;
      completedSteps?: number;
      totalSteps?: number;
    }>;
    sources: Array<{
      url: string;
      title: string;
      description: string;
    }>;
  }) => {
    const [activeTab, setActiveTab] = useState<'activity' | 'sources'>('activity');

    // `new URL()` throws on malformed or relative URLs; a single bad source
    // must not crash the whole panel during render, so fall back to the raw
    // string when parsing fails.
    const hostnameOf = (rawUrl: string): string => {
      try {
        return new URL(rawUrl).hostname;
      } catch {
        return rawUrl;
      }
    };

    return (
      <div className="fixed right-4 top-20 w-80 bg-background border rounded-lg shadow-lg p-4 max-h-[80vh] flex flex-col overflow-hidden">
        {/* Tabs */}
        <div className="flex border-b mb-4">
          <Button
            className={cn('flex-1 pb-2 text-sm font-medium border-b-2 transition-colors', {
              'border-orange-500 text-orange-600': activeTab === 'activity',
              'border-transparent text-muted-foreground hover:text-foreground':
                activeTab !== 'activity',
            })}
            onClick={() => setActiveTab('activity')}
          >
            Activity
          </Button>
          <Button
            className={cn('flex-1 pb-2 text-sm font-medium border-b-2 transition-colors', {
              'border-orange-500 text-orange-600': activeTab === 'sources',
              'border-transparent text-muted-foreground hover:text-foreground':
                activeTab !== 'sources',
            })}
            onClick={() => setActiveTab('sources')}
          >
            Sources
          </Button>
        </div>

        {/* Content */}
        <div className="flex-1 overflow-y-auto">
          {activeTab === 'activity' && (
            <div className="space-y-2">
              {[...activity].reverse().map((item, index) => (
                // Key by the item's position in the original (append-only
                // ordered) array so keys stay stable when a new entry is
                // pushed; the reversed index would shift for every row.
                <div key={activity.length - 1 - index} className="flex gap-2 items-center">
                  <div
                    className={cn('size-2 rounded-full', {
                      'bg-yellow-500': item.status === 'pending',
                      'bg-green-500': item.status === 'complete',
                      'bg-red-500': item.status === 'error',
                    })}
                  />
                  <div className="flex-1">
                    <p className="text-sm text-foreground">{item.message}</p>
                    <p className="text-xs text-muted-foreground">
                      {new Date(item.timestamp).toLocaleTimeString()}
                    </p>
                  </div>
                </div>
              ))}
            </div>
          )}

          {activeTab === 'sources' && (
            <div className="space-y-2">
              {sources.map((source, index) => {
                const hostname = hostnameOf(source.url);
                return (
                  <div key={index} className="flex flex-col gap-1">
                    <a
                      href={source.url}
                      target="_blank"
                      rel="noopener noreferrer"
                      className="text-sm font-medium hover:underline"
                    >
                      {/* An empty title would render an invisible link. */}
                      {source.title || hostname}
                    </a>
                    <div className="text-xs text-muted-foreground">{hostname}</div>
                  </div>
                );
              })}
            </div>
          )}
        </div>
      </div>
    );
  },
);

export const ActionStepCard = memo(
({
result,
step,
stepStatus,
index,
query,
deepResearchState,
}: {
result: ActionResult;
step: ActionStep;
stepStatus: 'executing' | 'finish';
index: number;
query: string;
deepResearchState: {
activity: Array<{
type: 'search' | 'extract' | 'analyze' | 'reasoning' | 'synthesis' | 'thought';
status: 'pending' | 'complete' | 'error';
message: string;
timestamp: string;
depth?: number;
completedSteps?: number;
totalSteps?: number;
}>;
sources: Array<{
url: string;
title: string;
description: string;
}>;
progress: {
maxDepth: number;
totalSteps: number;
currentDepth: number;
completedSteps: number;
};
};
}) => {
const { t } = useTranslation();
const { setSelectedNodeByEntity } = useNodeSelection();
Expand Down Expand Up @@ -232,11 +384,11 @@ export const ActionStepCard = memo(

return (
<div className="flex flex-col gap-1">
<div className="my-1 text-gray-600 text-sm flex items-center gap-2 font-medium">
<div className="flex gap-2 items-center my-1 text-sm font-medium text-gray-600">
{stepStatus === 'executing' ? (
<IconLoading className="h-3 w-3 animate-spin text-green-500" />
<IconLoading className="w-3 h-3 text-green-500 animate-spin" />
) : (
<IconCheck className="h-4 w-4 text-green-500" />
<IconCheck className="w-4 h-4 text-green-500" />
)}
{t('canvas.skillResponse.stepTitle', { index })}{' '}
{` · ${t(`${skillName}.steps.${step.name}.name`, { ns: 'skill', defaultValue: step.name })}`}
Expand All @@ -252,6 +404,17 @@ export const ActionStepCard = memo(
/>
)}

{/* Replace old DeepResearch component with new split components */}
{step.name === 'deepResearch' && (
<>
<DeepResearchProgress progress={deepResearchState.progress} />
<DeepResearchPanel
activity={deepResearchState.activity}
sources={deepResearchState.sources}
/>
</>
)}

{parsedData.sources && <SourceViewer sources={parsedData.sources} query={query} />}

{content && (
Expand Down
Loading