2 changes: 2 additions & 0 deletions core/src/browser/extension.ts
@@ -11,6 +11,8 @@
HuggingFace = 'huggingFace',
Engine = 'engine',
Hardware = 'hardware',
RAG = 'rag',
VectorDB = 'vectorDB',
}

export interface ExtensionType {
@@ -91,18 +93,18 @@
* @property {Array} platform
*/
compatibility(): Compatibility | undefined {
return undefined
}

/**
* Registers models - persists them in the shared in-memory ModelManager instance's data map.
* @param models
*/
async registerModels(models: Model[]): Promise<void> {
for (const model of models) {
ModelManager.instance().register(model)
}
}

/**
* Register settings for the extension.
@@ -111,9 +113,9 @@
*/
async registerSettings(settings: SettingComponentProps[]): Promise<void> {
if (!this.name) {
console.error('Extension name is not defined')
return
}

settings.forEach((setting) => {
setting.extensionName = this.name
@@ -122,30 +124,30 @@
const oldSettingsJson = localStorage.getItem(this.name)
// Persists new settings
if (oldSettingsJson) {
const oldSettings = JSON.parse(oldSettingsJson)
settings.forEach((setting) => {

// Keep setting value
if (setting.controllerProps && Array.isArray(oldSettings))
setting.controllerProps.value =
oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.value ??
setting.controllerProps.value
if ('options' in setting.controllerProps)
setting.controllerProps.options = setting.controllerProps.options?.length
? setting.controllerProps.options
: oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.options
if ('recommended' in setting.controllerProps) {
const oldRecommended = oldSettings.find((e: any) => e.key === setting.key)
?.controllerProps?.recommended
if (oldRecommended !== undefined && oldRecommended !== '') {
setting.controllerProps.recommended = oldRecommended
}
}
})
}

localStorage.setItem(this.name, JSON.stringify(settings))
} catch (err) {
console.error(err)
}

}

/**
@@ -179,18 +181,18 @@
* @returns
*/
async getSettings(): Promise<SettingComponentProps[]> {
if (!this.name) return []

try {
const settingsString = localStorage.getItem(this.name)
if (!settingsString) return []
const settings: SettingComponentProps[] = JSON.parse(settingsString)
return settings
} catch (err) {
console.warn(err)
return []
}
}

/**
* Update the settings for the extension.
1 change: 1 addition & 0 deletions core/src/browser/extensions/engines/AIEngine.ts
@@ -182,6 +182,7 @@ export interface SessionInfo {
port: number // llama-server output port (corrected from portid)
model_id: string //name of the model
model_path: string // path of the loaded model
is_embedding: boolean
api_key: string
mmproj_path?: string
}
5 changes: 5 additions & 0 deletions core/src/browser/extensions/index.ts
@@ -23,3 +23,8 @@ export { MCPExtension } from './mcp'
* Base AI Engines.
*/
export * from './engines'

export { RAGExtension, RAG_INTERNAL_SERVER } from './rag'
export type { AttachmentInput, IngestAttachmentsResult } from './rag'
export { VectorDBExtension } from './vector-db'
export type { SearchMode, VectorDBStatus, VectorChunkInput, VectorSearchResult, AttachmentFileInfo, VectorDBFileInput, VectorDBIngestOptions } from './vector-db'
36 changes: 36 additions & 0 deletions core/src/browser/extensions/rag.ts
@@ -0,0 +1,36 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import type { MCPTool, MCPToolCallResult } from '../../types'
import type { AttachmentFileInfo } from './vector-db'

export interface AttachmentInput {
path: string
name?: string
type?: string
size?: number
}

export interface IngestAttachmentsResult {
filesProcessed: number
chunksInserted: number
files: AttachmentFileInfo[]
}

export const RAG_INTERNAL_SERVER = 'rag-internal'

/**
* RAG extension base: exposes RAG tools and orchestration API.
*/
export abstract class RAGExtension extends BaseExtension {
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.RAG
}

abstract getTools(): Promise<MCPTool[]>
/**
* Lightweight list of tool names for quick routing/lookup.
*/
abstract getToolNames(): Promise<string[]>
abstract callTool(toolName: string, args: Record<string, unknown>): Promise<MCPToolCallResult>

abstract ingestAttachments(threadId: string, files: AttachmentInput[]): Promise<IngestAttachmentsResult>
}
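
For orientation, here is a minimal sketch of what a concrete subclass of this new base might look like. It assumes the new exports are surfaced through the core package the way the existing extension bases are; the class name, tool routing, and stub logic are illustrative only and not part of this PR.

import {
  RAGExtension,
  type AttachmentInput,
  type IngestAttachmentsResult,
  type MCPTool,
  type MCPToolCallResult,
} from '@janhq/core'

export class SimpleRAGExtension extends RAGExtension {
  // BaseExtension lifecycle hooks (assumed required by the base class).
  async onLoad() {}
  async onUnload() {}

  async getTools(): Promise<MCPTool[]> {
    // A real implementation would advertise its retrieval tools here.
    return []
  }

  async getToolNames(): Promise<string[]> {
    return (await this.getTools()).map((t) => t.name)
  }

  async callTool(toolName: string, _args: Record<string, unknown>): Promise<MCPToolCallResult> {
    // Routing would normally dispatch on toolName; nothing is registered in this sketch.
    throw new Error(`Unknown RAG tool: ${toolName}`)
  }

  async ingestAttachments(threadId: string, files: AttachmentInput[]): Promise<IngestAttachmentsResult> {
    // A real implementation would chunk and embed each file, insert the chunks through
    // a VectorDBExtension keyed by threadId, and report what was stored.
    console.debug(`Ingesting ${files.length} attachment(s) for thread ${threadId}`)
    return { filesProcessed: files.length, chunksInserted: 0, files: [] }
  }
}
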
82 changes: 82 additions & 0 deletions core/src/browser/extensions/vector-db.ts
@@ -0,0 +1,82 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'

export type SearchMode = 'auto' | 'ann' | 'linear'

export interface VectorDBStatus {
ann_available: boolean
}

export interface VectorChunkInput {
text: string
embedding: number[]
}

export interface VectorSearchResult {
id: string
text: string
score?: number
file_id: string
chunk_file_order: number
}

export interface AttachmentFileInfo {
id: string
name?: string
path?: string
type?: string
size?: number
chunk_count: number
}

// High-level input types for file ingestion
export interface VectorDBFileInput {
path: string
name?: string
type?: string
size?: number
}

export interface VectorDBIngestOptions {
chunkSize: number
chunkOverlap: number
}

/**
* Vector DB extension base: abstraction over local vector storage and search.
*/
export abstract class VectorDBExtension extends BaseExtension {
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.VectorDB
}

abstract getStatus(): Promise<VectorDBStatus>
abstract createCollection(threadId: string, dimension: number): Promise<void>
abstract insertChunks(
threadId: string,
fileId: string,
chunks: VectorChunkInput[]
): Promise<void>
abstract ingestFile(
threadId: string,
file: VectorDBFileInput,
opts: VectorDBIngestOptions
): Promise<AttachmentFileInfo>
abstract searchCollection(
threadId: string,
query_embedding: number[],
limit: number,
threshold: number,
mode?: SearchMode,
fileIds?: string[]
): Promise<VectorSearchResult[]>
abstract deleteChunks(threadId: string, ids: string[]): Promise<void>
abstract deleteFile(threadId: string, fileId: string): Promise<void>
abstract deleteCollection(threadId: string): Promise<void>
abstract listAttachments(threadId: string, limit?: number): Promise<AttachmentFileInfo[]>
abstract getChunks(
threadId: string,
fileId: string,
startOrder: number,
endOrder: number
): Promise<VectorSearchResult[]>
}
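
To make the call flow concrete, the sketch below drives this interface for a typical ingest-then-search pass, as a RAG orchestrator might. The embedTexts helper, chunking parameters, score threshold, and the assumption that these types are re-exported from the core package are illustrative, not part of this PR; the 384-dimension figure matches the all-MiniLM-L6-v2 embedder used elsewhere in this change.

import type {
  VectorDBExtension,
  VectorDBFileInput,
  VectorSearchResult,
} from '@janhq/core'

async function ingestAndSearch(
  db: VectorDBExtension,
  threadId: string,
  file: VectorDBFileInput,
  query: string,
  // Hypothetical helper, e.g. backed by the engine's /v1/embeddings endpoint.
  embedTexts: (texts: string[]) => Promise<number[][]>
): Promise<VectorSearchResult[]> {
  // One collection per thread; the dimension must match the embedder's output.
  await db.createCollection(threadId, 384)

  // Chunk size and overlap here are illustrative defaults.
  const info = await db.ingestFile(threadId, file, { chunkSize: 512, chunkOverlap: 64 })

  const [queryEmbedding] = await embedTexts([query])

  // Scope the search to the file just ingested; 'auto' presumably lets the extension
  // pick ANN when available and fall back to a linear scan otherwise.
  return db.searchCollection(threadId, queryEmbedding, 5, 0.3, 'auto', [info.id])
}
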
2 changes: 2 additions & 0 deletions core/src/types/setting/settingComponent.ts
@@ -12,6 +12,8 @@ export type SettingComponentProps = {
extensionName?: string
requireModelReload?: boolean
configType?: ConfigType
titleKey?: string
descriptionKey?: string
}

export type ConfigType = 'runtime' | 'setting'
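
As a rough illustration of how the new optional keys might be populated: the key strings below are made up, and presumably they feed an i18n lookup rather than replacing the plain title/description fields; required members of SettingComponentProps not shown in this hunk are omitted, hence the Partial.

import type { SettingComponentProps } from '@janhq/core'

const chunkSizeSetting: Partial<SettingComponentProps> = {
  extensionName: 'my-rag-extension',
  configType: 'setting',
  // Hypothetical i18n keys resolved by the app's localization layer.
  titleKey: 'settings:rag.chunkSize.title',
  descriptionKey: 'settings:rag.chunkSize.description',
}
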
32 changes: 25 additions & 7 deletions extensions-web/src/jan-provider-web/provider.ts
@@ -45,7 +45,7 @@ export default class JanProviderWeb extends AIEngine {
// Verify Jan models capabilities in localStorage
private validateJanModelsLocalStorage() {
try {
console.log("Validating Jan models in localStorage...")
console.log('Validating Jan models in localStorage...')
const storageKey = 'model-provider'
const data = localStorage.getItem(storageKey)
if (!data) return
@@ -60,9 +60,14 @@
if (provider.provider === 'jan' && provider.models) {
for (const model of provider.models) {
console.log(`Checking Jan model: ${model.id}`, model.capabilities)
if (JSON.stringify(model.capabilities) !== JSON.stringify(JAN_MODEL_CAPABILITIES)) {
if (
JSON.stringify(model.capabilities) !==
JSON.stringify(JAN_MODEL_CAPABILITIES)
) {
hasInvalidModel = true
console.log(`Found invalid Jan model: ${model.id}, clearing localStorage`)
console.log(
`Found invalid Jan model: ${model.id}, clearing localStorage`
)
break
}
}
@@ -79,9 +84,17 @@
// If still present, try setting to empty state
if (afterRemoval) {
// Try alternative clearing method
localStorage.setItem(storageKey, JSON.stringify({ state: { providers: [] }, version: parsed.version || 3 }))
localStorage.setItem(
storageKey,
JSON.stringify({
state: { providers: [] },
version: parsed.version || 3,
})
)
}
console.log('Cleared model-provider from localStorage due to invalid Jan capabilities')
console.log(
'Cleared model-provider from localStorage due to invalid Jan capabilities'
)
// Force a page reload to ensure clean state
window.location.reload()
}
@@ -159,6 +172,7 @@
port: 443, // HTTPS port
model_id: modelId,
model_path: `remote:${modelId}`, // Indicate this is a remote model
is_embedding: false, // assume false here, TODO: might need further implementation
api_key: '', // API key handled by auth service
}

@@ -193,8 +207,12 @@
console.error(`Failed to unload Jan session ${sessionId}:`, error)
return {
success: false,
error: error instanceof ApiError ? error.message :
error instanceof Error ? error.message : 'Unknown error',
error:
error instanceof ApiError
? error.message
: error instanceof Error
? error.message
: 'Unknown error',
}
}
}
93 changes: 66 additions & 27 deletions extensions/llamacpp-extension/src/index.ts
@@ -333,14 +333,12 @@ export default class llamacpp_extension extends AIEngine {
)
// Clear the invalid stored preference
this.clearStoredBackendType()
bestAvailableBackendString = await this.determineBestBackend(
version_backends
)
bestAvailableBackendString =
await this.determineBestBackend(version_backends)
}
} else {
bestAvailableBackendString = await this.determineBestBackend(
version_backends
)
bestAvailableBackendString =
await this.determineBestBackend(version_backends)
}

let settings = structuredClone(SETTINGS)
@@ -1530,17 +1528,41 @@

if (
this.autoUnload &&
!isEmbedding &&
(loadedModels.length > 0 || otherLoadingPromises.length > 0)
) {
// Wait for OTHER loading models to finish, then unload everything
if (otherLoadingPromises.length > 0) {
await Promise.all(otherLoadingPromises)
}

// Now unload all loaded models
// Now unload all loaded text models, excluding embedding models
const allLoadedModels = await this.getLoadedModels()
if (allLoadedModels.length > 0) {
await Promise.all(allLoadedModels.map((model) => this.unload(model)))
const sessionInfos: (SessionInfo | null)[] = await Promise.all(
allLoadedModels.map(async (modelId) => {
try {
return await this.findSessionByModel(modelId)
} catch (e) {
logger.warn(`Unable to find session for model "${modelId}": ${e}`)
return null // treat as "not eligible for unload"
}
})
)

logger.info(JSON.stringify(sessionInfos))

const nonEmbeddingModels: string[] = sessionInfos
.filter(
(s): s is SessionInfo => s !== null && s.is_embedding === false
)
.map((s) => s.model_id)

if (nonEmbeddingModels.length > 0) {
await Promise.all(
nonEmbeddingModels.map((modelId) => this.unload(modelId))
)
}
}
}
const args: string[] = []
@@ -1638,7 +1660,7 @@
if (cfg.no_kv_offload) args.push('--no-kv-offload')
if (isEmbedding) {
args.push('--embedding')
args.push('--pooling mean')
args.push('--pooling', 'mean')
} else {
if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
@@ -1677,6 +1699,7 @@
libraryPath,
args,
envs,
isEmbedding,
}
)
return sInfo
@@ -2020,6 +2043,7 @@
}

async embed(text: string[]): Promise<EmbeddingResponse> {
// Ensure the sentence-transformer model is present
let sInfo = await this.findSessionByModel('sentence-transformer-mini')
if (!sInfo) {
const downloadedModelList = await this.list()
@@ -2033,30 +2057,45 @@
'https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/all-MiniLM-L6-v2-ggml-model-f16.gguf?download=true',
})
}
sInfo = await this.load('sentence-transformer-mini')
// Load specifically in embedding mode
sInfo = await this.load('sentence-transformer-mini', undefined, true)
}
const baseUrl = `http://localhost:${sInfo.port}/v1/embeddings`
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${sInfo.api_key}`,

const attemptRequest = async (session: SessionInfo) => {
const baseUrl = `http://localhost:${session.port}/v1/embeddings`
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${session.api_key}`,
}
const body = JSON.stringify({
input: text,
model: session.model_id,
encoding_format: 'float',
})
const response = await fetch(baseUrl, {
method: 'POST',
headers,
body,
})
return response
}

// First try with the existing session (may have been started without --embedding previously)
let response = await attemptRequest(sInfo)

// If embeddings endpoint is not available (501), reload with embedding mode and retry once
if (response.status === 501) {
try {
await this.unload('sentence-transformer-mini')
} catch {}
sInfo = await this.load('sentence-transformer-mini', undefined, true)
response = await attemptRequest(sInfo)
}
const body = JSON.stringify({
input: text,
model: sInfo.model_id,
encoding_format: 'float',
})
const response = await fetch(baseUrl, {
method: 'POST',
headers,
body,
})

if (!response.ok) {
const errorData = await response.json().catch(() => null)
throw new Error(
`API request failed with status ${response.status}: ${JSON.stringify(
errorData
)}`
`API request failed with status ${response.status}: ${JSON.stringify(errorData)}`
)
}
const responseData = await response.json()