Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/src/browser/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
HuggingFace = 'huggingFace',
Engine = 'engine',
Hardware = 'hardware',
RAG = 'rag',
VectorDB = 'vectorDB',
}

export interface ExtensionType {
Expand Down Expand Up @@ -91,18 +93,18 @@
* @property {Array} platform
*/
compatibility(): Compatibility | undefined {
// This implementation reports no compatibility information: it always returns undefined.
return undefined
}

Check warning on line 97 in core/src/browser/extension.ts

View workflow job for this annotation

GitHub Actions / coverage-check

96-97 lines are not covered with tests

/**
 * Registers each of the given models with the shared ModelManager instance,
 * which keeps them in its in-memory data map.
 * @param models Models to register; they are handed over one by one in array order.
 */
async registerModels(models: Model[]): Promise<void> {
  // Walk the array through its entries iterator; the index is not needed.
  for (const [, entry] of models.entries()) {
    ModelManager.instance().register(entry)
  }
}

Check warning on line 107 in core/src/browser/extension.ts

View workflow job for this annotation

GitHub Actions / coverage-check

104-107 lines are not covered with tests

/**
* Register settings for the extension.
Expand All @@ -111,9 +113,9 @@
*/
async registerSettings(settings: SettingComponentProps[]): Promise<void> {
if (!this.name) {
console.error('Extension name is not defined')
return
}

Check warning on line 118 in core/src/browser/extension.ts

View workflow job for this annotation

GitHub Actions / coverage-check

116-118 lines are not covered with tests

settings.forEach((setting) => {
setting.extensionName = this.name
Expand All @@ -122,30 +124,30 @@
const oldSettingsJson = localStorage.getItem(this.name)
// Persists new settings
if (oldSettingsJson) {
const oldSettings = JSON.parse(oldSettingsJson)
settings.forEach((setting) => {

Check warning on line 128 in core/src/browser/extension.ts

View workflow job for this annotation

GitHub Actions / coverage-check

127-128 lines are not covered with tests
// Keep setting value
if (setting.controllerProps && Array.isArray(oldSettings))
setting.controllerProps.value =
oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.value ??
setting.controllerProps.value
if ('options' in setting.controllerProps)
setting.controllerProps.options = setting.controllerProps.options?.length
? setting.controllerProps.options
: oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.options
if ('recommended' in setting.controllerProps) {
const oldRecommended = oldSettings.find((e: any) => e.key === setting.key)
?.controllerProps?.recommended
if (oldRecommended !== undefined && oldRecommended !== '') {
setting.controllerProps.recommended = oldRecommended
}
}
})
}

Check warning on line 146 in core/src/browser/extension.ts

View workflow job for this annotation

GitHub Actions / coverage-check

130-146 lines are not covered with tests
localStorage.setItem(this.name, JSON.stringify(settings))
} catch (err) {
console.error(err)
}

Check warning on line 150 in core/src/browser/extension.ts

View workflow job for this annotation

GitHub Actions / coverage-check

149-150 lines are not covered with tests
}

/**
Expand Down Expand Up @@ -179,18 +181,18 @@
* @returns
*/
async getSettings(): Promise<SettingComponentProps[]> {
// Settings are stored in localStorage under the extension name; without a name there is nothing to look up.
if (!this.name) return []

Check warning on line 184 in core/src/browser/extension.ts

View workflow job for this annotation

GitHub Actions / coverage-check

184 line is not covered with tests

try {
// A missing (or empty) stored value yields an empty settings list.
const settingsString = localStorage.getItem(this.name)
if (!settingsString) return []
// NOTE(review): the parsed JSON is returned as-is; nothing here verifies that it is
// actually an array of SettingComponentProps.
const settings: SettingComponentProps[] = JSON.parse(settingsString)
return settings
} catch (err) {
// Best-effort read: failures (e.g. malformed JSON) are logged as a warning and
// an empty list is returned instead of propagating the error.
console.warn(err)
return []
}
}

Check warning on line 195 in core/src/browser/extension.ts

View workflow job for this annotation

GitHub Actions / coverage-check

186-195 lines are not covered with tests

/**
* Update the settings for the extension.
Expand Down
1 change: 1 addition & 0 deletions core/src/browser/extensions/engines/AIEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ export interface SessionInfo {
port: number // llama-server output port (corrected from portid)
model_id: string //name of the model
model_path: string // path of the loaded model
is_embedding: boolean
api_key: string
mmproj_path?: string
}
Expand Down
5 changes: 5 additions & 0 deletions core/src/browser/extensions/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,8 @@ export { MCPExtension } from './mcp'
* Base AI Engines.
*/
export * from './engines'

export { RAGExtension, RAG_INTERNAL_SERVER } from './rag'
export type { AttachmentInput, IngestAttachmentsResult } from './rag'
export { VectorDBExtension } from './vector-db'
export type { SearchMode, VectorDBStatus, VectorChunkInput, VectorSearchResult, AttachmentFileInfo, VectorDBFileInput, VectorDBIngestOptions } from './vector-db'
36 changes: 36 additions & 0 deletions core/src/browser/extensions/rag.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import type { MCPTool, MCPToolCallResult } from '../../types'
import type { AttachmentFileInfo } from './vector-db'

/**
 * Describes one file handed to `RAGExtension.ingestAttachments`.
 * Only `path` is required; the other fields are optional metadata.
 */
export interface AttachmentInput {
/** Location of the file (presumably a local filesystem path — confirm with implementers). */
path: string
/** Optional name for the file. */
name?: string
/** Optional type of the file (format not specified here, e.g. MIME type vs. extension — confirm). */
type?: string
/** Optional size of the file (unit not specified here; presumably bytes — confirm). */
size?: number
}

/**
 * Value resolved by `RAGExtension.ingestAttachments`.
 */
export interface IngestAttachmentsResult {
/** Presumably the number of input files handled by the call — confirm with implementers. */
filesProcessed: number
/** Presumably the total number of chunks stored across all files — confirm with implementers. */
chunksInserted: number
/** Per-file information, using the `AttachmentFileInfo` shape from the vector DB module. */
files: AttachmentFileInfo[]
}

/**
 * String identifier `'rag-internal'`.
 * NOTE(review): presumably the server name under which the RAG tools are
 * registered/routed — confirm at the call sites.
 */
export const RAG_INTERNAL_SERVER = 'rag-internal'

/**
* RAG extension base: exposes RAG tools and orchestration API.
*
* Concrete extensions must implement every abstract method below; only
* `type()` is provided here.
*/
export abstract class RAGExtension extends BaseExtension {
/** Identifies this extension as `ExtensionTypeEnum.RAG`. */
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.RAG
}

Check warning on line 26 in core/src/browser/extensions/rag.ts

View workflow job for this annotation

GitHub Actions / coverage-check

25-26 lines are not covered with tests

/** Resolves the tools this extension exposes, as `MCPTool` descriptors. */
abstract getTools(): Promise<MCPTool[]>
/**
* Lightweight list of tool names for quick routing/lookup.
*/
abstract getToolNames(): Promise<string[]>
/**
* Runs a tool by name.
* @param toolName Name of the tool to run (presumably one of the names from `getToolNames` — confirm).
* @param args Arguments for the tool, keyed by argument name; values are untyped here.
* @returns The tool's result as an `MCPToolCallResult`.
*/
abstract callTool(toolName: string, args: Record<string, unknown>): Promise<MCPToolCallResult>

/**
* Ingests the given files for a thread.
* @param threadId Thread the files are associated with.
* @param files Files to ingest; see `AttachmentInput`.
* @returns Counts and per-file information; see `IngestAttachmentsResult`.
*/
abstract ingestAttachments(threadId: string, files: AttachmentInput[]): Promise<IngestAttachmentsResult>
}
82 changes: 82 additions & 0 deletions core/src/browser/extensions/vector-db.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'

/**
 * Search strategy accepted by `VectorDBExtension.searchCollection`.
 * NOTE(review): `'ann'` presumably means approximate-nearest-neighbor search,
 * `'linear'` an exhaustive scan, and `'auto'` lets the implementation choose — confirm.
 */
export type SearchMode = 'auto' | 'ann' | 'linear'

/** Status object resolved by `VectorDBExtension.getStatus`. */
export interface VectorDBStatus {
/** Presumably whether the `'ann'` search mode can be used by this backend — confirm. */
ann_available: boolean
}

/** One chunk passed to `VectorDBExtension.insertChunks`: a piece of text plus its embedding. */
export interface VectorChunkInput {
/** Text content of the chunk. */
text: string
/**
 * Embedding vector for `text`.
 * NOTE(review): presumably its length must equal the `dimension` given to `createCollection` — confirm.
 */
embedding: number[]
}

/**
 * Chunk record resolved by `VectorDBExtension.searchCollection` and
 * `VectorDBExtension.getChunks`.
 */
export interface VectorSearchResult {
/** Identifier of the chunk (presumably the kind of id accepted by `deleteChunks` — confirm). */
id: string
/** Text content of the chunk. */
text: string
/** Optional score; its meaning (similarity vs. distance) and range are not specified here. */
score?: number
/** Identifier of the file this chunk is associated with. */
file_id: string
/** Presumably the chunk's position within its file, matching `getChunks` start/end order — confirm. */
chunk_file_order: number
}

/**
 * File record resolved by `VectorDBExtension.ingestFile` and
 * `VectorDBExtension.listAttachments`. Only `id` and `chunk_count` are required.
 */
export interface AttachmentFileInfo {
/** Identifier of the file (presumably the `fileId` other VectorDB methods accept — confirm). */
id: string
/** Optional name of the file. */
name?: string
/** Optional path of the file. */
path?: string
/** Optional type of the file (format not specified here). */
type?: string
/** Optional size of the file (unit not specified here; presumably bytes — confirm). */
size?: number
/** Presumably the number of chunks stored for this file — confirm. */
chunk_count: number
}

// High-level input types for file ingestion
/**
 * Describes the file passed to `VectorDBExtension.ingestFile`.
 * Only `path` is required; the other fields are optional metadata.
 */
export interface VectorDBFileInput {
/** Location of the file (presumably a local filesystem path — confirm). */
path: string
/** Optional name of the file. */
name?: string
/** Optional type of the file (format not specified here). */
type?: string
/** Optional size of the file (unit not specified here; presumably bytes — confirm). */
size?: number
}

/** Chunking options passed to `VectorDBExtension.ingestFile`. */
export interface VectorDBIngestOptions {
/** Size of each chunk (unit not specified here, e.g. characters vs. tokens — confirm). */
chunkSize: number
/** Overlap between chunks, presumably in the same unit as `chunkSize` — confirm. */
chunkOverlap: number
}

/**
* Vector DB extension base: abstraction over local vector storage and search.
*
* Every operation except `getStatus` takes a `threadId`; concrete extensions
* must implement all abstract methods below, only `type()` is provided here.
*/
export abstract class VectorDBExtension extends BaseExtension {
/** Identifies this extension as `ExtensionTypeEnum.VectorDB`. */
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.VectorDB
}

Check warning on line 50 in core/src/browser/extensions/vector-db.ts

View workflow job for this annotation

GitHub Actions / coverage-check

49-50 lines are not covered with tests

/** Resolves the backend status; see `VectorDBStatus`. */
abstract getStatus(): Promise<VectorDBStatus>
/**
* Creates the collection for a thread.
* @param threadId Thread the collection belongs to.
* @param dimension Dimension of the vectors the collection will hold.
*/
abstract createCollection(threadId: string, dimension: number): Promise<void>
/**
* Inserts already-embedded chunks for a file.
* @param threadId Thread whose collection receives the chunks.
* @param fileId File the chunks are associated with.
* @param chunks Text/embedding pairs to insert.
*/
abstract insertChunks(
threadId: string,
fileId: string,
chunks: VectorChunkInput[]
): Promise<void>
/**
* High-level ingestion of a single file.
* @param threadId Thread the file is ingested for.
* @param file File to ingest; see `VectorDBFileInput`.
* @param opts Chunking options; see `VectorDBIngestOptions`.
* @returns Information about the ingested file.
*/
abstract ingestFile(
threadId: string,
file: VectorDBFileInput,
opts: VectorDBIngestOptions
): Promise<AttachmentFileInfo>
/**
* Searches a thread's collection with a query embedding.
* @param threadId Thread whose collection is searched.
* @param query_embedding Embedding vector of the query.
* @param limit Presumably the maximum number of results to return — confirm.
* @param threshold Presumably a score cut-off for results; direction/range not specified here — confirm.
* @param mode Optional search strategy; see `SearchMode`.
* @param fileIds Optional file ids, presumably restricting the search to those files — confirm.
*/
abstract searchCollection(
threadId: string,
query_embedding: number[],
limit: number,
threshold: number,
mode?: SearchMode,
fileIds?: string[]
): Promise<VectorSearchResult[]>
/** Deletes the chunks with the given ids from a thread's collection. */
abstract deleteChunks(threadId: string, ids: string[]): Promise<void>
/** Deletes a file from a thread's collection (presumably including its chunks — confirm). */
abstract deleteFile(threadId: string, fileId: string): Promise<void>
/** Deletes the collection of a thread. */
abstract deleteCollection(threadId: string): Promise<void>
/**
* Lists the files attached to a thread.
* @param limit Optional cap, presumably on the number of files returned — confirm.
*/
abstract listAttachments(threadId: string, limit?: number): Promise<AttachmentFileInfo[]>
/**
* Fetches chunks of a file by their order range.
* @param startOrder Start of the range, presumably compared against `chunk_file_order` — confirm.
* @param endOrder End of the range (whether it is inclusive is not specified here).
*/
abstract getChunks(
threadId: string,
fileId: string,
startOrder: number,
endOrder: number
): Promise<VectorSearchResult[]>
}
2 changes: 2 additions & 0 deletions core/src/types/setting/settingComponent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ export type SettingComponentProps = {
extensionName?: string
requireModelReload?: boolean
configType?: ConfigType
titleKey?: string
descriptionKey?: string
}

export type ConfigType = 'runtime' | 'setting'
Expand Down
32 changes: 25 additions & 7 deletions extensions-web/src/jan-provider-web/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export default class JanProviderWeb extends AIEngine {
// Verify Jan models capabilities in localStorage
private validateJanModelsLocalStorage() {
try {
console.log("Validating Jan models in localStorage...")
console.log('Validating Jan models in localStorage...')
const storageKey = 'model-provider'
const data = localStorage.getItem(storageKey)
if (!data) return
Expand All @@ -60,9 +60,14 @@ export default class JanProviderWeb extends AIEngine {
if (provider.provider === 'jan' && provider.models) {
for (const model of provider.models) {
console.log(`Checking Jan model: ${model.id}`, model.capabilities)
if (JSON.stringify(model.capabilities) !== JSON.stringify(JAN_MODEL_CAPABILITIES)) {
if (
JSON.stringify(model.capabilities) !==
JSON.stringify(JAN_MODEL_CAPABILITIES)
) {
hasInvalidModel = true
console.log(`Found invalid Jan model: ${model.id}, clearing localStorage`)
console.log(
`Found invalid Jan model: ${model.id}, clearing localStorage`
)
break
}
}
Expand All @@ -79,9 +84,17 @@ export default class JanProviderWeb extends AIEngine {
// If still present, try setting to empty state
if (afterRemoval) {
// Try alternative clearing method
localStorage.setItem(storageKey, JSON.stringify({ state: { providers: [] }, version: parsed.version || 3 }))
localStorage.setItem(
storageKey,
JSON.stringify({
state: { providers: [] },
version: parsed.version || 3,
})
)
}
console.log('Cleared model-provider from localStorage due to invalid Jan capabilities')
console.log(
'Cleared model-provider from localStorage due to invalid Jan capabilities'
)
// Force a page reload to ensure clean state
window.location.reload()
}
Expand Down Expand Up @@ -159,6 +172,7 @@ export default class JanProviderWeb extends AIEngine {
port: 443, // HTTPS port
model_id: modelId,
model_path: `remote:${modelId}`, // Indicate this is a remote model
is_embedding: false, // assume false here, TODO: might need further implementation
api_key: '', // API key handled by auth service
}

Expand Down Expand Up @@ -193,8 +207,12 @@ export default class JanProviderWeb extends AIEngine {
console.error(`Failed to unload Jan session ${sessionId}:`, error)
return {
success: false,
error: error instanceof ApiError ? error.message :
error instanceof Error ? error.message : 'Unknown error',
error:
error instanceof ApiError
? error.message
: error instanceof Error
? error.message
: 'Unknown error',
}
}
}
Expand Down
93 changes: 66 additions & 27 deletions extensions/llamacpp-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,14 +333,12 @@ export default class llamacpp_extension extends AIEngine {
)
// Clear the invalid stored preference
this.clearStoredBackendType()
bestAvailableBackendString = await this.determineBestBackend(
version_backends
)
bestAvailableBackendString =
await this.determineBestBackend(version_backends)
}
} else {
bestAvailableBackendString = await this.determineBestBackend(
version_backends
)
bestAvailableBackendString =
await this.determineBestBackend(version_backends)
}

let settings = structuredClone(SETTINGS)
Expand Down Expand Up @@ -1530,17 +1528,41 @@ export default class llamacpp_extension extends AIEngine {

if (
this.autoUnload &&
!isEmbedding &&
(loadedModels.length > 0 || otherLoadingPromises.length > 0)
) {
// Wait for OTHER loading models to finish, then unload everything
if (otherLoadingPromises.length > 0) {
await Promise.all(otherLoadingPromises)
}

// Now unload all loaded models
// Now unload all loaded Text models excluding embedding models
const allLoadedModels = await this.getLoadedModels()
if (allLoadedModels.length > 0) {
await Promise.all(allLoadedModels.map((model) => this.unload(model)))
const sessionInfos: (SessionInfo | null)[] = await Promise.all(
allLoadedModels.map(async (modelId) => {
try {
return await this.findSessionByModel(modelId)
} catch (e) {
logger.warn(`Unable to find session for model "${modelId}": ${e}`)
return null // treat as “not‑eligible for unload”
}
})
)

logger.info(JSON.stringify(sessionInfos))

const nonEmbeddingModels: string[] = sessionInfos
.filter(
(s): s is SessionInfo => s !== null && s.is_embedding === false
)
.map((s) => s.model_id)

if (nonEmbeddingModels.length > 0) {
await Promise.all(
nonEmbeddingModels.map((modelId) => this.unload(modelId))
)
}
}
}
const args: string[] = []
Expand Down Expand Up @@ -1638,7 +1660,7 @@ export default class llamacpp_extension extends AIEngine {
if (cfg.no_kv_offload) args.push('--no-kv-offload')
if (isEmbedding) {
args.push('--embedding')
args.push('--pooling mean')
args.push('--pooling', 'mean')
} else {
if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
Expand Down Expand Up @@ -1677,6 +1699,7 @@ export default class llamacpp_extension extends AIEngine {
libraryPath,
args,
envs,
isEmbedding,
}
)
return sInfo
Expand Down Expand Up @@ -2083,6 +2106,7 @@ export default class llamacpp_extension extends AIEngine {
}

async embed(text: string[]): Promise<EmbeddingResponse> {
// Ensure the sentence-transformer model is present
let sInfo = await this.findSessionByModel('sentence-transformer-mini')
if (!sInfo) {
const downloadedModelList = await this.list()
Expand All @@ -2096,30 +2120,45 @@ export default class llamacpp_extension extends AIEngine {
'https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/all-MiniLM-L6-v2-ggml-model-f16.gguf?download=true',
})
}
sInfo = await this.load('sentence-transformer-mini')
// Load specifically in embedding mode
sInfo = await this.load('sentence-transformer-mini', undefined, true)
}
const baseUrl = `http://localhost:${sInfo.port}/v1/embeddings`
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${sInfo.api_key}`,

const attemptRequest = async (session: SessionInfo) => {
const baseUrl = `http://localhost:${session.port}/v1/embeddings`
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${session.api_key}`,
}
const body = JSON.stringify({
input: text,
model: session.model_id,
encoding_format: 'float',
})
const response = await fetch(baseUrl, {
method: 'POST',
headers,
body,
})
return response
}

// First try with the existing session (may have been started without --embedding previously)
let response = await attemptRequest(sInfo)

// If embeddings endpoint is not available (501), reload with embedding mode and retry once
if (response.status === 501) {
try {
await this.unload('sentence-transformer-mini')
} catch {}
sInfo = await this.load('sentence-transformer-mini', undefined, true)
response = await attemptRequest(sInfo)
}
const body = JSON.stringify({
input: text,
model: sInfo.model_id,
encoding_format: 'float',
})
const response = await fetch(baseUrl, {
method: 'POST',
headers,
body,
})

if (!response.ok) {
const errorData = await response.json().catch(() => null)
throw new Error(
`API request failed with status ${response.status}: ${JSON.stringify(
errorData
)}`
`API request failed with status ${response.status}: ${JSON.stringify(errorData)}`
)
}
const responseData = await response.json()
Expand Down
Loading
Loading