diff --git a/apps/jan-api-gateway/application/app/domain/auth/auth_service.go b/apps/jan-api-gateway/application/app/domain/auth/auth_service.go
index 34f8307b..b5449919 100644
--- a/apps/jan-api-gateway/application/app/domain/auth/auth_service.go
+++ b/apps/jan-api-gateway/application/app/domain/auth/auth_service.go
@@ -80,7 +80,7 @@ func (s *AuthService) AppUserAuthMiddleware() gin.HandlerFunc {
 		}
 
 		reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-			Code: "4026757e-d5a4-4cf7-8914-2c96f011084f",
+			Code: "019947f0-eca1-7474-8ed2-09d6e5389b54",
 		})
 	}
 }
diff --git a/apps/jan-api-gateway/application/app/domain/common/error.go b/apps/jan-api-gateway/application/app/domain/common/error.go
new file mode 100644
index 00000000..dc35ea58
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/domain/common/error.go
@@ -0,0 +1,63 @@
+package common
+
+import "errors"
+
+// Error pairs an underlying error with a stable, machine-readable code.
+// Only Code is serialized to JSON; Err is excluded via the "-" tag.
+type Error struct {
+	Err  error  `json:"-"`
+	Code string `json:"code"`
+}
+
+// NewError wraps an existing error together with the given code.
+func NewError(err error, code string) *Error {
+	return &Error{
+		Err:  err,
+		Code: code,
+	}
+}
+
+// NewErrorWithMessage creates an Error whose underlying error carries message verbatim.
+func NewErrorWithMessage(message string, code string) *Error {
+	return &Error{
+		Err:  errors.New(message),
+		Code: code,
+	}
+}
+
+// Error implements the error interface. It returns the underlying error's
+// message, or "" when the receiver or its underlying error is nil.
+func (e *Error) Error() string {
+	if e == nil || e.Err == nil {
+		return ""
+	}
+	return e.Err.Error()
+}
+
+// Unwrap exposes the underlying error so errors.Is and errors.As can inspect the chain.
+func (e *Error) Unwrap() error {
+	if e == nil {
+		return nil
+	}
+	return e.Err
+}
+
+// String returns the same text as Error.
+func (e *Error) String() string {
+	return e.Error()
+}
+
+// GetMessage returns the message of the underlying error ("" when there is none).
+func (e *Error) GetMessage() string {
+	return e.Error()
+}
+
+// GetCode returns the error code.
+func (e *Error) GetCode() string {
+	return e.Code
+}
+
+// GetError returns the underlying error.
+func (e *Error) GetError() error {
+	return e.Err
+}
diff --git a/apps/jan-api-gateway/application/app/domain/conversation/conversation.go b/apps/jan-api-gateway/application/app/domain/conversation/conversation.go
index 38a6f184..709666f5 100644
--- a/apps/jan-api-gateway/application/app/domain/conversation/conversation.go
+++ b/apps/jan-api-gateway/application/app/domain/conversation/conversation.go
@@ -2,6 +2,7 @@ package conversation
 
 import (
 	"context"
+	"time"
 
 	"menlo.ai/jan-api-gateway/app/domain/query"
 )
@@ -50,18 +51,53 @@ func ValidateItemRole(input string) bool {
 	}
 }
 
+// @Enum(pending, in_progress, completed, failed, cancelled)
+type ItemStatus string
+
+const (
+	ItemStatusPending    ItemStatus = "pending"
+	ItemStatusInProgress ItemStatus = "in_progress"
+	ItemStatusCompleted  ItemStatus = "completed"
+	ItemStatusFailed     ItemStatus = "failed"
+	ItemStatusCancelled  ItemStatus = "cancelled"
+)
+
+func ValidateItemStatus(input string) bool {
+	switch ItemStatus(input) {
+	case ItemStatusPending, ItemStatusInProgress, ItemStatusCompleted, ItemStatusFailed, ItemStatusCancelled:
+		return true
+	default:
+		return false
+	}
+}
+
+// ToItemStatusPtr returns a pointer to the given ItemStatus
+func ToItemStatusPtr(s ItemStatus) *ItemStatus {
+	return &s
+}
+
+// ItemStatusToStringPtr converts *ItemStatus to *string
+func ItemStatusToStringPtr(s *ItemStatus) *string {
+	if s == nil {
+		return nil
+	}
+	str := string(*s)
+	return &str
+}
+
 type Item struct {
-	ID                uint               `json:"-"` // Internal DB ID (hidden from JSON)
+	ID                uint               `json:"-"`
 	ConversationID    uint               `json:"-"`
-	PublicID          string             `json:"id"` // OpenAI-compatible string ID like "msg_abc123"
+	PublicID          string             `json:"id"`
 	Type              ItemType           `json:"type"`
 	Role              *ItemRole          `json:"role,omitempty"`
 	Content           []Content          `json:"content,omitempty"`
-	Status            *string            `json:"status,omitempty"`
-	IncompleteAt      *int64             `json:"incomplete_at,omitempty"`
+	Status            *ItemStatus        `json:"status,omitempty"`
+	IncompleteAt      *time.Time         `json:"incomplete_at,omitempty"`
 	IncompleteDetails *IncompleteDetails `json:"incomplete_details,omitempty"`
-	CompletedAt       *int64             `json:"completed_at,omitempty"`
-	CreatedAt         int64              `json:"created_at"` // Unix timestamp for OpenAI compatibility
+	CompletedAt       *time.Time         `json:"completed_at,omitempty"`
+	ResponseID        *uint              `json:"-"`
+	CreatedAt         time.Time          `json:"created_at"`
 }
 
 type Content struct {
@@ -129,16 +165,16 @@ type IncompleteDetails struct {
 }
 
 type Conversation struct {
-	ID        uint               `json:"-"`  // Internal DB ID (hidden from JSON)
+	ID        uint               `json:"-"`
 	PublicID  string             `json:"id"` // OpenAI-compatible string ID like "conv_abc123"
 	Title     *string            `json:"title,omitempty"`
-	UserID    uint               `json:"-"` // Internal user ID (hidden from JSON)
+	UserID    uint               `json:"-"`
 	Status    ConversationStatus `json:"status"`
 	Items     []Item             `json:"items,omitempty"`
 	Metadata  map[string]string  `json:"metadata,omitempty"`
 	IsPrivate bool               `json:"is_private"`
-	CreatedAt int64              `json:"created_at"` // Unix timestamp for OpenAI compatibility
-	UpdatedAt int64              `json:"updated_at"` // Unix timestamp for OpenAI compatibility
+	CreatedAt time.Time          `json:"created_at"` // Creation time (time.Time marshals as an RFC 3339 string by default, not a Unix integer)
+	UpdatedAt time.Time          `json:"updated_at"` // Last-update time (time.Time marshals as an RFC 3339 string by default, not a Unix integer)
 }
 
 type ConversationFilter struct {
@@ -149,6 +185,8 @@ type ConversationFilter struct {
 type ItemFilter struct {
 	PublicID       *string
 	ConversationID *uint
+	Role           *ItemRole
+	ResponseID     *uint
 }
 
 type ConversationRepository interface {
@@ -177,3 +215,26 @@ type ItemRepository interface {
 	FindByFilter(ctx context.Context, filter ItemFilter, pagination *query.Pagination) ([]*Item, error)
 	Count(ctx context.Context, filter ItemFilter) (int64, error)
 }
+
+// NewItem creates a new conversation item with the given parameters
+func NewItem(publicID string, itemType ItemType, role ItemRole, content []Content, conversationID uint, responseID *uint) *Item {
+	return &Item{
+		PublicID:       publicID,
+		Type:           itemType,
+		Role:           &role,
+		Content:        content,
+		ConversationID: conversationID,
+		ResponseID:     responseID,
+		CreatedAt:      time.Now(),
+	}
+}
+
+// NewTextContent creates a new text content item
+func NewTextContent(text string) Content {
+	return Content{
+		Type: "text",
+		Text: &Text{
+			Value: text,
+		},
+	}
+}
diff --git a/apps/jan-api-gateway/application/app/domain/conversation/conversation_service.go b/apps/jan-api-gateway/application/app/domain/conversation/conversation_service.go
index 7c72d456..23feeb56 100644
--- a/apps/jan-api-gateway/application/app/domain/conversation/conversation_service.go
+++ b/apps/jan-api-gateway/application/app/domain/conversation/conversation_service.go
@@ -1,7 +1,6 @@
 package conversation
 
 import (
-	"fmt"
 	"net/http"
 	"time"
 
@@ -9,10 +8,10 @@ import (
 	"golang.org/x/net/context"
 
 	"menlo.ai/jan-api-gateway/app/domain/auth"
+	"menlo.ai/jan-api-gateway/app/domain/common"
 	"menlo.ai/jan-api-gateway/app/domain/query"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
 	"menlo.ai/jan-api-gateway/app/utils/idgen"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
 )
 
 type ConversationContextKey string
@@ -45,33 +44,32 @@ func NewService(conversationRepo ConversationRepository, itemRepo ItemRepository
 	}
 }
 
-func NewServiceWithValidator(conversationRepo ConversationRepository, itemRepo ItemRepository, validator *ConversationValidator) *ConversationService {
-	return &ConversationService{
-		conversationRepo: conversationRepo,
-		itemRepo:         itemRepo,
-		validator:        validator,
+func (s *ConversationService) FindConversationsByFilter(ctx context.Context, filter ConversationFilter, pagination *query.Pagination) ([]*Conversation, *common.Error) {
+	conversations, err := s.conversationRepo.FindByFilter(ctx, filter, pagination)
+	if err != nil {
+		return nil, common.NewError(err, "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
 	}
+	return conversations, nil
 }
 
-func (s *ConversationService) FindConversationsByFilter(ctx context.Context, filter ConversationFilter, pagination *query.Pagination) ([]*Conversation, error) {
-	return s.conversationRepo.FindByFilter(ctx, filter, pagination)
-}
-
-func (s *ConversationService) CountConversationsByFilter(ctx context.Context, filter ConversationFilter) (int64, error) {
-	return s.conversationRepo.Count(ctx, filter)
+func (s *ConversationService) CountConversationsByFilter(ctx context.Context, filter ConversationFilter) (int64, *common.Error) {
+	count, err := s.conversationRepo.Count(ctx, filter)
+	if err != nil {
+		return 0, common.NewError(err, "b2c3d4e5-f6g7-8901-bcde-f23456789012")
+	}
+	return count, nil
 }
 
-func (s *ConversationService) CreateConversation(ctx context.Context, userID uint, title *string, isPrivate bool, metadata map[string]string) (*Conversation, error) {
+func (s *ConversationService) CreateConversation(ctx context.Context, userID uint, title *string, isPrivate bool, metadata map[string]string) (*Conversation, *common.Error) {
 	if err := s.validator.ValidateConversationInput(title, metadata); err != nil {
-		return nil, fmt.Errorf("validation failed: %w", err)
+		return nil, common.NewError(err, "c3d4e5f6-g7h8-9012-cdef-345678901234")
 	}
 
 	publicID, err := s.generateConversationPublicID()
 	if err != nil {
-		return nil, fmt.Errorf("failed to generate public ID: %w", err)
+		return nil, common.NewError(err, "d4e5f6g7-h8i9-0123-defg-456789012345")
 	}
 
-	now := time.Now().Unix()
 	conversation := &Conversation{
 		PublicID:  publicID,
 		Title:     title,
@@ -79,103 +77,180 @@ func (s *ConversationService) CreateConversation(ctx context.Context, userID uin
 		Status:    ConversationStatusActive,
 		IsPrivate: isPrivate,
 		Metadata:  metadata,
-		CreatedAt: now,
-		UpdatedAt: now,
 	}
 
 	if err := s.conversationRepo.Create(ctx, conversation); err != nil {
-		return nil, fmt.Errorf("failed to create conversation: %w", err)
+		return nil, common.NewError(err, "e5f6g7h8-i9j0-1234-efgh-567890123456")
 	}
 
 	return conversation, nil
 }
 
 // GetConversation retrieves a conversation by its public ID with access control and items loaded
-func (s *ConversationService) GetConversationByPublicIDAndUserID(ctx context.Context, publicID string, userID uint) (*Conversation, error) {
+func (s *ConversationService) GetConversationByPublicIDAndUserID(ctx context.Context, publicID string, userID uint) (*Conversation, *common.Error) {
+	return s.getConversationWithAccessCheck(ctx, publicID, userID)
+}
+
+// GetConversationByID retrieves a conversation by its internal ID without user access control
+func (s *ConversationService) GetConversationByID(ctx context.Context, conversationID uint) (*Conversation, *common.Error) {
+	// Validate inputs
+	if conversationID == 0 {
+		return nil, common.NewErrorWithMessage("Conversation ID cannot be zero", "f6g7h8i9-j0k1-2345-fghi-678901234567")
+	}
+
+	conversation, err := s.conversationRepo.FindByID(ctx, conversationID)
+	if err != nil {
+		return nil, common.NewError(err, "g7h8i9j0-k1l2-3456-ghij-789012345678")
+	}
+	if conversation == nil {
+		return nil, common.NewErrorWithMessage("Conversation not found", "h8i9j0k1-l2m3-4567-hijk-890123456789")
+	}
+
+	return conversation, nil
+}
+
+// getConversationWithAccessCheck looks up a conversation by public ID scoped to userID and fails unless exactly one match exists
+func (s *ConversationService) getConversationWithAccessCheck(ctx context.Context, publicID string, userID uint) (*Conversation, *common.Error) {
+	// Validate inputs
+	if publicID == "" {
+		return nil, common.NewErrorWithMessage("Public ID cannot be empty", "i9j0k1l2-m3n4-5678-ijkl-901234567890")
+	}
+
 	convs, err := s.conversationRepo.FindByFilter(ctx, ConversationFilter{
 		UserID:   &userID,
 		PublicID: &publicID,
 	}, nil)
 	if err != nil {
-		return nil, err
+		return nil, common.NewError(err, "j0k1l2m3-n4o5-6789-jklm-012345678901")
 	}
 	if len(convs) != 1 {
-		return nil, fmt.Errorf("conversation not found")
+		return nil, common.NewErrorWithMessage("Conversation not found", "k1l2m3n4-o5p6-7890-klmn-123456789012")
 	}
 	return convs[0], nil
 }
 
-func (s *ConversationService) UpdateConversation(ctx context.Context, entity *Conversation) (*Conversation, error) {
+func (s *ConversationService) UpdateConversation(ctx context.Context, entity *Conversation) (*Conversation, *common.Error) {
 	if err := s.conversationRepo.Update(ctx, entity); err != nil {
-		return nil, fmt.Errorf("failed to update conversation: %w", err)
+		return nil, common.NewError(err, "l2m3n4o5-p6q7-8901-lmno-234567890123")
 	}
 	return entity, nil
 }
 
-func (s *ConversationService) DeleteConversation(ctx context.Context, conv *Conversation) error {
+func (s *ConversationService) DeleteConversation(ctx context.Context, conv *Conversation) (bool, *common.Error) {
 	if err := s.conversationRepo.Delete(ctx, conv.ID); err != nil {
-		return fmt.Errorf("failed to delete conversation: %w", err)
+		return false, common.NewError(err, "m3n4o5p6-q7r8-9012-mnop-345678901234")
 	}
-	return nil
+	return true, nil
+}
+
+func (s *ConversationService) AddItem(ctx context.Context, conversation *Conversation, userID uint, itemType ItemType, role *ItemRole, content []Content) (*Item, *common.Error) {
+	// Only the owner may add items to a private conversation
+	if conversation.IsPrivate && conversation.UserID != userID {
+		return nil, common.NewErrorWithMessage("Private conversation access denied", "n4o5p6q7-r8s9-0123-nopq-456789012345")
+	}
+
+	if err := s.validator.ValidateItemContent(content); err != nil {
+		return nil, common.NewError(err, "o5p6q7r8-s9t0-1234-opqr-567890123456")
+	}
+
+	itemPublicID, err := s.generateItemPublicID()
+	if err != nil {
+		return nil, common.NewError(err, "p6q7r8s9-t0u1-2345-pqrs-678901234567")
+	}
+
+	item := &Item{
+		PublicID: itemPublicID,
+		Type:     itemType,
+		Role:     role,
+		Content:  content,
+		Status:   ToItemStatusPtr(ItemStatusCompleted),
+	}
+
+	if err := s.conversationRepo.AddItem(ctx, conversation.ID, item); err != nil {
+		return nil, common.NewError(err, "q7r8s9t0-u1v2-3456-qrst-789012345678")
+	}
+
+	// Bump UpdatedAt and persist the conversation after the item is stored
+	if err := s.updateConversationTimestamp(ctx, conversation, "r8s9t0u1-v2w3-4567-rstu-890123456789"); err != nil {
+		return nil, err
+	}
+
+	return item, nil
 }
 
 // DeleteItemWithConversation deletes an item by its ID and updates the conversation accordingly.
-func (s *ConversationService) DeleteItemWithConversation(ctx context.Context, conversation *Conversation, item *Item) (*Item, error) {
+func (s *ConversationService) DeleteItemWithConversation(ctx context.Context, conversation *Conversation, item *Item) (*Item, *common.Error) {
 	if err := s.itemRepo.Delete(ctx, item.ID); err != nil {
-		return nil, err
+		return nil, common.NewError(err, "e1f2g3h4-i5j6-7890-efgh-123456789012")
 	}
 
-	conversation.UpdatedAt = time.Now().Unix()
-	if err := s.conversationRepo.Update(ctx, conversation); err != nil {
-		return nil, fmt.Errorf("failed to update conversation: %w", err)
+	if err := s.updateConversationTimestamp(ctx, conversation, "f2g3h4i5-j6k7-8901-fghi-234567890123"); err != nil {
+		return nil, err
 	}
 
 	return item, nil
 }
 
 // generateConversationPublicID generates a conversation ID with business rules
-// Business rule: conversations use "conv" prefix with 16 character length for OpenAI compatibility
+// Business rule: conversations use "conv" prefix with 42 character length for OpenAI compatibility
 func (s *ConversationService) generateConversationPublicID() (string, error) {
-	return idgen.GenerateSecureID("conv", 16)
+	return idgen.GenerateSecureID("conv", 42)
 }
 
 // generateItemPublicID generates an item/message ID with business rules
-// Business rule: items/messages use "msg" prefix with 16 character length for OpenAI compatibility
+// Business rule: items/messages use "msg" prefix with 42 character length for OpenAI compatibility
 func (s *ConversationService) generateItemPublicID() (string, error) {
-	return idgen.GenerateSecureID("msg", 16)
+	return idgen.GenerateSecureID("msg", 42)
 }
 
-func (s *ConversationService) ValidateItems(ctx context.Context, items []*Item) (bool, *string) {
+// updateConversationTimestamp updates the conversation's UpdatedAt timestamp and saves to database
+func (s *ConversationService) updateConversationTimestamp(ctx context.Context, conversation *Conversation, errorCode string) *common.Error {
+	conversation.UpdatedAt = time.Now()
+	if err := s.conversationRepo.Update(ctx, conversation); err != nil {
+		return common.NewError(err, errorCode)
+	}
+	return nil
+}
+
+func (s *ConversationService) ValidateItems(ctx context.Context, items []*Item) *common.Error {
 	if len(items) > 100 {
-		return false, ptr.ToString("0502c02c-ea2d-429e-933c-1243d4e2bcb2")
+		return common.NewErrorWithMessage("Too many items", "g3h4i5j6-k7l8-9012-ghij-345678901234")
 	}
 	for _, itemData := range items {
 		if errCode := s.validator.ValidateItemContent(itemData.Content); errCode != nil {
-			return false, errCode
+			return common.NewError(errCode, "h4i5j6k7-l8m9-0123-hijk-456789012345") // keep the validator's specific error as the cause
 		}
 	}
-	return true, nil
+	return nil
 }
 
-func (s *ConversationService) FindItemsByFilter(ctx context.Context, filter ItemFilter, p *query.Pagination) ([]*Item, error) {
-	return s.itemRepo.FindByFilter(ctx, filter, p)
+func (s *ConversationService) FindItemsByFilter(ctx context.Context, filter ItemFilter, p *query.Pagination) ([]*Item, *common.Error) {
+	items, err := s.itemRepo.FindByFilter(ctx, filter, p)
+	if err != nil {
+		return nil, common.NewError(err, "i5j6k7l8-m9n0-1234-ijkl-567890123456")
+	}
+	return items, nil
 }
 
-func (s *ConversationService) CountItemsByFilter(ctx context.Context, filter ItemFilter) (int64, error) {
-	return s.itemRepo.Count(ctx, filter)
+func (s *ConversationService) CountItemsByFilter(ctx context.Context, filter ItemFilter) (int64, *common.Error) {
+	count, err := s.itemRepo.Count(ctx, filter)
+	if err != nil {
+		return 0, common.NewError(err, "j6k7l8m9-n0o1-2345-jklm-678901234567")
+	}
+	return count, nil
 }
 
 // AddMultipleItems adds multiple items to a conversation in a single transaction
-func (s *ConversationService) AddMultipleItems(ctx context.Context, conversation *Conversation, userID uint, items []*Item) ([]*Item, error) {
+func (s *ConversationService) AddMultipleItems(ctx context.Context, conversation *Conversation, userID uint, items []*Item) ([]*Item, *common.Error) {
 	// Check access permissions
-	now := time.Now().Unix()
+	now := time.Now()
 	createdItems := make([]*Item, len(items))
 
 	// Create all items
 	for i, itemData := range items {
 		itemPublicID, err := s.generateItemPublicID()
 		if err != nil {
-			return nil, fmt.Errorf("failed to generate item public ID for item %d: %w", i, err)
+			return nil, common.NewError(err, "k7l8m9n0-o1p2-3456-klmn-789012345678")
 		}
 
 		item := &Item{
@@ -183,21 +258,20 @@ func (s *ConversationService) AddMultipleItems(ctx context.Context, conversation
 			Type:        itemData.Type,
 			Role:        itemData.Role,
 			Content:     itemData.Content,
-			Status:      ptr.ToString("completed"),
-			CreatedAt:   now,
+			Status:      ToItemStatusPtr(ItemStatusCompleted),
 			CompletedAt: &now,
+			ResponseID:  itemData.ResponseID,
 		}
 
 		if err := s.conversationRepo.AddItem(ctx, conversation.ID, item); err != nil {
-			return nil, fmt.Errorf("failed to add item %d: %w", i, err)
+			return nil, common.NewError(err, "l8m9n0o1-p2q3-4567-lmno-890123456789") // preserve the repository error as the cause
 		}
 
 		createdItems[i] = item
 	}
 
-	conversation.UpdatedAt = now
-	if err := s.conversationRepo.Update(ctx, conversation); err != nil {
-		return nil, fmt.Errorf("failed to update conversation timestamp: %w", err)
+	if err := s.updateConversationTimestamp(ctx, conversation, "m9n0o1p2-q3r4-5678-mnop-901234567890"); err != nil {
+		return nil, err
 	}
 
 	return createdItems, nil
@@ -217,7 +291,8 @@ func (s *ConversationService) GetConversationMiddleWare() gin.HandlerFunc {
 		user, ok := auth.GetUserFromContext(reqCtx)
 		if !ok {
 			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "f5742805-2c6e-45a8-b6a8-95091b9d46f0",
+				Code:  "01994c96-38fb-7426-9c45-37c8df6c757f",
+				Error: "user not found",
 			})
 			return
 		}
@@ -228,15 +303,16 @@ func (s *ConversationService) GetConversationMiddleWare() gin.HandlerFunc {
 
 		if err != nil {
 			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code:          "1fe94ab8-ba2c-4356-a446-f091c256e260",
-				ErrorInstance: err,
+				Code:  err.GetCode(),
+				Error: err.Error(),
 			})
 			return
 		}
 
 		if len(entities) == 0 {
 			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-				Code: "e91636c2-fced-4a89-bf08-55309005365f",
+				Code:  "e91636c2-fced-4a89-bf08-55309005365f",
+				Error: "conversation not found",
 			})
 			return
 		}
@@ -268,7 +344,8 @@ func (s *ConversationService) GetConversationItemMiddleWare() gin.HandlerFunc {
 		conv, ok := GetConversationFromContext(reqCtx)
 		if !ok {
 			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-				Code: "0f5c3304-bf46-45ce-8719-7c03a3485b37",
+				Code:  "0f5c3304-bf46-45ce-8719-7c03a3485b37",
+				Error: "conversation not found",
 			})
 			return
 		}
@@ -287,15 +364,16 @@ func (s *ConversationService) GetConversationItemMiddleWare() gin.HandlerFunc {
 
 		if err != nil {
 			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:          "bff3c8bf-c259-46a1-8ff0-7c2b2dbfe1b2",
-				ErrorInstance: err,
+				Code:  err.GetCode(),
+				Error: err.Error(),
 			})
 			return
 		}
 
 		if len(entities) == 0 {
 			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-				Code: "25647b40-4967-497e-9cbd-a85243ccef58",
+				Code:  "25647b40-4967-497e-9cbd-a85243ccef58",
+				Error: "conversation item not found",
 			})
 			return
 		}
diff --git a/apps/jan-api-gateway/application/app/domain/conversation/validation.go b/apps/jan-api-gateway/application/app/domain/conversation/validation.go
index f0b18f94..e4f2d10d 100644
--- a/apps/jan-api-gateway/application/app/domain/conversation/validation.go
+++ b/apps/jan-api-gateway/application/app/domain/conversation/validation.go
@@ -7,7 +7,6 @@ import (
 	"unicode/utf8"
 
 	"menlo.ai/jan-api-gateway/app/utils/idgen"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
 )
 
 // ValidationConfig holds conversation validation rules
@@ -74,18 +73,18 @@ func (v *ConversationValidator) ValidateConversationInput(title *string, metadat
 }
 
 // ValidateItemContent performs comprehensive content validation
-func (v *ConversationValidator) ValidateItemContent(content []Content) *string {
+func (v *ConversationValidator) ValidateItemContent(content []Content) error {
 	if len(content) == 0 {
-		return ptr.ToString("aa497939-edbb-416a-899c-a8acc387247e")
+		return fmt.Errorf("aa497939-edbb-416a-899c-a8acc387247e")
 	}
 
 	if len(content) > v.config.MaxContentBlocks {
-		return ptr.ToString("6dbdb6a2-72f0-430a-909c-9f8ca5dd3397")
+		return fmt.Errorf("6dbdb6a2-72f0-430a-909c-9f8ca5dd3397")
 	}
 
 	for _, c := range content {
 		if err := v.validateContentBlock(c); err != nil {
-			return ptr.ToString("c67847d7-9011-41c0-9a05-520c9c670a28")
+			return fmt.Errorf("c67847d7-9011-41c0-9a05-520c9c670a28")
 		}
 	}
 
diff --git a/apps/jan-api-gateway/application/app/domain/inference/inference_provider.go b/apps/jan-api-gateway/application/app/domain/inference/inference_provider.go
new file mode 100644
index 00000000..204be1ba
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/domain/inference/inference_provider.go
@@ -0,0 +1,37 @@
+package inference
+
+import (
+	"context"
+	"io"
+
+	openai "github.com/sashabaranov/go-openai"
+)
+
+// InferenceProvider defines the interface for AI inference services
+type InferenceProvider interface {
+	// CreateCompletion creates a non-streaming chat completion
+	CreateCompletion(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*openai.ChatCompletionResponse, error)
+
+	// CreateCompletionStream creates a streaming chat completion
+	CreateCompletionStream(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (io.ReadCloser, error)
+
+	// GetModels returns available models
+	GetModels(ctx context.Context) (*ModelsResponse, error)
+
+	// ValidateModel checks if a model is supported
+	ValidateModel(model string) error
+}
+
+// ModelsResponse represents the response from GetModels
+type ModelsResponse struct {
+	Object string  `json:"object"`
+	Data   []Model `json:"data"`
+}
+
+// Model represents an AI model
+type Model struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Created int    `json:"created"`
+	OwnedBy string `json:"owned_by"`
+}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response.go b/apps/jan-api-gateway/application/app/domain/response/response.go
new file mode 100644
index 00000000..f6bae5d8
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/domain/response/response.go
@@ -0,0 +1,312 @@
+package response
+
+import (
+	"context"
+	"encoding/json"
+	"time"
+
+	"menlo.ai/jan-api-gateway/app/domain/common"
+	"menlo.ai/jan-api-gateway/app/domain/conversation"
+	"menlo.ai/jan-api-gateway/app/domain/query"
+)
+
+// Response represents a model response stored in the database
+type Response struct {
+	ID                 uint
+	PublicID           string
+	UserID             uint
+	ConversationID     *uint
+	PreviousResponseID *string // Public ID of the previous response
+	Model              string
+	Status             ResponseStatus
+	Input              string  // JSON string of the input
+	Output             *string // JSON string of the output
+	SystemPrompt       *string
+	MaxTokens          *int
+	Temperature        *float64
+	TopP               *float64
+	TopK               *int
+	RepetitionPenalty  *float64
+	Seed               *int
+	Stop               *string // JSON string of stop sequences
+	PresencePenalty    *float64
+	FrequencyPenalty   *float64
+	LogitBias          *string // JSON string of logit bias
+	ResponseFormat     *string // JSON string of response format
+	Tools              *string // JSON string of tools
+	ToolChoice         *string // JSON string of tool choice
+	Metadata           *string // JSON string of metadata
+	Stream             *bool
+	Background         *bool
+	Timeout            *int
+	User               *string
+	Usage              *string // JSON string of usage statistics
+	Error              *string // JSON string of error details
+	CompletedAt        *time.Time
+	CancelledAt        *time.Time
+	FailedAt           *time.Time
+	CreatedAt          time.Time
+	UpdatedAt          time.Time
+	Items              []conversation.Item // Items that belong to this response
+}
+
+// ResponseStatus represents the status of a response
+type ResponseStatus string
+
+const (
+	ResponseStatusPending   ResponseStatus = "pending"
+	ResponseStatusRunning   ResponseStatus = "running"
+	ResponseStatusCompleted ResponseStatus = "completed"
+	ResponseStatusCancelled ResponseStatus = "cancelled"
+	ResponseStatusFailed    ResponseStatus = "failed"
+)
+
+// ResponseFilter represents filters for querying responses
+type ResponseFilter struct {
+	PublicID       *string
+	UserID         *uint
+	ConversationID *uint
+	Model          *string
+	Status         *ResponseStatus
+	CreatedAfter   *time.Time
+	CreatedBefore  *time.Time
+}
+
+// ResponseRepository defines the interface for response data operations
+type ResponseRepository interface {
+	Create(ctx context.Context, r *Response) error
+	Update(ctx context.Context, r *Response) error
+	DeleteByID(ctx context.Context, id uint) error
+	FindByID(ctx context.Context, id uint) (*Response, error)
+	FindByPublicID(ctx context.Context, publicID string) (*Response, error)
+	FindByFilter(ctx context.Context, filter ResponseFilter, pagination *query.Pagination) ([]*Response, error)
+	Count(ctx context.Context, filter ResponseFilter) (int64, error)
+	FindByUserID(ctx context.Context, userID uint, pagination *query.Pagination) ([]*Response, error)
+	FindByConversationID(ctx context.Context, conversationID uint, pagination *query.Pagination) ([]*Response, error)
+}
+
+// ResponseParams represents parameters for creating a response
+type ResponseParams struct {
+	MaxTokens         *int
+	Temperature       *float64
+	TopP              *float64
+	TopK              *int
+	RepetitionPenalty *float64
+	Seed              *int
+	Stop              []string
+	PresencePenalty   *float64
+	FrequencyPenalty  *float64
+	LogitBias         map[string]float64
+	ResponseFormat    any
+	Tools             any
+	ToolChoice        any
+	Metadata          map[string]any
+	Stream            *bool
+	Background        *bool
+	Timeout           *int
+	User              *string
+}
+
+// NewResponse creates a new Response object with the given parameters
+func NewResponse(userID uint, conversationID *uint, model, input string, systemPrompt *string, params *ResponseParams) *Response {
+	response := &Response{
+		UserID:         userID,
+		ConversationID: conversationID,
+		Model:          model,
+		Input:          input,
+		SystemPrompt:   systemPrompt,
+		Status:         ResponseStatusPending,
+	}
+
+	// Apply response parameters
+	if params != nil {
+		response.MaxTokens = params.MaxTokens
+		response.Temperature = params.Temperature
+		response.TopP = params.TopP
+		response.TopK = params.TopK
+		response.RepetitionPenalty = params.RepetitionPenalty
+		response.Seed = params.Seed
+		response.PresencePenalty = params.PresencePenalty
+		response.FrequencyPenalty = params.FrequencyPenalty
+		response.Stream = params.Stream
+		response.Background = params.Background
+		response.Timeout = params.Timeout
+		response.User = params.User
+
+		// Serialize complex fields to JSON strings; values that encode to "[]" or "{}" and values that fail to marshal leave the field nil
+		if params.Stop != nil {
+			if stopJSON, err := json.Marshal(params.Stop); err == nil {
+				stopStr := string(stopJSON)
+				if stopStr != "[]" && stopStr != "{}" {
+					response.Stop = &stopStr
+				}
+			}
+		}
+
+		if params.LogitBias != nil {
+			if logitBiasJSON, err := json.Marshal(params.LogitBias); err == nil {
+				logitBiasStr := string(logitBiasJSON)
+				if logitBiasStr != "[]" && logitBiasStr != "{}" {
+					response.LogitBias = &logitBiasStr
+				}
+			}
+		}
+
+		if params.ResponseFormat != nil {
+			if responseFormatJSON, err := json.Marshal(params.ResponseFormat); err == nil {
+				responseFormatStr := string(responseFormatJSON)
+				if responseFormatStr != "[]" && responseFormatStr != "{}" {
+					response.ResponseFormat = &responseFormatStr
+				}
+			}
+		}
+
+		if params.Tools != nil {
+			if toolsJSON, err := json.Marshal(params.Tools); err == nil {
+				toolsStr := string(toolsJSON)
+				if toolsStr != "[]" && toolsStr != "{}" {
+					response.Tools = &toolsStr
+				}
+			}
+		}
+
+		if params.ToolChoice != nil {
+			if toolChoiceJSON, err := json.Marshal(params.ToolChoice); err == nil {
+				toolChoiceStr := string(toolChoiceJSON)
+				if toolChoiceStr != "[]" && toolChoiceStr != "{}" {
+					response.ToolChoice = &toolChoiceStr
+				}
+			}
+		}
+
+		if params.Metadata != nil {
+			if metadataJSON, err := json.Marshal(params.Metadata); err == nil {
+				metadataStr := string(metadataJSON)
+				if metadataStr != "[]" && metadataStr != "{}" {
+					response.Metadata = &metadataStr
+				}
+			}
+		}
+	}
+
+	return response
+}
+
+// ResponseUpdates bundles optional field updates for a Response; ApplyResponseUpdates skips any field left nil
+type ResponseUpdates struct {
+	Status *string `json:"status,omitempty"` // converted to ResponseStatus when applied
+	Output any     `json:"output,omitempty"` // JSON-marshaled when applied
+	Usage  any     `json:"usage,omitempty"`  // JSON-marshaled when applied
+	Error  any     `json:"error,omitempty"`  // JSON-marshaled when applied; applying it also marks the response failed
+}
+
+// ApplyResponseUpdates applies every non-nil field of updates to response in memory (no DB access) and returns the first marshal error
+func ApplyResponseUpdates(response *Response, updates *ResponseUpdates) *common.Error {
+	// Update status if provided (updates itself is dereferenced unchecked, so it must be non-nil)
+	if updates.Status != nil {
+		UpdateResponseStatusOnObject(response, ResponseStatus(*updates.Status))
+	}
+
+	// Update output if provided
+	if updates.Output != nil {
+		if err := UpdateResponseOutputOnObject(response, updates.Output); err != nil {
+			return err
+		}
+	}
+
+	// Update usage if provided
+	if updates.Usage != nil {
+		if err := UpdateResponseUsageOnObject(response, updates.Usage); err != nil {
+			return err
+		}
+	}
+
+	// Update error last: UpdateResponseErrorOnObject forces the status to failed, overriding any Status applied above
+	if updates.Error != nil {
+		if err := UpdateResponseErrorOnObject(response, updates.Error); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// UpdateResponseStatusOnObject sets the status on a response object and stamps UpdatedAt plus the matching terminal timestamp (no DB access)
+func UpdateResponseStatusOnObject(response *Response, status ResponseStatus) {
+	// Read the clock once so UpdatedAt and CompletedAt/CancelledAt/FailedAt carry the same instant
+	now := time.Now()
+	response.Status = status
+	response.UpdatedAt = now
+
+	switch status {
+	case ResponseStatusCompleted:
+		response.CompletedAt = &now
+	case ResponseStatusCancelled:
+		response.CancelledAt = &now
+	case ResponseStatusFailed:
+		response.FailedAt = &now
+	}
+}
+
+// UpdateResponseOutputOnObject stores output on a response object as a JSON string, or nil when it is empty (no DB access)
+func UpdateResponseOutputOnObject(response *Response, output any) *common.Error {
+	// Convert output to JSON string
+	outputJSON, err := json.Marshal(output)
+	if err != nil {
+		return common.NewError(err, "s9t0u1v2-w3x4-5678-stuv-901234567890")
+	}
+
+	outputStr := string(outputJSON)
+	// Store nil for empty values; nil slices, maps and pointers marshal to "null", so that counts as empty too
+	if outputStr == "[]" || outputStr == "{}" || outputStr == "null" {
+		response.Output = nil
+	} else {
+		response.Output = &outputStr
+	}
+	response.UpdatedAt = time.Now()
+
+	return nil
+}
+
+// UpdateResponseUsageOnObject stores usage statistics on a response object as a JSON string, or nil when empty (no DB access)
+func UpdateResponseUsageOnObject(response *Response, usage any) *common.Error {
+	// Convert usage to JSON string
+	usageJSON, err := json.Marshal(usage)
+	if err != nil {
+		return common.NewError(err, "w3x4y5z6-a7b8-9012-wxyz-345678901234")
+	}
+
+	usageStr := string(usageJSON)
+	// Store nil for empty values; nil slices, maps and pointers marshal to "null", so that counts as empty too
+	if usageStr == "[]" || usageStr == "{}" || usageStr == "null" {
+		response.Usage = nil
+	} else {
+		response.Usage = &usageStr
+	}
+	response.UpdatedAt = time.Now()
+
+	return nil
+}
+
+// UpdateResponseErrorOnObject stores error details as a JSON string (nil when empty) and marks the response failed (no DB access)
+func UpdateResponseErrorOnObject(response *Response, error any) *common.Error {
+	// Convert error to JSON string
+	errorJSON, err := json.Marshal(error)
+	if err != nil {
+		return common.NewError(err, "a7b8c9d0-e1f2-3456-abcd-789012345678")
+	}
+
+	errorStr := string(errorJSON)
+	// Store nil for empty values; nil slices, maps and pointers marshal to "null", so that counts as empty too
+	if errorStr == "[]" || errorStr == "{}" || errorStr == "null" {
+		response.Error = nil
+	} else {
+		response.Error = &errorStr
+	}
+	now := time.Now() // single clock read so UpdatedAt and FailedAt carry the same instant
+	response.Status = ResponseStatusFailed
+	response.UpdatedAt = now
+	response.FailedAt = &now
+
+	return nil
+}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_model_nonstream_service.go b/apps/jan-api-gateway/application/app/domain/response/response_model_nonstream_service.go
new file mode 100644
index 00000000..cfdbfcc7
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/domain/response/response_model_nonstream_service.go
@@ -0,0 +1,213 @@
+package response
+
+import (
+	"context"
+	"net/http"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	openai "github.com/sashabaranov/go-openai"
+	"menlo.ai/jan-api-gateway/app/domain/common"
+	"menlo.ai/jan-api-gateway/app/domain/conversation"
+	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
+	responsetypes "menlo.ai/jan-api-gateway/app/interfaces/http/responses"
+	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
+	"menlo.ai/jan-api-gateway/app/utils/logger"
+	"menlo.ai/jan-api-gateway/app/utils/ptr"
+)
+
+const (
+	// DefaultTimeout caps how long CreateNonStreamResponse waits for the upstream chat completion call
+	DefaultTimeout = 120 * time.Second
+)
+
+// NonStreamModelService handles non-streaming response requests; it embeds the parent service to reuse its dependencies
+type NonStreamModelService struct {
+	*ResponseModelService // promoted fields (e.g. responseService) are used by the methods below
+}
+
+// NewNonStreamModelService wraps responseModelService in a NonStreamModelService; the pointer is stored, not copied
+func NewNonStreamModelService(responseModelService *ResponseModelService) *NonStreamModelService {
+	return &NonStreamModelService{
+		ResponseModelService: responseModelService,
+	}
+}
+
+// CreateNonStreamResponseHandler runs CreateNonStreamResponse and writes the result (200) or a coded error (400) to reqCtx
+func (h *NonStreamModelService) CreateNonStreamResponseHandler(reqCtx *gin.Context, request *requesttypes.CreateResponseRequest, key string, conv *conversation.Conversation, responseEntity *Response, chatCompletionRequest *openai.ChatCompletionRequest) {
+
+	result, err := h.CreateNonStreamResponse(reqCtx, request, key, conv, responseEntity, chatCompletionRequest)
+	if err != nil {
+		reqCtx.AbortWithStatusJSON(
+			http.StatusBadRequest, // every failure, including an upstream inference error, is reported as 400
+			responsetypes.ErrorResponse{
+				Code:  err.GetCode(),
+				Error: err.Error(),
+			})
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, result)
+}
+
+// CreateNonStreamResponse calls the inference backend once, records the assistant reply and response fields best-effort, and returns the API response
+func (h *NonStreamModelService) CreateNonStreamResponse(reqCtx *gin.Context, request *requesttypes.CreateResponseRequest, key string, conv *conversation.Conversation, responseEntity *Response, chatCompletionRequest *openai.ChatCompletionRequest) (responsetypes.Response, *common.Error) {
+	// Process with Jan inference client for non-streaming; the call is bounded by DefaultTimeout on top of the request context
+	janInferenceClient := janinference.NewJanInferenceClient(reqCtx)
+	ctx, cancel := context.WithTimeout(reqCtx.Request.Context(), DefaultTimeout)
+	defer cancel()
+	chatResponse, err := janInferenceClient.CreateChatCompletion(ctx, key, *chatCompletionRequest)
+	if err != nil {
+		return responsetypes.Response{}, common.NewError(err, "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
+	}
+
+	// No post-processing happens here: processedResponse is just an alias of chatResponse
+	var processedResponse *openai.ChatCompletionResponse = chatResponse
+
+	// Append the first choice's non-empty content to the conversation as an assistant message (only if conversation exists)
+	if conv != nil && len(processedResponse.Choices) > 0 && processedResponse.Choices[0].Message.Content != "" {
+		assistantMessage := openai.ChatCompletionMessage{
+			Role:    openai.ChatMessageRoleAssistant,
+			Content: processedResponse.Choices[0].Message.Content,
+		}
+		success, err := h.responseService.AppendMessagesToConversation(reqCtx, conv, []openai.ChatCompletionMessage{assistantMessage}, &responseEntity.ID)
+		if !success {
+			// Log error but don't fail the response (NOTE(review): assumes err is non-nil whenever success is false)
+			logger.GetLogger().Errorf("Failed to append assistant response to conversation: %s - %s", err.GetCode(), err.Error())
+		}
+	}
+
+	// Convert chat completion response to response format
+	responseData := h.convertFromChatCompletionResponse(processedResponse, request, conv, responseEntity)
+
+	// Update response with all fields at once (optimized to prevent N+1 queries)
+	updates := &ResponseUpdates{
+		Status: ptr.ToString(string(ResponseStatusCompleted)),
+		Output: responseData.Output,
+		Usage:  responseData.Usage,
+	}
+	success, updateErr := h.responseService.UpdateResponseFields(reqCtx, responseEntity.ID, updates)
+	if !success {
+		// Log error but don't fail the request since response is already generated
+		logger.GetLogger().Errorf("Failed to update response fields: %s - %s\n", updateErr.GetCode(), updateErr.Error())
+	}
+
+	return responseData, nil
+}
+
+// convertFromChatCompletionResponse builds the API Response from the first choice of chatResp plus request, conversation and entity metadata
+func (h *NonStreamModelService) convertFromChatCompletionResponse(chatResp *openai.ChatCompletionResponse, req *requesttypes.CreateResponseRequest, conv *conversation.Conversation, responseEntity *Response) responsetypes.Response {
+
+	// Extract the content and reasoning from the first choice; any further choices are ignored
+	var outputText string
+	var reasoningContent string
+
+	if len(chatResp.Choices) > 0 {
+		choice := chatResp.Choices[0]
+		outputText = choice.Message.Content
+
+		// Extract reasoning content if present
+		if choice.Message.ReasoningContent != "" {
+			reasoningContent = choice.Message.ReasoningContent
+		}
+	}
+
+	// Echo the request input back unchanged (every branch of this switch assigns the same value)
+	var responseInput any
+	switch v := req.Input.(type) {
+	case string:
+		responseInput = v
+	case []any:
+		responseInput = v
+	default:
+		responseInput = req.Input
+	}
+
+	// Create output using proper ResponseOutput structure; it stays a nil slice when there is neither reasoning nor text
+	var output []responsetypes.ResponseOutput
+
+	// Add reasoning content if present (emitted before the text item)
+	if reasoningContent != "" {
+		output = append(output, responsetypes.ResponseOutput{
+			Type: responsetypes.OutputTypeReasoning,
+			Reasoning: &responsetypes.ReasoningOutput{
+				Task:   "reasoning",
+				Result: reasoningContent,
+				Steps:  []responsetypes.ReasoningStep{},
+			},
+		})
+	}
+
+	// Add text content if present
+	if outputText != "" {
+		output = append(output, responsetypes.ResponseOutput{
+			Type: responsetypes.OutputTypeText,
+			Text: &responsetypes.TextOutput{
+				Value:       outputText,
+				Annotations: []responsetypes.Annotation{},
+			},
+		})
+	}
+
+	// Create usage information from chatResp.Usage; the cached/reasoning detail counters are hard-coded to 0
+	usage := &responsetypes.DetailedUsage{
+		InputTokens:  chatResp.Usage.PromptTokens,
+		OutputTokens: chatResp.Usage.CompletionTokens,
+		TotalTokens:  chatResp.Usage.TotalTokens,
+		InputTokensDetails: &responsetypes.TokenDetails{
+			CachedTokens: 0,
+		},
+		OutputTokensDetails: &responsetypes.TokenDetails{
+			ReasoningTokens: 0,
+		},
+	}
+
+	// Create conversation info (left nil when the request has no conversation)
+	var conversationInfo *responsetypes.ConversationInfo
+	if conv != nil {
+		conversationInfo = &responsetypes.ConversationInfo{
+			ID: conv.PublicID,
+		}
+	}
+
+	response := responsetypes.Response{
+		ID:           responseEntity.PublicID,
+		Object:       "response",
+		Created:      chatResp.Created,
+		Model:        chatResp.Model,
+		Status:       responsetypes.ResponseStatusCompleted,
+		Input:        responseInput,
+		Output:       output,
+		Usage:        usage,
+		Conversation: conversationInfo,
+		// Remaining OpenAI response fields: fixed placeholder values except those copied from req
+		Error:              nil,
+		IncompleteDetails:  nil,
+		Instructions:       nil,
+		MaxOutputTokens:    req.MaxTokens,
+		ParallelToolCalls:  false,
+		PreviousResponseID: nil,
+		Reasoning: &responsetypes.Reasoning{
+			Effort: nil,
+			Summary: func() *string {
+				if reasoningContent != "" {
+					return &reasoningContent
+				}
+				return nil
+			}(),
+		},
+		Store:       true,
+		Temperature: req.Temperature,
+		Text: &responsetypes.TextFormat{
+			Format: &responsetypes.FormatType{
+				Type: "text",
+			},
+		},
+		TopP:       req.TopP,
+		Truncation: "disabled",
+		User:       nil,
+		Metadata:   req.Metadata,
+	}
+
+	return response
+}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_model_service.go b/apps/jan-api-gateway/application/app/domain/response/response_model_service.go
new file mode 100644
index 00000000..970c6383
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/domain/response/response_model_service.go
@@ -0,0 +1,397 @@
+package response
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"strconv"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	openai "github.com/sashabaranov/go-openai"
+	"menlo.ai/jan-api-gateway/app/domain/apikey"
+	"menlo.ai/jan-api-gateway/app/domain/auth"
+	"menlo.ai/jan-api-gateway/app/domain/common"
+	"menlo.ai/jan-api-gateway/app/domain/conversation"
+	inferencemodelregistry "menlo.ai/jan-api-gateway/app/domain/inference_model_registry"
+	"menlo.ai/jan-api-gateway/app/domain/user"
+	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
+	responsetypes "menlo.ai/jan-api-gateway/app/interfaces/http/responses"
+	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
+	"menlo.ai/jan-api-gateway/app/utils/ptr"
+)
+
+// ResponseCreationResult carries what CreateResponse produced so the caller can run the streaming or non-streaming path
+type ResponseCreationResult struct {
+	Response              *Response                     // record returned by responseService.CreateResponse
+	Conversation          *conversation.Conversation    // may be nil when the request has no conversation
+	ChatCompletionRequest *openai.ChatCompletionRequest // request converted for the inference backend
+	APIKey                string                        // currently always "" (see TODO in CreateResponse)
+	IsStreaming           bool                          // true only when request.Stream is set and true
+}
+
+// ResponseModelService handles the business logic for response API endpoints and owns the stream/non-stream sub-services
+type ResponseModelService struct {
+	UserService           *user.UserService
+	authService           *auth.AuthService
+	apikeyService         *apikey.ApiKeyService
+	conversationService   *conversation.ConversationService
+	responseService       *ResponseService
+	streamModelService    *StreamModelService
+	nonStreamModelService *NonStreamModelService
+}
+
+// NewResponseModelService builds a ResponseModelService from its dependencies and wires up its stream/non-stream sub-services
+func NewResponseModelService(
+	userService *user.UserService,
+	authService *auth.AuthService,
+	apikeyService *apikey.ApiKeyService,
+	conversationService *conversation.ConversationService,
+	responseService *ResponseService,
+) *ResponseModelService {
+	responseModelService := &ResponseModelService{
+		UserService:         userService,
+		authService:         authService,
+		apikeyService:       apikeyService,
+		conversationService: conversationService,
+		responseService:     responseService,
+	}
+
+	// Initialize specialized handlers; each one embeds a pointer back to this service, so they share its dependencies
+	responseModelService.streamModelService = NewStreamModelService(responseModelService)
+	responseModelService.nonStreamModelService = NewNonStreamModelService(responseModelService)
+
+	return responseModelService
+}
+
+// CreateResponse validates the request, resolves the model endpoint and conversation, persists a new Response record,
+// and returns domain objects for the interface layer (no HTTP concerns); only the new input messages are stored in the conversation
+func (h *ResponseModelService) CreateResponse(ctx context.Context, userID uint, request *requesttypes.CreateResponseRequest) (*ResponseCreationResult, *common.Error) {
+	// Validate the request
+	success, err := ValidateCreateResponseRequest(request)
+	if !success {
+		return nil, err
+	}
+
+	// TODO add the logic to get the API key for the user
+	key := ""
+
+	// Check if model exists in registry
+	modelRegistry := inferencemodelregistry.GetInstance()
+	mToE := modelRegistry.GetModelToEndpoints()
+	endpoints, ok := mToE[request.Model]
+	if !ok {
+		return nil, common.NewErrorWithMessage("Model validation error", "h8i9j0k1-l2m3-4567-hijk-890123456789")
+	}
+
+	// Convert response request to chat completion request using domain service
+	chatCompletionRequest := h.responseService.ConvertToChatCompletionRequest(request)
+	if chatCompletionRequest == nil {
+		return nil, common.NewErrorWithMessage("Input validation error", "i9j0k1l2-m3n4-5678-ijkl-901234567890")
+	}
+
+	// Check if model endpoint exists: the model must be served by the endpoint this gateway's inference client targets
+	janInferenceClient := janinference.NewJanInferenceClient(ctx)
+	endpointExists := false
+	for _, endpoint := range endpoints {
+		if endpoint == janInferenceClient.BaseURL {
+			endpointExists = true
+			break
+		}
+	}
+
+	if !endpointExists {
+		return nil, common.NewErrorWithMessage("Model validation error", "h8i9j0k1-l2m3-4567-hijk-890123456789")
+	}
+
+	// Handle conversation logic using domain service (conv may be nil; named conv to avoid shadowing the conversation package)
+	conv, err := h.responseService.HandleConversation(ctx, userID, request)
+	if err != nil {
+		return nil, err
+	}
+
+	// Capture the new input before history is prepended, so only these messages are appended to the conversation below
+	inputMessages := chatCompletionRequest.Messages
+	if request.PreviousResponseID != nil && *request.PreviousResponseID != "" {
+		conversationMessages, err := h.responseService.ConvertConversationItemsToMessages(ctx, conv)
+		if err != nil {
+			return nil, err
+		}
+		// previous_response_id was provided: prepend conversation history to the input messages sent to the model
+		chatCompletionRequest.Messages = append(conversationMessages, inputMessages...)
+	}
+
+	// Create response parameters
+	responseParams := &ResponseParams{
+		MaxTokens:         request.MaxTokens,
+		Temperature:       request.Temperature,
+		TopP:              request.TopP,
+		TopK:              request.TopK,
+		RepetitionPenalty: request.RepetitionPenalty,
+		Seed:              request.Seed,
+		Stop:              request.Stop,
+		PresencePenalty:   request.PresencePenalty,
+		FrequencyPenalty:  request.FrequencyPenalty,
+		LogitBias:         request.LogitBias,
+		ResponseFormat:    request.ResponseFormat,
+		Metadata:          request.Metadata,
+		Stream:            request.Stream,
+		Background:        request.Background,
+		Timeout:           request.Timeout,
+		User:              request.User,
+	}
+
+	// Create response record in database
+	var conversationID *uint
+	if conv != nil {
+		conversationID = &conv.ID
+	}
+
+	// Convert input to JSON string
+	inputJSON, jsonErr := json.Marshal(request.Input)
+	if jsonErr != nil {
+		return nil, common.NewError(jsonErr, "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
+	}
+
+	// Build Response object from parameters and persist it
+	response := NewResponse(userID, conversationID, request.Model, string(inputJSON), request.SystemPrompt, responseParams)
+	responseEntity, err := h.responseService.CreateResponse(ctx, response)
+	if err != nil {
+		return nil, err
+	}
+
+	// Append input messages to conversation (only if conversation exists); history is excluded so it is not stored twice
+	if conv != nil {
+		success, err := h.responseService.AppendMessagesToConversation(ctx, conv, inputMessages, &responseEntity.ID)
+		if !success {
+			return nil, err
+		}
+	}
+
+	// Return the result for the interface layer to handle
+	isStreaming := request.Stream != nil && *request.Stream
+	return &ResponseCreationResult{
+		Response:              responseEntity,
+		Conversation:          conv,
+		ChatCompletionRequest: chatCompletionRequest,
+		APIKey:                key,
+		IsStreaming:           isStreaming,
+	}, nil
+}
+
+// Conversation creation or loading for a request is handled by ResponseService.HandleConversation, which CreateResponse calls
+
+// GetResponseHandler serves the response that middleware stored in reqCtx, or a 400 error when it is missing
+func (h *ResponseModelService) GetResponseHandler(reqCtx *gin.Context) {
+	// Get response from middleware context
+	responseEntity, ok := GetResponseFromContext(reqCtx)
+	if !ok {
+		h.sendErrorResponse(reqCtx, http.StatusBadRequest, "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "response not found in context")
+		return
+	}
+
+	result, err := h.GetResponse(responseEntity)
+	if err != nil {
+		h.sendErrorResponse(reqCtx, http.StatusBadRequest, err.GetCode(), err.Error())
+		return
+	}
+
+	h.sendSuccessResponse(reqCtx, result)
+}
+
+// GetResponse converts the domain response to its API representation; the returned error is always nil
+func (h *ResponseModelService) GetResponse(responseEntity *Response) (responsetypes.Response, *common.Error) {
+	// Convert domain response to API response using domain service
+	apiResponse := h.responseService.ConvertDomainResponseToAPIResponse(responseEntity)
+	return apiResponse, nil
+}
+
+// DeleteResponseHandler deletes the response that middleware stored in reqCtx and writes the result, or a 400 error on failure
+func (h *ResponseModelService) DeleteResponseHandler(reqCtx *gin.Context) {
+	// Get response from middleware context
+	responseEntity, ok := GetResponseFromContext(reqCtx)
+	if !ok {
+		h.sendErrorResponse(reqCtx, http.StatusBadRequest, "b2c3d4e5-f6g7-8901-bcde-f23456789012", "response not found in context")
+		return
+	}
+
+	result, err := h.DeleteResponse(reqCtx, responseEntity)
+	if err != nil {
+		h.sendErrorResponse(reqCtx, http.StatusBadRequest, err.GetCode(), err.Error())
+		return
+	}
+
+	h.sendSuccessResponse(reqCtx, result)
+}
+
+// DeleteResponse removes the response via the domain service and returns a summary of the deleted record
+func (h *ResponseModelService) DeleteResponse(reqCtx *gin.Context, responseEntity *Response) (responsetypes.Response, *common.Error) {
+	// Delete the response from database
+	success, err := h.responseService.DeleteResponse(reqCtx, responseEntity.ID)
+	if !success {
+		return responsetypes.Response{}, err
+	}
+
+	// Return the deleted response data; it is reported with status cancelled and CancelledAt set to the current time
+	deletedResponse := responsetypes.Response{
+		ID:          responseEntity.PublicID,
+		Object:      "response",
+		Created:     responseEntity.CreatedAt.Unix(),
+		Model:       responseEntity.Model,
+		Status:      responsetypes.ResponseStatusCancelled,
+		CancelledAt: ptr.ToInt64(time.Now().Unix()),
+	}
+
+	return deletedResponse, nil
+}
+
+// CancelResponseHandler runs CancelResponse for the response that middleware stored in reqCtx and writes the result or a 400 error
+func (h *ResponseModelService) CancelResponseHandler(reqCtx *gin.Context) {
+	// Get response from middleware context
+	responseEntity, ok := GetResponseFromContext(reqCtx)
+	if !ok {
+		h.sendErrorResponse(reqCtx, http.StatusBadRequest, "d4e5f6g7-h8i9-0123-defg-456789012345", "response not found in context")
+		return
+	}
+
+	result, err := h.CancelResponse(responseEntity)
+	if err != nil {
+		h.sendErrorResponse(reqCtx, http.StatusBadRequest, err.GetCode(), err.Error())
+		return
+	}
+
+	h.sendSuccessResponse(reqCtx, result)
+}
+
+// CancelResponse is a placeholder: it changes nothing and only builds a response marked cancelled; the error is always nil
+func (h *ResponseModelService) CancelResponse(responseEntity *Response) (responsetypes.Response, *common.Error) {
+	// TODO: Implement actual cancellation logic
+	// For now, return the response with cancelled status and CancelledAt set to the current time
+	mockResponse := responsetypes.Response{
+		ID:          responseEntity.PublicID,
+		Object:      "response",
+		Created:     responseEntity.CreatedAt.Unix(),
+		Model:       responseEntity.Model,
+		Status:      responsetypes.ResponseStatusCancelled,
+		CancelledAt: ptr.ToInt64(time.Now().Unix()),
+	}
+
+	return mockResponse, nil
+}
+
+// ListInputItemsHandler lists the input items of the response that middleware stored in reqCtx, or writes a 400 error
+func (h *ResponseModelService) ListInputItemsHandler(reqCtx *gin.Context) {
+	// Get response from middleware context
+	responseEntity, ok := GetResponseFromContext(reqCtx)
+	if !ok {
+		h.sendErrorResponse(reqCtx, http.StatusBadRequest, "e5f6g7h8-i9j0-1234-efgh-567890123456", "response not found in context")
+		return
+	}
+
+	result, err := h.ListInputItems(reqCtx, responseEntity)
+	if err != nil {
+		h.sendErrorResponse(reqCtx, http.StatusBadRequest, err.GetCode(), err.Error())
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, result)
+}
+
+// ListInputItems returns one page of the user-role input items of responseEntity, honoring the limit/after/before query parameters
+func (h *ResponseModelService) ListInputItems(reqCtx *gin.Context, responseEntity *Response) (responsetypes.OpenAIListResponse[responsetypes.InputItem], *common.Error) {
+	// Parse pagination parameters; a missing, non-numeric or out-of-range (not 1..100) limit falls back to the default
+	limit := 20 // default limit
+	if limitStr := reqCtx.Query("limit"); limitStr != "" {
+		if parsedLimit, err := strconv.Atoi(limitStr); err == nil && parsedLimit > 0 && parsedLimit <= 100 {
+			limit = parsedLimit
+		}
+	}
+
+	// Get input items for the response (only user role messages)
+	userRole := conversation.ItemRole("user")
+	items, err := h.responseService.GetItemsForResponse(reqCtx, responseEntity.ID, &userRole)
+	if err != nil {
+		return responsetypes.OpenAIListResponse[responsetypes.InputItem]{}, err
+	}
+
+	// Convert conversation items to input items using domain service
+	inputItems := make([]responsetypes.InputItem, 0, len(items))
+	for _, item := range items {
+		inputItems = append(inputItems, h.responseService.ConvertConversationItemToInputItem(item))
+	}
+
+	// Apply pagination in memory; hasMore is set whenever the limit cut the page short of further matching items
+	after, before := reqCtx.Query("after"), reqCtx.Query("before")
+
+	var paginatedItems []responsetypes.InputItem
+	var hasMore bool
+
+	if after != "" {
+		// Collect up to limit items that follow the cursor; an unknown cursor yields an empty page
+		found := false
+		for _, item := range inputItems {
+			if found {
+				if len(paginatedItems) >= limit {
+					hasMore = true
+					break
+				}
+				paginatedItems = append(paginatedItems, item)
+			}
+			if item.ID == after {
+				found = true
+			}
+		}
+	} else if before != "" {
+		// Collect up to limit items from the start of the list, stopping at the cursor
+		for _, item := range inputItems {
+			if item.ID == before {
+				break
+			}
+			if len(paginatedItems) >= limit {
+				hasMore = true
+				break
+			}
+			paginatedItems = append(paginatedItems, item)
+		}
+	} else {
+		// No cursor, return first N items
+		if len(inputItems) > limit {
+			paginatedItems = inputItems[:limit]
+			hasMore = true
+		} else {
+			paginatedItems = inputItems
+		}
+	}
+
+	// Set pagination metadata (both IDs stay nil for an empty page)
+	var firstID, lastID *string
+	if len(paginatedItems) > 0 {
+		firstID = &paginatedItems[0].ID
+		lastID = &paginatedItems[len(paginatedItems)-1].ID
+	}
+
+	status := responsetypes.ResponseCodeOk
+	objectType := responsetypes.ObjectTypeList
+
+	return responsetypes.OpenAIListResponse[responsetypes.InputItem]{
+		JanStatus: &status,
+		Object:    &objectType,
+		HasMore:   &hasMore,
+		FirstID:   firstID,
+		LastID:    lastID,
+		T:         paginatedItems,
+	}, nil
+}
+
+// sendErrorResponse aborts the request with statusCode and an ErrorResponse JSON body carrying errorCode and errorMessage
+func (h *ResponseModelService) sendErrorResponse(reqCtx *gin.Context, statusCode int, errorCode, errorMessage string) {
+	reqCtx.AbortWithStatusJSON(statusCode, responsetypes.ErrorResponse{
+		Code:  errorCode,
+		Error: errorMessage,
+	})
+}
+
+// sendSuccessResponse writes data as the JSON body of a 200 OK response
+func (h *ResponseModelService) sendSuccessResponse(reqCtx *gin.Context, data any) {
+	reqCtx.JSON(http.StatusOK, data) // serialized as-is; an unchecked type assertion here would panic on any non-Response payload
+}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_model_stream_service.go b/apps/jan-api-gateway/application/app/domain/response/response_model_stream_service.go
new file mode 100644
index 00000000..4545765a
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/domain/response/response_model_stream_service.go
@@ -0,0 +1,765 @@
+package response
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	openai "github.com/sashabaranov/go-openai"
+	"menlo.ai/jan-api-gateway/app/domain/common"
+	"menlo.ai/jan-api-gateway/app/domain/conversation"
+	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
+	responsetypes "menlo.ai/jan-api-gateway/app/interfaces/http/responses"
+	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
+	"menlo.ai/jan-api-gateway/app/utils/idgen"
+	"menlo.ai/jan-api-gateway/app/utils/logger"
+	"menlo.ai/jan-api-gateway/app/utils/ptr"
+)
+
+// StreamModelService handles streaming response requests; it embeds the parent service to reuse its dependencies
+type StreamModelService struct {
+	*ResponseModelService
+}
+
+// NewStreamModelService wraps responseModelService in a StreamModelService; the pointer is stored, not copied
+func NewStreamModelService(responseModelService *ResponseModelService) *StreamModelService {
+	return &StreamModelService{
+		ResponseModelService: responseModelService,
+	}
+}
+
+// Constants for streaming configuration (RequestTimeout bounds CreateStreamResponse; SSEEventFormat frames events in marshalAndSendEvent)
+const (
+	RequestTimeout    = 120 * time.Second
+	MinWordsPerChunk  = 6
+	DataPrefix        = "data: "
+	DoneMarker        = "[DONE]"
+	SSEEventFormat    = "event: %s\ndata: %s\n\n"
+	SSEDataFormat     = "data: %s\n\n"
+	ChannelBufferSize = 100
+	ErrorBufferSize   = 10
+)
+
+// validateRequest reports whether request has a non-empty Model and a non-nil Input; on failure it returns false and a coded error
+func (h *StreamModelService) validateRequest(request *requesttypes.CreateResponseRequest) (bool, *common.Error) {
+	if request.Model == "" {
+		return false, common.NewErrorWithMessage("Model is required", "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
+	}
+	if request.Input == nil {
+		return false, common.NewErrorWithMessage("Input is required", "b2c3d4e5-f6g7-8901-bcde-f23456789012")
+	}
+	return true, nil
+}
+
+// checkContextCancellation polls ctx without waiting; if ctx is done it sends ctx.Err() on errChan and returns true, otherwise false
+func (h *StreamModelService) checkContextCancellation(ctx context.Context, errChan chan<- error) bool {
+	select {
+	case <-ctx.Done():
+		errChan <- ctx.Err() // NOTE(review): this send blocks until errChan has buffer space or a receiver; confirm channel sizing
+		return true
+	default:
+		return false
+	}
+}
+
+// marshalAndSendEvent marshals data to JSON and sends it on dataChan framed as an SSE event; on marshal failure it logs and drops the event
+func (h *StreamModelService) marshalAndSendEvent(dataChan chan<- string, eventType string, data any) {
+	eventJSON, err := json.Marshal(data)
+	if err != nil {
+		logger.GetLogger().Errorf("Failed to marshal event: %v", err)
+		return
+	}
+	dataChan <- fmt.Sprintf(SSEEventFormat, eventType, string(eventJSON))
+}
+
+// logStreamingMetrics logs the response ID, the time elapsed since startTime, and wordCount at info level
+func (h *StreamModelService) logStreamingMetrics(responseID string, startTime time.Time, wordCount int) {
+	duration := time.Since(startTime)
+	logger.GetLogger().Infof("Streaming completed - ID: %s, Duration: %v, Words: %d",
+		responseID, duration, wordCount)
+}
+
+// createTextDeltaEvent builds a response.output_text.delta event for itemID carrying delta, with output and content index fixed at 0
+func (h *StreamModelService) createTextDeltaEvent(itemID string, sequenceNumber int, delta string) responsetypes.ResponseOutputTextDeltaEvent {
+	return responsetypes.ResponseOutputTextDeltaEvent{
+		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+			Type:           "response.output_text.delta",
+			SequenceNumber: sequenceNumber,
+		},
+		ItemID:       itemID,
+		OutputIndex:  0,
+		ContentIndex: 0,
+		Delta:        delta,
+		Logprobs:     []responsetypes.Logprob{},
+		Obfuscation:  fmt.Sprintf("%x", time.Now().UnixNano())[:10], // first 10 hex digits of the current UnixNano timestamp
+	}
+}
+
+// CreateStreamResponse streams a response over SSE: it emits response.created, runs the inference stream, then emits response.completed or response.error
+func (h *StreamModelService) CreateStreamResponse(reqCtx *gin.Context, request *requesttypes.CreateResponseRequest, key string, conv *conversation.Conversation, responseEntity *Response, chatCompletionRequest *openai.ChatCompletionRequest) {
+	// Validate request; a failure is answered with a plain 400 JSON body before any SSE header is set
+	success, err := h.validateRequest(request)
+	if !success {
+		reqCtx.JSON(http.StatusBadRequest, responsetypes.ErrorResponse{
+			Code:  err.GetCode(),
+			Error: err.GetMessage(),
+		})
+		return
+	}
+
+	// Add timeout context (RequestTimeout on top of the incoming request context)
+	ctx, cancel := context.WithTimeout(reqCtx.Request.Context(), RequestTimeout)
+	defer cancel()
+
+	// Swap the timeout-bound context into the request so later reads of reqCtx.Request.Context() observe the deadline
+	reqCtx.Request = reqCtx.Request.WithContext(ctx)
+
+	// Set up streaming headers (matching completion API format)
+	reqCtx.Header("Content-Type", "text/event-stream")
+	reqCtx.Header("Cache-Control", "no-cache")
+	reqCtx.Header("Connection", "keep-alive")
+	reqCtx.Header("Access-Control-Allow-Origin", "*")
+	reqCtx.Header("Access-Control-Allow-Headers", "Cache-Control")
+
+	// Use the public ID from the response entity
+	responseID := responseEntity.PublicID
+
+	// Create conversation info (left nil when the request has no conversation)
+	var conversationInfo *responsetypes.ConversationInfo
+	if conv != nil {
+		conversationInfo = &responsetypes.ConversationInfo{
+			ID: conv.PublicID,
+		}
+	}
+
+	// Echo the request input back unchanged (every branch of this switch assigns the same value)
+	var responseInput any
+	switch v := request.Input.(type) {
+	case string:
+		responseInput = v
+	case []any:
+		responseInput = v
+	default:
+		responseInput = request.Input
+	}
+
+	// Create initial response object, reported as running until the stream finishes
+	response := responsetypes.Response{
+		ID:           responseID,
+		Object:       "response",
+		Created:      time.Now().Unix(),
+		Model:        request.Model,
+		Status:       responsetypes.ResponseStatusRunning,
+		Input:        responseInput,
+		Conversation: conversationInfo,
+		Stream:       ptr.ToBool(true),
+		Temperature:  request.Temperature,
+		TopP:         request.TopP,
+		MaxTokens:    request.MaxTokens,
+		Metadata:     request.Metadata,
+	}
+
+	// Emit response.created event
+	h.emitStreamEvent(reqCtx, "response.created", responsetypes.ResponseCreatedEvent{
+		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+			Type:           "response.created",
+			SequenceNumber: 0,
+		},
+		Response: response,
+	})
+
+	// Note: User messages are already added to conversation by the main response handler
+	// No need to add them again here to avoid duplication
+
+	// Process with Jan inference client for streaming
+	janInferenceClient := janinference.NewJanInferenceClient(reqCtx)
+	streamErr := h.processStreamingResponse(reqCtx, janInferenceClient, key, *chatCompletionRequest, responseID, conv)
+	if streamErr != nil {
+		// Pick the error code by cause: deadline exceeded, context cancelled, or any other failure; no completed event follows
+		if reqCtx.Request.Context().Err() == context.DeadlineExceeded {
+			h.emitStreamEvent(reqCtx, "response.error", responsetypes.ResponseErrorEvent{
+				Event:      "response.error",
+				Created:    time.Now().Unix(),
+				ResponseID: responseID,
+				Data: responsetypes.ResponseError{
+					Code: "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+				},
+			})
+		} else if reqCtx.Request.Context().Err() == context.Canceled {
+			h.emitStreamEvent(reqCtx, "response.error", responsetypes.ResponseErrorEvent{
+				Event:      "response.error",
+				Created:    time.Now().Unix(),
+				ResponseID: responseID,
+				Data: responsetypes.ResponseError{
+					Code: "b2c3d4e5-f6g7-8901-bcde-f23456789012",
+				},
+			})
+		} else {
+			h.emitStreamEvent(reqCtx, "response.error", responsetypes.ResponseErrorEvent{
+				Event:      "response.error",
+				Created:    time.Now().Unix(),
+				ResponseID: responseID,
+				Data: responsetypes.ResponseError{
+					Code: "c3af973c-eada-4e8b-96d9-e92546588cd3",
+				},
+			})
+		}
+		return
+	}
+
+	// Emit response.completed event, reusing the initial response object with only its status changed
+	response.Status = responsetypes.ResponseStatusCompleted
+	h.emitStreamEvent(reqCtx, "response.completed", responsetypes.ResponseCompletedEvent{
+		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+			Type:           "response.completed",
+			SequenceNumber: 9999, // High number to indicate completion
+		},
+		Response: response,
+	})
+}
+
+// emitStreamEvent serializes data as JSON and writes it to the client as a
+// single server-sent event named eventType, then flushes the response writer.
+//
+// Failures are logged rather than returned: a marshal failure skips the event
+// entirely, and a write failure skips the flush.
+func (h *StreamModelService) emitStreamEvent(reqCtx *gin.Context, eventType string, data any) {
+	// Marshal the payload as-is; no envelope is added around it.
+	eventJSON, err := json.Marshal(data)
+	if err != nil {
+		logger.GetLogger().Errorf("Failed to marshal streaming event: %v", err)
+		return
+	}
+
+	// Frame the payload with the shared SSE event format.
+	frame := fmt.Sprintf(SSEEventFormat, eventType, string(eventJSON))
+	if _, err := reqCtx.Writer.Write([]byte(frame)); err != nil {
+		logger.GetLogger().Errorf("Failed to write streaming event %q: %v", eventType, err)
+		return
+	}
+	reqCtx.Writer.Flush()
+}
+
+// processStreamingResponse starts streamResponseToChannel in a goroutine and relays
+// every line it publishes on the data channel to the HTTP response writer, flushing
+// after each write.
+//
+// It returns nil once the producer has finished and no error is pending. It returns
+// the first error received from the producer, or the first write error, after
+// aborting the request with a 400 JSON error body.
+func (h *StreamModelService) processStreamingResponse(reqCtx *gin.Context, _ *janinference.JanInferenceClient, _ string, request openai.ChatCompletionRequest, responseID string, conv *conversation.Conversation) error {
+	// Create buffered channels for data and errors
+	dataChan := make(chan string, ChannelBufferSize)
+	errChan := make(chan error, ErrorBufferSize)
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	// Start streaming in a goroutine
+	go h.streamResponseToChannel(reqCtx, request, dataChan, errChan, responseID, conv, &wg)
+
+	// Close both channels once the producer is done so every receive loop below terminates.
+	go func() {
+		wg.Wait()
+		close(dataChan)
+		close(errChan)
+	}()
+
+	// fail aborts the request and hands err back to the caller.
+	fail := func(err error) error {
+		reqCtx.AbortWithStatusJSON(
+			http.StatusBadRequest,
+			responsetypes.ErrorResponse{
+				Code: "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4",
+			})
+		// Keep consuming in the background so a producer that is still sending can
+		// never block forever on a full channel. Both loops end when the channels
+		// are closed by the goroutine above.
+		go func() {
+			for range dataChan {
+			}
+		}()
+		go func() {
+			for range errChan {
+			}
+		}()
+		return err
+	}
+
+	// errs is set to nil once errChan is closed: a nil channel is never selected,
+	// which keeps the loop from spinning on the closed channel.
+	var errs <-chan error = errChan
+
+	// Process data and errors from channels
+	for {
+		select {
+		case line, ok := <-dataChan:
+			if !ok {
+				// The producer has finished. errChan is closed right after dataChan,
+				// so this loop ends promptly; it surfaces an error that was still
+				// buffered when the closed data channel happened to be selected first.
+				for err := range errChan {
+					if err != nil {
+						return fail(err)
+					}
+				}
+				return nil
+			}
+			if _, err := reqCtx.Writer.Write([]byte(line)); err != nil {
+				return fail(err)
+			}
+			reqCtx.Writer.Flush()
+		case err, ok := <-errs:
+			if !ok {
+				errs = nil
+				continue
+			}
+			if err != nil {
+				return fail(err)
+			}
+		}
+	}
+}
+
+// OpenAIStreamData represents the part of an OpenAI-style streaming chunk that this file decodes: the delta text fields of each choice.
+type OpenAIStreamData struct {
+	Choices []struct { // decoded from "choices"; the parsers in this file read only element 0
+		Delta struct { // decoded from "delta"
+			Content          string `json:"content"` // decoded from "content"
+			ReasoningContent string `json:"reasoning_content"` // decoded from "reasoning_content"; parseOpenAIStreamData falls back to it when Content is empty
+		} `json:"delta"`
+	} `json:"choices"`
+}
+
+// parseOpenAIStreamData decodes jsonStr as an OpenAIStreamData chunk and returns the
+// text of its first choice. The delta's content wins; when that is empty the delta's
+// reasoning_content is returned instead (jan-v1-4b model format). Input that does not
+// decode, or a chunk without choices, yields "".
+func (h *StreamModelService) parseOpenAIStreamData(jsonStr string) string {
+	var chunk OpenAIStreamData
+	decodeErr := json.Unmarshal([]byte(jsonStr), &chunk)
+	if decodeErr != nil || len(chunk.Choices) == 0 {
+		// Undecodable input and an empty choices array both mean "no text";
+		// the length check also guards the index below.
+		return ""
+	}
+
+	delta := chunk.Choices[0].Delta
+	if delta.Content != "" {
+		return delta.Content
+	}
+	return delta.ReasoningContent
+}
+
+// extractContentFromOpenAIStream extracts text from one chunk of an OpenAI-style
+// stream. Three input shapes are tried in order:
+//
+//  1. DataPrefix followed by a JSON chunk; the JSON is parsed and its result is
+//     returned as-is, even when empty.
+//  2. A bare JSON chunk without the prefix.
+//  3. A JSON string literal (text wrapped in double quotes), which is unquoted.
+//
+// It returns "" when none of the shapes yields text.
+func (h *StreamModelService) extractContentFromOpenAIStream(chunk string) string {
+	// Format 1: data: {"choices":[{"delta":{"content":"chunk"}}]}
+	// The prefix is matched by value rather than by a hard-coded length so this
+	// stays correct if DataPrefix ever changes.
+	if strings.HasPrefix(chunk, DataPrefix) {
+		return h.parseOpenAIStreamData(strings.TrimPrefix(chunk, DataPrefix))
+	}
+
+	// Format 2: Direct JSON without "data: " prefix
+	if content := h.parseOpenAIStreamData(chunk); content != "" {
+		return content
+	}
+
+	// Format 3: Simple content string (fallback)
+	if len(chunk) > 0 && chunk[0] == '"' && chunk[len(chunk)-1] == '"' {
+		var content string
+		if err := json.Unmarshal([]byte(chunk), &content); err == nil {
+			return content
+		}
+	}
+
+	return ""
+}
+
+// extractReasoningContentFromOpenAIStream extracts the reasoning text from one chunk
+// of an OpenAI-style stream. The chunk may be a JSON object with or without the
+// leading DataPrefix. It returns "" when no reasoning text is found.
+func (h *StreamModelService) extractReasoningContentFromOpenAIStream(chunk string) string {
+	// Format 1: data: {"choices":[{"delta":{"reasoning_content":"chunk"}}]}
+	// The prefix is matched by value rather than by a hard-coded length so this
+	// stays correct if DataPrefix ever changes.
+	if strings.HasPrefix(chunk, DataPrefix) {
+		return h.parseOpenAIStreamReasoningData(strings.TrimPrefix(chunk, DataPrefix))
+	}
+
+	// Format 2: Direct JSON without "data: " prefix. The parser already returns ""
+	// when nothing is found, so its result can be returned directly.
+	return h.parseOpenAIStreamReasoningData(chunk)
+}
+
+// parseOpenAIStreamReasoningData decodes jsonStr as an OpenAIStreamData chunk and
+// returns the reasoning_content of its first choice. Input that does not decode, or
+// a chunk without choices, yields "".
+func (h *StreamModelService) parseOpenAIStreamReasoningData(jsonStr string) string {
+	var chunk OpenAIStreamData
+	decodeErr := json.Unmarshal([]byte(jsonStr), &chunk)
+	if decodeErr != nil || len(chunk.Choices) == 0 {
+		// The length check also guards the index below.
+		return ""
+	}
+	return chunk.Choices[0].Delta.ReasoningContent
+}
+
+// streamResponseToChannel posts request to the inference backend's
+// /v1/chat/completions endpoint, reads the streamed reply line by line, and
+// re-publishes it on dataChan as server-sent events. It calls wg.Done when it
+// returns.
+//
+// Event order on the normal path: response.in_progress, response.output_item.added
+// and response.content_part.added are sent first. Text deltas are sent in chunks of
+// at least MinWordsPerChunk words, but only until the first reasoning chunk is seen;
+// from then on text is buffered and flushed after the reasoning "done" events. The
+// text, content part and output item "done" events follow when any text was
+// produced, and the stream ends with the DoneMarker line.
+//
+// After streaming, the assistant text is appended to conv (when conv is non-nil) and
+// the stored response is updated to completed with the text as its output. Failures
+// in those two steps are logged and do not fail the stream.
+//
+// Errors from ID generation, from the upstream request, and from reading the
+// upstream body are sent on errChan and end the function early.
+//
+// NOTE(review): reasoning events are numbered with a separate counter that is seeded
+// from sequenceNumber when the first reasoning chunk arrives, while sequenceNumber
+// itself is not advanced by them, so text and reasoning events can carry the same
+// sequence number. Confirm whether consumers rely on unique numbers.
+func (h *StreamModelService) streamResponseToChannel(reqCtx *gin.Context, request openai.ChatCompletionRequest, dataChan chan<- string, errChan chan<- error, responseID string, conv *conversation.Conversation, wg *sync.WaitGroup) {
+	defer wg.Done()
+
+	startTime := time.Now()
+
+	// Generate item ID for the message
+	itemID, idErr := idgen.GenerateSecureID("msg", 42)
+	if idErr != nil {
+		errChan <- idErr
+		return
+	}
+	sequenceNumber := 1
+
+	// Emit response.in_progress event
+	inProgressEvent := responsetypes.ResponseInProgressEvent{
+		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+			Type:           "response.in_progress",
+			SequenceNumber: sequenceNumber,
+		},
+		Response: map[string]any{
+			"id":     responseID,
+			"status": "in_progress",
+		},
+	}
+	eventJSON, _ := json.Marshal(inProgressEvent)
+	dataChan <- fmt.Sprintf("event: response.in_progress\ndata: %s\n\n", string(eventJSON))
+	sequenceNumber++
+
+	// Emit response.output_item.added event
+	outputItemAddedEvent := responsetypes.ResponseOutputItemAddedEvent{
+		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+			Type:           "response.output_item.added",
+			SequenceNumber: sequenceNumber,
+		},
+		OutputIndex: 0,
+		Item: responsetypes.ResponseOutputItem{
+			ID:      itemID,
+			Type:    "message",
+			Status:  string(conversation.ItemStatusInProgress),
+			Content: []responsetypes.ResponseContentPart{},
+			Role:    "assistant",
+		},
+	}
+	eventJSON, _ = json.Marshal(outputItemAddedEvent)
+	dataChan <- fmt.Sprintf("event: response.output_item.added\ndata: %s\n\n", string(eventJSON))
+	sequenceNumber++
+
+	// Emit response.content_part.added event
+	contentPartAddedEvent := responsetypes.ResponseContentPartAddedEvent{
+		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+			Type:           "response.content_part.added",
+			SequenceNumber: sequenceNumber,
+		},
+		ItemID:       itemID,
+		OutputIndex:  0,
+		ContentIndex: 0,
+		Part: responsetypes.ResponseContentPart{
+			Type:        "output_text",
+			Annotations: []responsetypes.Annotation{},
+			Logprobs:    []responsetypes.Logprob{},
+			Text:        "",
+		},
+	}
+	eventJSON, _ = json.Marshal(contentPartAddedEvent)
+	dataChan <- fmt.Sprintf("event: response.content_part.added\ndata: %s\n\n", string(eventJSON))
+	sequenceNumber++
+
+	// Create a custom streaming client that processes OpenAI streaming format
+	req := janinference.JanInferenceRestyClient.R().SetBody(request)
+	resp, err := req.
+		SetContext(reqCtx.Request.Context()).
+		SetDoNotParseResponse(true).
+		Post("/v1/chat/completions")
+	if err != nil {
+		errChan <- err
+		return
+	}
+	defer resp.RawResponse.Body.Close()
+
+	// Buffer for accumulating content chunks
+	var contentBuffer strings.Builder
+	var fullResponse strings.Builder
+
+	// Buffer for accumulating reasoning content chunks
+	var reasoningBuffer strings.Builder
+	var fullReasoningResponse strings.Builder
+	var reasoningItemID string
+	var reasoningSequenceNumber int
+	var hasReasoningContent bool
+	var reasoningComplete bool
+
+	// Process the stream line by line
+	scanner := bufio.NewScanner(resp.RawResponse.Body)
+	for scanner.Scan() {
+		// Check if context was cancelled
+		if h.checkContextCancellation(reqCtx, errChan) {
+			return
+		}
+
+		line := scanner.Text()
+		if strings.HasPrefix(line, DataPrefix) {
+			data := strings.TrimPrefix(line, DataPrefix)
+			if data == DoneMarker {
+				break
+			}
+
+			// Extract content from OpenAI streaming format
+			content := h.extractContentFromOpenAIStream(data)
+
+			// Handle content - buffer until reasoning is complete
+			if content != "" {
+				contentBuffer.WriteString(content)
+				fullResponse.WriteString(content)
+
+				// Only send content if reasoning is complete or there's no reasoning content
+				if reasoningComplete || !hasReasoningContent {
+					// Check if we have enough words to send
+					bufferedContent := contentBuffer.String()
+					words := strings.Fields(bufferedContent)
+
+					if len(words) >= MinWordsPerChunk {
+						// Create delta event using helper method
+						deltaEvent := h.createTextDeltaEvent(itemID, sequenceNumber, bufferedContent)
+						h.marshalAndSendEvent(dataChan, "response.output_text.delta", deltaEvent)
+						sequenceNumber++
+						// Clear the buffer
+						contentBuffer.Reset()
+					}
+				}
+			}
+
+			// Handle reasoning content separately
+			reasoningContent := h.extractReasoningContentFromOpenAIStream(data)
+			if reasoningContent != "" {
+				// Initialize reasoning item if not already done
+				if !hasReasoningContent {
+					reasoningItemID = fmt.Sprintf("rs_%d", time.Now().UnixNano())
+					reasoningSequenceNumber = sequenceNumber
+					hasReasoningContent = true
+
+					// Emit response.output_item.added event for reasoning
+					reasoningItemAddedEvent := responsetypes.ResponseOutputItemAddedEvent{
+						BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+							Type:           "response.output_item.added",
+							SequenceNumber: reasoningSequenceNumber,
+						},
+						OutputIndex: 0,
+						Item: responsetypes.ResponseOutputItem{
+							ID:      reasoningItemID,
+							Type:    "reasoning",
+							Status:  string(conversation.ItemStatusInProgress),
+							Content: []responsetypes.ResponseContentPart{},
+							Role:    "assistant",
+						},
+					}
+					eventJSON, _ := json.Marshal(reasoningItemAddedEvent)
+					dataChan <- fmt.Sprintf("event: response.output_item.added\ndata: %s\n\n", string(eventJSON))
+					reasoningSequenceNumber++
+
+					// Emit response.reasoning_summary_part.added event
+					reasoningSummaryPartAddedEvent := responsetypes.ResponseReasoningSummaryPartAddedEvent{
+						BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+							Type:           "response.reasoning_summary_part.added",
+							SequenceNumber: reasoningSequenceNumber,
+						},
+						ItemID:       reasoningItemID,
+						OutputIndex:  0,
+						SummaryIndex: 0,
+						Part: struct {
+							Type string `json:"type"`
+							Text string `json:"text"`
+						}{
+							Type: "summary_text",
+							Text: "",
+						},
+					}
+					eventJSON, _ = json.Marshal(reasoningSummaryPartAddedEvent)
+					dataChan <- fmt.Sprintf("event: response.reasoning_summary_part.added\ndata: %s\n\n", string(eventJSON))
+					reasoningSequenceNumber++
+				}
+
+				reasoningBuffer.WriteString(reasoningContent)
+				fullReasoningResponse.WriteString(reasoningContent)
+
+				// Check if we have enough words to send reasoning content
+				bufferedReasoningContent := reasoningBuffer.String()
+				reasoningWords := strings.Fields(bufferedReasoningContent)
+
+				if len(reasoningWords) >= MinWordsPerChunk {
+					// Emit reasoning summary text delta event
+					reasoningSummaryTextDeltaEvent := responsetypes.ResponseReasoningSummaryTextDeltaEvent{
+						BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+							Type:           "response.reasoning_summary_text.delta",
+							SequenceNumber: reasoningSequenceNumber,
+						},
+						ItemID:       reasoningItemID,
+						OutputIndex:  0,
+						SummaryIndex: 0,
+						Delta:        bufferedReasoningContent,
+						Obfuscation:  fmt.Sprintf("%x", time.Now().UnixNano())[:10], // Simple obfuscation
+					}
+					eventJSON, _ := json.Marshal(reasoningSummaryTextDeltaEvent)
+					dataChan <- fmt.Sprintf("event: response.reasoning_summary_text.delta\ndata: %s\n\n", string(eventJSON))
+					reasoningSequenceNumber++
+					// Clear the reasoning buffer
+					reasoningBuffer.Reset()
+				}
+			}
+
+		}
+	}
+
+	// A scanner stops silently on a read failure or an over-long line. Report that
+	// instead of finishing the stream and marking a truncated response as completed.
+	if scanErr := scanner.Err(); scanErr != nil {
+		errChan <- scanErr
+		return
+	}
+
+	// Send any remaining buffered reasoning content
+	if hasReasoningContent && reasoningBuffer.Len() > 0 {
+		reasoningSummaryTextDeltaEvent := responsetypes.ResponseReasoningSummaryTextDeltaEvent{
+			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+				Type:           "response.reasoning_summary_text.delta",
+				SequenceNumber: reasoningSequenceNumber,
+			},
+			ItemID:       reasoningItemID,
+			OutputIndex:  0,
+			SummaryIndex: 0,
+			Delta:        reasoningBuffer.String(),
+			Obfuscation:  fmt.Sprintf("%x", time.Now().UnixNano())[:10], // Simple obfuscation
+		}
+		eventJSON, _ := json.Marshal(reasoningSummaryTextDeltaEvent)
+		dataChan <- fmt.Sprintf("event: response.reasoning_summary_text.delta\ndata: %s\n\n", string(eventJSON))
+		reasoningSequenceNumber++
+	}
+
+	// Handle reasoning completion events
+	if hasReasoningContent && fullReasoningResponse.Len() > 0 {
+		// Emit reasoning summary text done event
+		reasoningSummaryTextDoneEvent := responsetypes.ResponseReasoningSummaryTextDoneEvent{
+			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+				Type:           "response.reasoning_summary_text.done",
+				SequenceNumber: reasoningSequenceNumber,
+			},
+			ItemID:       reasoningItemID,
+			OutputIndex:  0,
+			SummaryIndex: 0,
+			Text:         fullReasoningResponse.String(),
+		}
+		eventJSON, _ := json.Marshal(reasoningSummaryTextDoneEvent)
+		dataChan <- fmt.Sprintf("event: response.reasoning_summary_text.done\ndata: %s\n\n", string(eventJSON))
+		reasoningSequenceNumber++
+
+		// Emit reasoning summary part done event
+		reasoningSummaryPartDoneEvent := responsetypes.ResponseReasoningSummaryPartDoneEvent{
+			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+				Type:           "response.reasoning_summary_part.done",
+				SequenceNumber: reasoningSequenceNumber,
+			},
+			ItemID:       reasoningItemID,
+			OutputIndex:  0,
+			SummaryIndex: 0,
+			Part: struct {
+				Type string `json:"type"`
+				Text string `json:"text"`
+			}{
+				Type: "summary_text",
+				Text: fullReasoningResponse.String(),
+			},
+		}
+		eventJSON, _ = json.Marshal(reasoningSummaryPartDoneEvent)
+		dataChan <- fmt.Sprintf("event: response.reasoning_summary_part.done\ndata: %s\n\n", string(eventJSON))
+		reasoningSequenceNumber++
+
+		// Mark reasoning as complete
+		reasoningComplete = true
+	}
+
+	// Send any remaining buffered content (only once, after reasoning is complete or if there's no reasoning content)
+	if (reasoningComplete || !hasReasoningContent) && contentBuffer.Len() > 0 {
+		deltaEvent := h.createTextDeltaEvent(itemID, sequenceNumber, contentBuffer.String())
+		h.marshalAndSendEvent(dataChan, "response.output_text.delta", deltaEvent)
+		sequenceNumber++
+		contentBuffer.Reset()
+	}
+
+	// Append assistant's complete response to conversation
+	if fullResponse.Len() > 0 && conv != nil {
+		assistantMessage := openai.ChatCompletionMessage{
+			Role:    openai.ChatMessageRoleAssistant,
+			Content: fullResponse.String(),
+		}
+		// Get response entity to get the internal ID
+		responseEntity, err := h.responseService.GetResponseByPublicID(reqCtx, responseID)
+		if err == nil && responseEntity != nil {
+			success, err := h.responseService.AppendMessagesToConversation(reqCtx, conv, []openai.ChatCompletionMessage{assistantMessage}, &responseEntity.ID)
+			if !success {
+				// Log error but don't fail the response
+				logger.GetLogger().Errorf("Failed to append assistant response to conversation: %s - %s", err.GetCode(), err.Error())
+			}
+		}
+	}
+
+	// Emit text done event
+	if fullResponse.Len() > 0 {
+		doneEvent := responsetypes.ResponseOutputTextDoneEvent{
+			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+				Type:           "response.output_text.done",
+				SequenceNumber: sequenceNumber,
+			},
+			ItemID:       itemID,
+			OutputIndex:  0,
+			ContentIndex: 0,
+			Text:         fullResponse.String(),
+			Logprobs:     []responsetypes.Logprob{},
+		}
+		eventJSON, _ := json.Marshal(doneEvent)
+		dataChan <- fmt.Sprintf("event: response.output_text.done\ndata: %s\n\n", string(eventJSON))
+		sequenceNumber++
+
+		// Emit response.content_part.done event
+		contentPartDoneEvent := responsetypes.ResponseContentPartDoneEvent{
+			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+				Type:           "response.content_part.done",
+				SequenceNumber: sequenceNumber,
+			},
+			ItemID:       itemID,
+			OutputIndex:  0,
+			ContentIndex: 0,
+			Part: responsetypes.ResponseContentPart{
+				Type:        "output_text",
+				Annotations: []responsetypes.Annotation{},
+				Logprobs:    []responsetypes.Logprob{},
+				Text:        fullResponse.String(),
+			},
+		}
+		eventJSON, _ = json.Marshal(contentPartDoneEvent)
+		dataChan <- fmt.Sprintf("event: response.content_part.done\ndata: %s\n\n", string(eventJSON))
+		sequenceNumber++
+
+		// Emit response.output_item.done event
+		outputItemDoneEvent := responsetypes.ResponseOutputItemDoneEvent{
+			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
+				Type:           "response.output_item.done",
+				SequenceNumber: sequenceNumber,
+			},
+			OutputIndex: 0,
+			Item: responsetypes.ResponseOutputItem{
+				ID:     itemID,
+				Type:   "message",
+				Status: string(conversation.ItemStatusCompleted),
+				Content: []responsetypes.ResponseContentPart{
+					{
+						Type:        "output_text",
+						Annotations: []responsetypes.Annotation{},
+						Logprobs:    []responsetypes.Logprob{},
+						Text:        fullResponse.String(),
+					},
+				},
+				Role: "assistant",
+			},
+		}
+		eventJSON, _ = json.Marshal(outputItemDoneEvent)
+		dataChan <- fmt.Sprintf("event: response.output_item.done\ndata: %s\n\n", string(eventJSON))
+		sequenceNumber++
+	}
+
+	// Send [DONE] to close the stream
+	dataChan <- fmt.Sprintf(SSEDataFormat, DoneMarker)
+
+	// Update response status to completed and save output
+	// Get response entity by public ID to update status
+	responseEntity, getErr := h.responseService.GetResponseByPublicID(reqCtx, responseID)
+	switch {
+	case getErr != nil:
+		logger.GetLogger().Errorf("Failed to get response entity for status update: %s - %s", getErr.GetCode(), getErr.Error())
+	case responseEntity == nil:
+		// The lookup can return no entity and no error. The error accessors would
+		// dereference nil in that case, so this is reported without them.
+		logger.GetLogger().Errorf("Failed to get response entity for status update: response %s not found", responseID)
+	default:
+		// Prepare output data
+		outputData := map[string]any{
+			"type": "text",
+			"text": map[string]any{
+				"value": fullResponse.String(),
+			},
+		}
+
+		// Update response with all fields at once (optimized to prevent N+1 queries)
+		updates := &ResponseUpdates{
+			Status: ptr.ToString(string(ResponseStatusCompleted)),
+			Output: outputData,
+		}
+		success, updateErr := h.responseService.UpdateResponseFields(reqCtx, responseEntity.ID, updates)
+		if !success {
+			// Log error but don't fail the request since streaming is already complete
+			logger.GetLogger().Errorf("Failed to update response fields: %s - %s", updateErr.GetCode(), updateErr.Error())
+		}
+	}
+
+	// Log streaming metrics
+	wordCount := len(strings.Fields(fullResponse.String()))
+	h.logStreamingMetrics(responseID, startTime, wordCount)
+}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_service.go b/apps/jan-api-gateway/application/app/domain/response/response_service.go
new file mode 100644
index 00000000..0f6e40f9
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/domain/response/response_service.go
@@ -0,0 +1,729 @@
+package response
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	openai "github.com/sashabaranov/go-openai"
+	"menlo.ai/jan-api-gateway/app/domain/auth"
+	"menlo.ai/jan-api-gateway/app/domain/common"
+	"menlo.ai/jan-api-gateway/app/domain/conversation"
+	"menlo.ai/jan-api-gateway/app/domain/query"
+	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
+	responsetypes "menlo.ai/jan-api-gateway/app/interfaces/http/responses"
+	"menlo.ai/jan-api-gateway/app/utils/idgen"
+	"menlo.ai/jan-api-gateway/app/utils/ptr"
+)
+
+// ResponseService handles business logic for responses; it reads and writes responses through responseRepo and items through itemRepo.
+type ResponseService struct {
+	responseRepo        ResponseRepository // storage used to find, create, update and delete responses
+	itemRepo            conversation.ItemRepository // storage used to create and query conversation items
+	conversationService *conversation.ConversationService // set by NewResponseService
+}
+
+// ResponseContextKey represents context keys for responses; it is a distinct string type for the two key constants below.
+type ResponseContextKey string
+
+const (
+	ResponseContextKeyPublicID ResponseContextKey = "response_id" // presumably keys the response's public ID — TODO confirm against the code that sets it
+	ResponseContextEntity      ResponseContextKey = "ResponseContextEntity" // presumably keys a loaded response entity — TODO confirm
+
+	// ClientCreatedRootConversationID is the special conversation ID that indicates a new conversation should be created
+	ClientCreatedRootConversationID = "client-created-root" // untyped string constant, unlike the two typed keys above
+)
+
+// NewResponseService builds a ResponseService from the given response repository,
+// item repository and conversation service.
+func NewResponseService(responseRepo ResponseRepository, itemRepo conversation.ItemRepository, conversationService *conversation.ConversationService) *ResponseService {
+	svc := new(ResponseService)
+	svc.responseRepo = responseRepo
+	svc.itemRepo = itemRepo
+	svc.conversationService = conversationService
+	return svc
+}
+
+// CreateResponse creates a new response using a Response domain object
+func (s *ResponseService) CreateResponse(ctx context.Context, response *Response) (*Response, *common.Error) {
+	return s.CreateResponseWithPrevious(ctx, response, nil)
+}
+
+// CreateResponseWithPrevious stores response as a new response, optionally chained
+// to the response identified by previousResponseID.
+//
+// When previousResponseID is non-nil, the previous response must exist, belong to
+// the same user, and belong to a conversation; response then joins that
+// conversation. A public ID is generated when response has none, and the status
+// defaults to ResponseStatusPending. UserID, Model and Input are required.
+//
+// It returns the stored response (the same pointer that was passed in), or a
+// *common.Error describing the first failed step.
+func (s *ResponseService) CreateResponseWithPrevious(ctx context.Context, response *Response, previousResponseID *string) (*Response, *common.Error) {
+	// Resolve and check the previous response first.
+	if previousResponseID != nil {
+		prev, lookupErr := s.responseRepo.FindByPublicID(ctx, *previousResponseID)
+		switch {
+		case lookupErr != nil:
+			return nil, common.NewError(lookupErr, "b2c3d4e5-f6g7-8901-bcde-f23456789012")
+		case prev == nil:
+			return nil, common.NewErrorWithMessage("Previous response not found", "c3d4e5f6-g7h8-9012-cdef-345678901234")
+		case prev.UserID != response.UserID:
+			// A response may only be chained to one owned by the same user.
+			return nil, common.NewErrorWithMessage("Previous response does not belong to the current user", "d4e5f6g7-h8i9-0123-defg-456789012345")
+		}
+
+		// Inherit the conversation of the previous response.
+		response.ConversationID = prev.ConversationID
+		if response.ConversationID == nil {
+			return nil, common.NewErrorWithMessage("Previous response does not belong to any conversation", "e5f6g7h8-i9j0-1234-efgh-567890123456")
+		}
+	}
+	response.PreviousResponseID = previousResponseID
+
+	// Fill in the public ID when the caller did not provide one.
+	if response.PublicID == "" {
+		generated, genErr := idgen.GenerateSecureID("resp", 42)
+		if genErr != nil {
+			return nil, common.NewError(genErr, "f6g7h8i9-j0k1-2345-fghi-678901234567")
+		}
+		response.PublicID = generated
+	}
+
+	// Default the status.
+	if response.Status == "" {
+		response.Status = ResponseStatusPending
+	}
+
+	// Required fields, checked in a fixed order so the first missing one is reported.
+	switch {
+	case response.UserID == 0:
+		return nil, common.NewErrorWithMessage("UserID is required", "m3n4o5p6-q7r8-9012-mnop-345678901234")
+	case response.Model == "":
+		return nil, common.NewErrorWithMessage("Model is required", "n4o5p6q7-r8s9-0123-nopq-456789012345")
+	case response.Input == "":
+		return nil, common.NewErrorWithMessage("Input is required", "o5p6q7r8-s9t0-1234-opqr-567890123456")
+	}
+
+	createErr := s.responseRepo.Create(ctx, response)
+	if createErr != nil {
+		return nil, common.NewError(createErr, "m3n4o5p6-q7r8-9012-mnop-345678901234")
+	}
+	return response, nil
+}
+
+// UpdateResponseStatus loads the response with the given internal ID, applies
+// status to it and saves it. It returns true on success; otherwise false and a
+// *common.Error for the lookup failure, the missing response, or the save failure.
+func (s *ResponseService) UpdateResponseStatus(ctx context.Context, responseID uint, status ResponseStatus) (bool, *common.Error) {
+	target, findErr := s.responseRepo.FindByID(ctx, responseID)
+	switch {
+	case findErr != nil:
+		return false, common.NewError(findErr, "n4o5p6q7-r8s9-0123-nopq-456789012345")
+	case target == nil:
+		return false, common.NewErrorWithMessage("Response not found", "o5p6q7r8-s9t0-1234-opqr-567890123456")
+	}
+
+	// Mutate the loaded object, then persist it.
+	UpdateResponseStatusOnObject(target, status)
+
+	saveErr := s.responseRepo.Update(ctx, target)
+	if saveErr != nil {
+		return false, common.NewError(saveErr, "p6q7r8s9-t0u1-2345-pqrs-678901234567")
+	}
+	return true, nil
+}
+
+// UpdateResponseOutput loads the response with the given internal ID, sets its
+// output and saves it. It returns true on success; otherwise false and the
+// *common.Error of the step that failed (lookup, missing response, applying the
+// output, or save).
+func (s *ResponseService) UpdateResponseOutput(ctx context.Context, responseID uint, output any) (bool, *common.Error) {
+	target, findErr := s.responseRepo.FindByID(ctx, responseID)
+	switch {
+	case findErr != nil:
+		return false, common.NewError(findErr, "q7r8s9t0-u1v2-3456-qrst-789012345678")
+	case target == nil:
+		return false, common.NewErrorWithMessage("Response not found", "r8s9t0u1-v2w3-4567-rstu-890123456789")
+	}
+
+	// Mutate the loaded object; the helper's error is passed through unchanged.
+	applyErr := UpdateResponseOutputOnObject(target, output)
+	if applyErr != nil {
+		return false, applyErr
+	}
+
+	saveErr := s.responseRepo.Update(ctx, target)
+	if saveErr != nil {
+		return false, common.NewError(saveErr, "t0u1v2w3-x4y5-6789-tuvw-012345678901")
+	}
+	return true, nil
+}
+
+// UpdateResponseUsage loads the response with the given internal ID, sets its
+// usage statistics and saves it. It returns true on success; otherwise false and
+// the *common.Error of the step that failed (lookup, missing response, applying the
+// usage, or save).
+func (s *ResponseService) UpdateResponseUsage(ctx context.Context, responseID uint, usage any) (bool, *common.Error) {
+	target, findErr := s.responseRepo.FindByID(ctx, responseID)
+	switch {
+	case findErr != nil:
+		return false, common.NewError(findErr, "u1v2w3x4-y5z6-7890-uvwx-123456789012")
+	case target == nil:
+		return false, common.NewErrorWithMessage("Response not found", "v2w3x4y5-z6a7-8901-vwxy-234567890123")
+	}
+
+	// Mutate the loaded object; the helper's error is passed through unchanged.
+	applyErr := UpdateResponseUsageOnObject(target, usage)
+	if applyErr != nil {
+		return false, applyErr
+	}
+
+	saveErr := s.responseRepo.Update(ctx, target)
+	if saveErr != nil {
+		return false, common.NewError(saveErr, "x4y5z6a7-b8c9-0123-xyza-456789012345")
+	}
+	return true, nil
+}
+
+// UpdateResponseError loads the response with the given internal ID, records the
+// given error information on it and saves it. It returns true on success;
+// otherwise false and the *common.Error of the step that failed (lookup, missing
+// response, applying the error information, or save).
+//
+// Note that the error parameter shadows the built-in error type inside this method.
+func (s *ResponseService) UpdateResponseError(ctx context.Context, responseID uint, error any) (bool, *common.Error) {
+	target, findErr := s.responseRepo.FindByID(ctx, responseID)
+	switch {
+	case findErr != nil:
+		return false, common.NewError(findErr, "y5z6a7b8-c9d0-1234-yzab-567890123456")
+	case target == nil:
+		return false, common.NewErrorWithMessage("Response not found", "z6a7b8c9-d0e1-2345-zabc-678901234567")
+	}
+
+	// Mutate the loaded object; the helper's error is passed through unchanged.
+	applyErr := UpdateResponseErrorOnObject(target, error)
+	if applyErr != nil {
+		return false, applyErr
+	}
+
+	saveErr := s.responseRepo.Update(ctx, target)
+	if saveErr != nil {
+		return false, common.NewError(saveErr, "b8c9d0e1-f2g3-4567-bcde-890123456789")
+	}
+	return true, nil
+}
+
+// UpdateResponseFields loads the response with the given internal ID, applies every
+// change in updates to it and saves it with a single Update call. It returns true on
+// success; otherwise false and the *common.Error of the step that failed (lookup,
+// missing response, applying the updates, or save).
+func (s *ResponseService) UpdateResponseFields(ctx context.Context, responseID uint, updates *ResponseUpdates) (bool, *common.Error) {
+	target, findErr := s.responseRepo.FindByID(ctx, responseID)
+	switch {
+	case findErr != nil:
+		return false, common.NewError(findErr, "c9d0e1f2-g3h4-5678-cdef-901234567890")
+	case target == nil:
+		return false, common.NewErrorWithMessage("Response not found", "d0e1f2g3-h4i5-6789-defg-012345678901")
+	}
+
+	// Apply everything in memory first; the helper's error is passed through unchanged.
+	applyErr := ApplyResponseUpdates(target, updates)
+	if applyErr != nil {
+		return false, applyErr
+	}
+
+	// One write for all fields.
+	saveErr := s.responseRepo.Update(ctx, target)
+	if saveErr != nil {
+		return false, common.NewError(saveErr, "e1f2g3h4-i5j6-7890-efgh-123456789012")
+	}
+	return true, nil
+}
+
+// GetResponseByPublicID looks up a response by its public ID. It returns whatever
+// the repository found, which is passed through unchecked, or a *common.Error
+// wrapping the repository error.
+func (s *ResponseService) GetResponseByPublicID(ctx context.Context, publicID string) (*Response, *common.Error) {
+	found, lookupErr := s.responseRepo.FindByPublicID(ctx, publicID)
+	if lookupErr == nil {
+		return found, nil
+	}
+	return nil, common.NewError(lookupErr, "c9d0e1f2-g3h4-5678-cdef-901234567890")
+}
+
+// GetResponsesByUserID returns the responses of the given user, using pagination
+// as passed to the repository. A repository failure is returned as a *common.Error.
+func (s *ResponseService) GetResponsesByUserID(ctx context.Context, userID uint, pagination *query.Pagination) ([]*Response, *common.Error) {
+	found, lookupErr := s.responseRepo.FindByUserID(ctx, userID, pagination)
+	if lookupErr == nil {
+		return found, nil
+	}
+	return nil, common.NewError(lookupErr, "d0e1f2g3-h4i5-6789-defg-012345678901")
+}
+
+// GetResponsesByConversationID returns the responses of the given conversation,
+// using pagination as passed to the repository. A repository failure is returned as
+// a *common.Error.
+func (s *ResponseService) GetResponsesByConversationID(ctx context.Context, conversationID uint, pagination *query.Pagination) ([]*Response, *common.Error) {
+	found, lookupErr := s.responseRepo.FindByConversationID(ctx, conversationID, pagination)
+	if lookupErr == nil {
+		return found, nil
+	}
+	return nil, common.NewError(lookupErr, "e1f2g3h4-i5j6-7890-efgh-123456789012")
+}
+
+// DeleteResponse removes the response with the given internal ID. It returns true
+// on success, or false and a *common.Error wrapping the repository error.
+func (s *ResponseService) DeleteResponse(ctx context.Context, responseID uint) (bool, *common.Error) {
+	deleteErr := s.responseRepo.DeleteByID(ctx, responseID)
+	if deleteErr == nil {
+		return true, nil
+	}
+	return false, common.NewError(deleteErr, "f2g3h4i5-j6k7-8901-fghi-234567890123")
+}
+
+// CreateItemsForResponse creates one conversation item per entry of items, each
+// with a freshly generated public ID and linked to both conversationID and
+// responseID.
+//
+// The response must exist and belong to conversationID. Every entry of items must
+// be non-nil and carry a role; this is checked for the whole slice before anything
+// is persisted, because the role is dereferenced when the item is built.
+//
+// It returns the created items in input order, or a *common.Error for the first
+// failure. Items are created one by one, so a failure part-way through leaves the
+// earlier items persisted.
+func (s *ResponseService) CreateItemsForResponse(ctx context.Context, responseID uint, conversationID uint, items []*conversation.Item) ([]*conversation.Item, *common.Error) {
+	response, err := s.responseRepo.FindByID(ctx, responseID)
+	if err != nil {
+		return nil, common.NewError(err, "g3h4i5j6-k7l8-9012-ghij-345678901234")
+	}
+	if response == nil {
+		return nil, common.NewErrorWithMessage("Response not found", "h4i5j6k7-l8m9-0123-hijk-456789012345")
+	}
+
+	// Validate that the response belongs to the specified conversation
+	if response.ConversationID == nil || *response.ConversationID != conversationID {
+		return nil, common.NewErrorWithMessage("Response does not belong to the specified conversation", "i5j6k7l8-m9n0-1234-ijkl-567890123456")
+	}
+
+	// Reject malformed input up front instead of panicking on a nil dereference
+	// after some items have already been written.
+	for _, itemData := range items {
+		if itemData == nil || itemData.Role == nil {
+			return nil, common.NewErrorWithMessage("Item role is required", "8f3a6c1e-5b2d-4e7a-9d40-1c6b7e2a9f53")
+		}
+	}
+
+	var createdItems []*conversation.Item
+	for _, itemData := range items {
+		// Generate public ID for the item
+		publicID, err := idgen.GenerateSecureID("msg", 42)
+		if err != nil {
+			return nil, common.NewError(err, "j6k7l8m9-n0o1-2345-jklm-678901234567")
+		}
+
+		item := conversation.NewItem(
+			publicID,
+			itemData.Type,
+			*itemData.Role,
+			itemData.Content,
+			conversationID,
+			&responseID,
+		)
+
+		if err := s.itemRepo.Create(ctx, item); err != nil {
+			return nil, common.NewError(err, "k7l8m9n0-o1p2-3456-klmn-789012345678")
+		}
+
+		createdItems = append(createdItems, item)
+	}
+
+	return createdItems, nil
+}
+
+// GetItemsForResponse returns the conversation items linked to the response
+// with the given ID. When itemRole is non-nil it is passed to the repository
+// filter as well. The response must exist; the filter uses its conversation ID.
+func (s *ResponseService) GetItemsForResponse(ctx context.Context, responseID uint, itemRole *conversation.ItemRole) ([]*conversation.Item, *common.Error) {
+	parent, lookupErr := s.responseRepo.FindByID(ctx, responseID)
+	switch {
+	case lookupErr != nil:
+		return nil, common.NewError(lookupErr, "l8m9n0o1-p2q3-4567-lmno-890123456789")
+	case parent == nil:
+		return nil, common.NewErrorWithMessage("Response not found", "m9n0o1p2-q3r4-5678-mnop-901234567890")
+	}
+
+	// Filtering happens in the repository query rather than in memory.
+	matches, queryErr := s.itemRepo.FindByFilter(ctx, conversation.ItemFilter{
+		ConversationID: parent.ConversationID,
+		ResponseID:     &responseID,
+		Role:           itemRole,
+	}, nil)
+	if queryErr != nil {
+		return nil, common.NewError(queryErr, "n0o1p2q3-r4s5-6789-nopq-012345678901")
+	}
+
+	return matches, nil
+}
+
+// CreateResponseFromRequest builds a pending Response for userID from req and
+// hands it to CreateResponseWithPrevious together with req.PreviousResponseID.
+// req.Input is stored as its JSON encoding; an encoding failure is returned as
+// a *common.Error.
+func (s *ResponseService) CreateResponseFromRequest(ctx context.Context, userID uint, req *ResponseRequest) (*Response, *common.Error) {
+	encodedInput, marshalErr := json.Marshal(req.Input)
+	if marshalErr != nil {
+		return nil, common.NewError(marshalErr, "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
+	}
+
+	// ConversationID and SystemPrompt are intentionally left nil here.
+	pending := &Response{}
+	pending.UserID = userID
+	pending.PreviousResponseID = req.PreviousResponseID
+	pending.Model = req.Model
+	pending.Input = string(encodedInput)
+	pending.Status = ResponseStatusPending
+	pending.Stream = req.Stream
+
+	return s.CreateResponseWithPrevious(ctx, pending, req.PreviousResponseID)
+}
+
+// ResponseRequest is the JSON payload consumed by CreateResponseFromRequest to create a response.
+type ResponseRequest struct {
+	Model              string  `json:"model"`                          // copied to Response.Model
+	PreviousResponseID *string `json:"previous_response_id,omitempty"` // optional; omitted from JSON when nil
+	Input              any     `json:"input"`                          // arbitrary JSON value; re-encoded to a JSON string before storage
+	Stream             *bool   `json:"stream,omitempty"`               // optional; omitted from JSON when nil
+}
+
+// GetResponseMiddleWare returns a gin handler that resolves the response named
+// by the ResponseContextKeyPublicID path parameter for the authenticated user
+// and stores it in the gin context for downstream handlers.
+//
+// The request is aborted with:
+//   - 400 when the path parameter is empty,
+//   - 401 when no user is present in the context,
+//   - 500 when the repository lookup fails (a failed lookup is a server-side
+//     problem, not an authentication failure),
+//   - 404 when no response matches both the public ID and the user ID.
+func (s *ResponseService) GetResponseMiddleWare() gin.HandlerFunc {
+	return func(reqCtx *gin.Context) {
+		ctx := reqCtx.Request.Context()
+		publicID := reqCtx.Param(string(ResponseContextKeyPublicID))
+		if publicID == "" {
+			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responsetypes.ErrorResponse{
+				Code:  "r8s9t0u1-v2w3-4567-rstu-890123456789",
+				Error: "missing response public ID",
+			})
+			return
+		}
+		user, ok := auth.GetUserFromContext(reqCtx)
+		if !ok {
+			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responsetypes.ErrorResponse{
+				Code: "s9t0u1v2-w3x4-5678-stuv-901234567890",
+			})
+			return
+		}
+
+		// Scoping the lookup to the caller's user ID means another user's
+		// response is reported as not found.
+		entities, err := s.responseRepo.FindByFilter(ctx, ResponseFilter{
+			PublicID: &publicID,
+			UserID:   &user.ID,
+		}, nil)
+
+		if err != nil {
+			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responsetypes.ErrorResponse{
+				Code:  "t0u1v2w3-x4y5-6789-tuvw-012345678901",
+				Error: err.Error(),
+			})
+			return
+		}
+
+		if len(entities) == 0 {
+			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responsetypes.ErrorResponse{
+				Code: "u1v2w3x4-y5z6-7890-uvwx-123456789012",
+			})
+			return
+		}
+
+		SetResponseFromContext(reqCtx, entities[0])
+		reqCtx.Next()
+	}
+}
+
+// SetResponseFromContext stores resp in the gin context under the
+// ResponseContextEntity key so later handlers can read it back.
+func SetResponseFromContext(reqCtx *gin.Context, resp *Response) {
+	contextKey := string(ResponseContextEntity)
+	reqCtx.Set(contextKey, resp)
+}
+
+// GetResponseFromContext reads the value stored under the ResponseContextEntity
+// key. The boolean is false when nothing is stored or when the stored value is
+// not a *Response.
+func GetResponseFromContext(reqCtx *gin.Context) (*Response, bool) {
+	stored, present := reqCtx.Get(string(ResponseContextEntity))
+	if present {
+		typed, isResponse := stored.(*Response)
+		return typed, isResponse
+	}
+	return nil, false
+}
+
+// ProcessResponseRequest creates a response entity for userID from req by
+// delegating to CreateResponseFromRequest. On failure only the error is
+// returned.
+func (s *ResponseService) ProcessResponseRequest(ctx context.Context, userID uint, req *ResponseRequest) (*Response, *common.Error) {
+	created, createErr := s.CreateResponseFromRequest(ctx, userID, req)
+	if createErr == nil {
+		return created, nil
+	}
+	return nil, createErr
+}
+
+// ConvertDomainResponseToAPIResponse maps a domain Response onto the API
+// representation. Optional timestamps are copied as Unix seconds when set.
+// Output, Usage and Error are stored as JSON text on the entity; each is
+// decoded and attached only if decoding succeeds, otherwise it is left unset.
+func (s *ResponseService) ConvertDomainResponseToAPIResponse(responseEntity *Response) responsetypes.Response {
+	var out responsetypes.Response
+	out.ID = responseEntity.PublicID
+	out.Object = "response"
+	out.Created = responseEntity.CreatedAt.Unix()
+	out.Model = responseEntity.Model
+	out.Status = responsetypes.ResponseStatus(responseEntity.Status)
+	out.Input = responseEntity.Input
+
+	// Conversation reference, if any.
+	if convID := responseEntity.ConversationID; convID != nil {
+		out.Conversation = &responsetypes.ConversationInfo{
+			ID: fmt.Sprintf("conv_%d", *convID),
+		}
+	}
+
+	// Lifecycle timestamps.
+	if at := responseEntity.CompletedAt; at != nil {
+		out.CompletedAt = ptr.ToInt64(at.Unix())
+	}
+	if at := responseEntity.CancelledAt; at != nil {
+		out.CancelledAt = ptr.ToInt64(at.Unix())
+	}
+	if at := responseEntity.FailedAt; at != nil {
+		out.FailedAt = ptr.ToInt64(at.Unix())
+	}
+
+	// JSON-encoded payloads; undecodable values are dropped.
+	if raw := responseEntity.Output; raw != nil {
+		var decoded any
+		if json.Unmarshal([]byte(*raw), &decoded) == nil {
+			out.Output = decoded
+		}
+	}
+	if raw := responseEntity.Usage; raw != nil {
+		var decoded responsetypes.DetailedUsage
+		if json.Unmarshal([]byte(*raw), &decoded) == nil {
+			out.Usage = &decoded
+		}
+	}
+	if raw := responseEntity.Error; raw != nil {
+		var decoded responsetypes.ResponseError
+		if json.Unmarshal([]byte(*raw), &decoded) == nil {
+			out.Error = &decoded
+		}
+	}
+
+	return out
+}
+
+// ConvertConversationItemToInputItem maps a conversation item onto the API
+// input-item shape. Text is taken from the first content part that is either a
+// "text" part with a non-nil Text or an "input_text" part with a non-nil
+// InputText; it stays nil when no such part exists.
+func (s *ResponseService) ConvertConversationItemToInputItem(item *conversation.Item) responsetypes.InputItem {
+	converted := responsetypes.InputItem{
+		ID:      item.PublicID,
+		Object:  "input_item",
+		Created: item.CreatedAt.Unix(),
+		Type:    requesttypes.InputType(item.Type),
+	}
+
+	for _, part := range item.Content {
+		if part.Type == "text" && part.Text != nil {
+			converted.Text = &part.Text.Value
+			break
+		}
+		if part.Type == "input_text" && part.InputText != nil {
+			converted.Text = part.InputText
+			break
+		}
+	}
+
+	return converted
+}
+
+// HandleConversation picks the conversation a new response should be attached
+// to, in this order:
+//  1. store == false: no conversation (nil, nil).
+//  2. previous_response_id set: the conversation of that response, which must
+//     exist, belong to userID and have a conversation.
+//  3. conversation set to something other than ClientCreatedRootConversationID:
+//     that conversation, looked up for userID.
+//  4. otherwise: a newly created conversation.
+func (s *ResponseService) HandleConversation(ctx context.Context, userID uint, request *requesttypes.CreateResponseRequest) (*conversation.Conversation, *common.Error) {
+	// An explicit store=false opts out of conversations entirely.
+	if store := request.Store; store != nil && !*store {
+		return nil, nil
+	}
+
+	// Continue the conversation of the referenced previous response.
+	if prevID := request.PreviousResponseID; prevID != nil && *prevID != "" {
+		previous, lookupErr := s.GetResponseByPublicID(ctx, *prevID)
+		switch {
+		case lookupErr != nil:
+			return nil, lookupErr
+		case previous == nil:
+			return nil, common.NewErrorWithMessage("Previous response not found", "o1p2q3r4-s5t6-7890-opqr-123456789012")
+		case previous.UserID != userID:
+			return nil, common.NewErrorWithMessage("Previous response does not belong to the current user", "p2q3r4s5-t6u7-8901-pqrs-234567890123")
+		case previous.ConversationID == nil:
+			return nil, common.NewErrorWithMessage("Previous response does not belong to any conversation", "q3r4s5t6-u7v8-9012-qrst-345678901234")
+		}
+
+		existing, convErr := s.conversationService.GetConversationByID(ctx, *previous.ConversationID)
+		if convErr != nil {
+			return nil, convErr
+		}
+		return existing, nil
+	}
+
+	// Use the conversation named in the request unless it is the root placeholder.
+	if convID := request.Conversation; convID != nil && *convID != "" && *convID != ClientCreatedRootConversationID {
+		existing, convErr := s.conversationService.GetConversationByPublicIDAndUserID(ctx, *convID, userID)
+		if convErr != nil {
+			return nil, convErr
+		}
+		return existing, nil
+	}
+
+	// Nothing to reuse: start a new conversation.
+	created, createErr := s.conversationService.CreateConversation(ctx, userID, nil, true, nil)
+	if createErr != nil {
+		return nil, createErr
+	}
+	return created, nil
+}
+
+// AppendMessagesToConversation converts OpenAI chat messages into conversation
+// items and adds them to conv in one AddMultipleItems call.
+//
+// Each message gets a fresh "msg" public ID. Unknown roles are stored as user.
+// Content is built from the text parts of MultiContent, falling back to the
+// plain Content string when no text part was found. responseID is attached to
+// every created item. It returns true on success, including when messages is
+// empty.
+func (s *ResponseService) AppendMessagesToConversation(ctx context.Context, conv *conversation.Conversation, messages []openai.ChatCompletionMessage, responseID *uint) (bool, *common.Error) {
+	pending := make([]*conversation.Item, 0, len(messages))
+	for _, message := range messages {
+		itemID, genErr := idgen.GenerateSecureID("msg", 42)
+		if genErr != nil {
+			return false, common.NewErrorWithMessage("Failed to generate item ID", "u7v8w9x0-y1z2-3456-uvwx-789012345678")
+		}
+
+		// Map the OpenAI role; anything unrecognized is treated as user.
+		var itemRole conversation.ItemRole = conversation.ItemRoleUser
+		switch message.Role {
+		case openai.ChatMessageRoleSystem:
+			itemRole = conversation.ItemRoleSystem
+		case openai.ChatMessageRoleAssistant:
+			itemRole = conversation.ItemRoleAssistant
+		}
+
+		// Keep only the text parts of multi-part content.
+		parts := make([]conversation.Content, 0, len(message.MultiContent))
+		for _, part := range message.MultiContent {
+			if part.Type != openai.ChatMessagePartTypeText {
+				continue
+			}
+			parts = append(parts, conversation.NewTextContent(part.Text))
+		}
+
+		// Fall back to the plain string body when no text part was collected.
+		if len(parts) == 0 && message.Content != "" {
+			parts = append(parts, conversation.NewTextContent(message.Content))
+		}
+
+		pending = append(pending, conversation.NewItem(
+			itemID,
+			conversation.ItemTypeMessage,
+			itemRole,
+			parts,
+			conv.ID,
+			responseID,
+		))
+	}
+
+	if len(pending) == 0 {
+		return true, nil
+	}
+	if _, addErr := s.conversationService.AddMultipleItems(ctx, conv, conv.UserID, pending); addErr != nil {
+		return false, addErr
+	}
+	return true, nil
+}
+
+// ConvertToChatCompletionRequest builds an OpenAI chat completion request from
+// a create-response request.
+//
+// Messages are assembled as: the system prompt (if non-empty), followed by the
+// input. The input is interpreted as follows:
+//   - a string holding a JSON array of chat messages is expanded into those
+//     messages; any other string becomes a single user message;
+//   - a non-string value (already-decoded JSON) is re-encoded as JSON and
+//     accepted either as an array of chat messages or as a single message
+//     object with a non-empty role;
+//   - anything else falls back to a single user message containing the value
+//     formatted with %v.
+//
+// Optional sampling parameters are copied only when set on the request.
+func (s *ResponseService) ConvertToChatCompletionRequest(req *requesttypes.CreateResponseRequest) *openai.ChatCompletionRequest {
+	chatReq := &openai.ChatCompletionRequest{
+		Model:    req.Model,
+		Messages: make([]openai.ChatCompletionMessage, 0),
+	}
+
+	// Add system message if provided
+	if req.SystemPrompt != nil && *req.SystemPrompt != "" {
+		chatReq.Messages = append(chatReq.Messages, openai.ChatCompletionMessage{
+			Role:    openai.ChatMessageRoleSystem,
+			Content: *req.SystemPrompt,
+		})
+	}
+
+	// Add user input as message(s)
+	if req.Input != nil {
+		var input any = req.Input
+		text, isText := input.(string)
+
+		// raw is the JSON candidate. Decoded values must be re-encoded with
+		// json.Marshal: formatting them with %v yields Go syntax such as
+		// "[map[role:user]]", which never parses as JSON.
+		raw := []byte(text)
+		if !isText {
+			if encoded, err := json.Marshal(input); err == nil {
+				raw = encoded
+			}
+		}
+
+		var (
+			messages []openai.ChatCompletionMessage
+			single   openai.ChatCompletionMessage
+		)
+		switch {
+		case len(raw) > 0 && json.Unmarshal(raw, &messages) == nil:
+			// Input is an array of messages
+			chatReq.Messages = append(chatReq.Messages, messages...)
+		case !isText && len(raw) > 0 && json.Unmarshal(raw, &single) == nil && single.Role != "":
+			// Input is one decoded message object. The role check keeps
+			// objects without a role out of this branch.
+			chatReq.Messages = append(chatReq.Messages, single)
+		default:
+			// Input is a single plain message
+			content := text
+			if !isText {
+				content = fmt.Sprintf("%v", input)
+			}
+			chatReq.Messages = append(chatReq.Messages, openai.ChatCompletionMessage{
+				Role:    openai.ChatMessageRoleUser,
+				Content: content,
+			})
+		}
+	}
+
+	// Set optional parameters
+	if req.MaxTokens != nil {
+		chatReq.MaxTokens = *req.MaxTokens
+	}
+	if req.Temperature != nil {
+		chatReq.Temperature = float32(*req.Temperature)
+	}
+	if req.TopP != nil {
+		chatReq.TopP = float32(*req.TopP)
+	}
+	if req.Stop != nil {
+		chatReq.Stop = req.Stop
+	}
+	if req.PresencePenalty != nil {
+		chatReq.PresencePenalty = float32(*req.PresencePenalty)
+	}
+	if req.FrequencyPenalty != nil {
+		chatReq.FrequencyPenalty = float32(*req.FrequencyPenalty)
+	}
+	if req.User != nil {
+		chatReq.User = *req.User
+	}
+
+	return chatReq
+}
+
+// ConvertConversationItemsToMessages reloads conv by public ID and user ID and
+// turns its items into OpenAI chat messages.
+//
+// Items without a role or without content are skipped. Roles other than
+// system, user and assistant are sent as user. A message body is the
+// concatenation of the item's "text" parts; items whose body ends up empty are
+// skipped as well.
+func (s *ResponseService) ConvertConversationItemsToMessages(ctx context.Context, conv *conversation.Conversation) ([]openai.ChatCompletionMessage, *common.Error) {
+	loaded, loadErr := s.conversationService.GetConversationByPublicIDAndUserID(ctx, conv.PublicID, conv.UserID)
+	if loadErr != nil {
+		return nil, loadErr
+	}
+
+	result := make([]openai.ChatCompletionMessage, 0, len(loaded.Items))
+	for _, item := range loaded.Items {
+		if item.Role == nil || len(item.Content) == 0 {
+			continue
+		}
+
+		// Translate the stored role; unknown roles are treated as user.
+		var chatRole string
+		switch *item.Role {
+		case conversation.ItemRoleAssistant:
+			chatRole = openai.ChatMessageRoleAssistant
+		case conversation.ItemRoleSystem:
+			chatRole = openai.ChatMessageRoleSystem
+		case conversation.ItemRoleUser:
+			chatRole = openai.ChatMessageRoleUser
+		default:
+			chatRole = openai.ChatMessageRoleUser
+		}
+
+		// Concatenate every text part of the item.
+		var body string
+		for _, part := range item.Content {
+			if part.Type != "text" || part.Text == nil {
+				continue
+			}
+			body += part.Text.Value
+		}
+		if body == "" {
+			continue
+		}
+
+		result = append(result, openai.ChatCompletionMessage{
+			Role:    chatRole,
+			Content: body,
+		})
+	}
+
+	return result, nil
+}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_validator.go b/apps/jan-api-gateway/application/app/domain/response/response_validator.go
new file mode 100644
index 00000000..365850f7
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/domain/response/response_validator.go
@@ -0,0 +1,816 @@
+package response
+
+import (
+	"fmt"
+	"strings"
+
+	"menlo.ai/jan-api-gateway/app/domain/common"
+	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
+)
+
+// ValidationError describes a single failed check on a request field.
+type ValidationError struct {
+	Field   string `json:"field"`   // path of the offending field, e.g. "input[0].role"
+	Message string `json:"message"` // human-readable description of the problem
+}
+
+// ValidationErrors is a JSON-serializable collection of ValidationError values.
+type ValidationErrors struct {
+	Errors []ValidationError `json:"errors"` // serialized under the "errors" key
+}
+
+// ValidateCreateResponseRequest checks a CreateResponseRequest and stops at the
+// first failing rule.
+//
+// It returns (true, nil) when the request is acceptable, otherwise false and a
+// *common.Error whose code identifies the failed rule. Rules, in order: model
+// is required; input must pass validateInput; temperature in [0, 2]; top_p in
+// [0, 1]; top_k >= 1; repetition_penalty in [0, 2]; presence_penalty and
+// frequency_penalty in [-2, 2]; max_tokens >= 1; timeout >= 1; then
+// response_format, tools and tool_choice via their own validators.
+//
+// For input failures the field-level details reported by validateInput are
+// appended to the message so the caller can see which part was rejected.
+func ValidateCreateResponseRequest(req *requesttypes.CreateResponseRequest) (bool, *common.Error) {
+	// Validate model
+	if req.Model == "" {
+		return false, common.NewErrorWithMessage("model is required", "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
+	}
+
+	// Validate input, surfacing every field-level problem instead of dropping it
+	if inputErrs := validateInput(req.Input); inputErrs != nil {
+		details := make([]string, 0, len(*inputErrs))
+		for _, e := range *inputErrs {
+			details = append(details, e.Field+": "+e.Message)
+		}
+		message := "input validation error"
+		if len(details) > 0 {
+			message += ": " + strings.Join(details, "; ")
+		}
+		return false, common.NewErrorWithMessage(message, "b2c3d4e5-f6g7-8901-bcde-f23456789012")
+	}
+
+	// Validate temperature
+	if req.Temperature != nil && (*req.Temperature < 0 || *req.Temperature > 2) {
+		return false, common.NewErrorWithMessage("temperature must be between 0 and 2", "c3d4e5f6-g7h8-9012-cdef-345678901234")
+	}
+
+	// Validate top_p
+	if req.TopP != nil && (*req.TopP < 0 || *req.TopP > 1) {
+		return false, common.NewErrorWithMessage("top_p must be between 0 and 1", "d4e5f6g7-h8i9-0123-defg-456789012345")
+	}
+
+	// Validate top_k
+	if req.TopK != nil && *req.TopK < 1 {
+		return false, common.NewErrorWithMessage("top_k must be greater than 0", "e5f6g7h8-i9j0-1234-efgh-567890123456")
+	}
+
+	// Validate repetition_penalty
+	if req.RepetitionPenalty != nil && (*req.RepetitionPenalty < 0 || *req.RepetitionPenalty > 2) {
+		return false, common.NewErrorWithMessage("repetition_penalty must be between 0 and 2", "f6g7h8i9-j0k1-2345-fghi-678901234567")
+	}
+
+	// Validate presence_penalty
+	if req.PresencePenalty != nil && (*req.PresencePenalty < -2 || *req.PresencePenalty > 2) {
+		return false, common.NewErrorWithMessage("presence_penalty must be between -2 and 2", "g7h8i9j0-k1l2-3456-ghij-789012345678")
+	}
+
+	// Validate frequency_penalty
+	if req.FrequencyPenalty != nil && (*req.FrequencyPenalty < -2 || *req.FrequencyPenalty > 2) {
+		return false, common.NewErrorWithMessage("frequency_penalty must be between -2 and 2", "h8i9j0k1-l2m3-4567-hijk-890123456789")
+	}
+
+	// Validate max_tokens
+	if req.MaxTokens != nil && *req.MaxTokens < 1 {
+		return false, common.NewErrorWithMessage("max_tokens must be greater than 0", "i9j0k1l2-m3n4-5678-ijkl-901234567890")
+	}
+
+	// Validate timeout
+	if req.Timeout != nil && *req.Timeout < 1 {
+		return false, common.NewErrorWithMessage("timeout must be greater than 0", "j0k1l2m3-n4o5-6789-jklm-012345678901")
+	}
+
+	// Validate response_format
+	if req.ResponseFormat != nil {
+		if err := validateResponseFormat(req.ResponseFormat); err != nil {
+			return false, common.NewErrorWithMessage("response_format validation error", "k1l2m3n4-o5p6-7890-klmn-123456789012")
+		}
+	}
+
+	// Validate tools
+	if req.Tools != nil {
+		if err := validateTools(req.Tools); err != nil {
+			return false, common.NewErrorWithMessage("tools validation error", "l2m3n4o5-p6q7-8901-lmno-234567890123")
+		}
+	}
+
+	// Validate tool_choice
+	if req.ToolChoice != nil {
+		if err := validateToolChoice(req.ToolChoice); err != nil {
+			return false, common.NewErrorWithMessage("tool_choice validation error", "m3n4o5p6-q7r8-9012-mnop-345678901234")
+		}
+	}
+
+	return true, nil
+}
+
+// validateInput checks the request's input value, which may be a string, an
+// array whose elements are strings or message objects, or a single object. An
+// object is validated as a structured CreateResponseInput when
+// convertToCreateResponseInput recognizes it, and as a message object
+// otherwise. It returns nil when no problem was found, else a pointer to the
+// collected errors.
+func validateInput(input any) *[]ValidationError {
+	var problems []ValidationError
+	report := func(field, message string) {
+		problems = append(problems, ValidationError{Field: field, Message: message})
+	}
+	absorb := func(nested *[]ValidationError) {
+		if nested != nil {
+			problems = append(problems, *nested...)
+		}
+	}
+
+	if input == nil {
+		report("input", "input is required")
+		return &problems
+	}
+
+	switch value := input.(type) {
+	case string:
+		if value == "" {
+			report("input", "input string cannot be empty")
+		}
+	case []any:
+		if len(value) == 0 {
+			report("input", "input array cannot be empty")
+		}
+		for idx, element := range value {
+			switch typed := element.(type) {
+			case string:
+				if typed == "" {
+					report(fmt.Sprintf("input[%d]", idx), "input array string items cannot be empty")
+				}
+			case map[string]any:
+				// Message object inside the array.
+				absorb(validateMessageObject(typed, idx))
+			default:
+				report(fmt.Sprintf("input[%d]", idx), "input array items must be strings or message objects with 'role' and 'content'")
+			}
+		}
+	case map[string]any:
+		if structured := convertToCreateResponseInput(value); structured != nil {
+			// Recognized as a structured input object.
+			absorb(validateCreateResponseInput(structured))
+		} else {
+			// Otherwise treat it as a lone message object.
+			absorb(validateMessageObject(value, 0))
+		}
+	default:
+		report("input", "input must be a string, array of strings/message objects, or structured input object")
+	}
+
+	if len(problems) == 0 {
+		return nil
+	}
+	return &problems
+}
+
+// convertToCreateResponseInput builds a CreateResponseInput from a decoded JSON object whose "type" member is a string naming one of the input types listed in the switch below; it returns nil for any other map.
+// Members are copied only when they have the expected dynamic type; a sub-object that is missing or lacks the member the code requires (file_id, query, url, calls, task) leaves the matching result field nil.
+func convertToCreateResponseInput(inputMap map[string]any) *requesttypes.CreateResponseInput {
+	// A map without a "type" member is not treated as structured input
+	typeField, hasType := inputMap["type"]
+	if !hasType {
+		return nil
+	}
+
+	typeStr, ok := typeField.(string)
+	if !ok {
+		return nil
+	}
+
+	// Only the input types enumerated here are recognized; anything else returns nil
+	switch requesttypes.InputType(typeStr) {
+	case requesttypes.InputTypeText,
+		requesttypes.InputTypeImage,
+		requesttypes.InputTypeFile,
+		requesttypes.InputTypeWebSearch,
+		requesttypes.InputTypeFileSearch,
+		requesttypes.InputTypeStreaming,
+		requesttypes.InputTypeFunctionCalls,
+		requesttypes.InputTypeReasoning:
+		// Start from an object that carries only the type; payload fields are filled in below
+		structuredInput := &requesttypes.CreateResponseInput{
+			Type: requesttypes.InputType(typeStr),
+		}
+
+		// Copy the payload that belongs to this input type, ignoring members of the wrong dynamic type
+		switch requesttypes.InputType(typeStr) {
+		case requesttypes.InputTypeText:
+			if text, ok := inputMap["text"].(string); ok {
+				structuredInput.Text = &text
+			}
+		case requesttypes.InputTypeImage:
+			if imageData, ok := inputMap["image"].(map[string]any); ok {
+				imageInput := &requesttypes.ImageInput{}
+				if url, ok := imageData["url"].(string); ok {
+					imageInput.URL = &url
+				}
+				if data, ok := imageData["data"].(string); ok {
+					imageInput.Data = &data
+				}
+				if detail, ok := imageData["detail"].(string); ok {
+					imageInput.Detail = &detail
+				}
+				structuredInput.Image = imageInput // set even when none of url/data/detail was present
+			}
+		case requesttypes.InputTypeFile:
+			if fileData, ok := inputMap["file"].(map[string]any); ok {
+				if fileID, ok := fileData["file_id"].(string); ok {
+					structuredInput.File = &requesttypes.FileInput{
+						FileID: fileID,
+					}
+				}
+			}
+		case requesttypes.InputTypeWebSearch:
+			if webSearchData, ok := inputMap["web_search"].(map[string]any); ok {
+				if query, ok := webSearchData["query"].(string); ok {
+					webSearchInput := &requesttypes.WebSearchInput{
+						Query: query,
+					}
+					if maxResults, ok := webSearchData["max_results"].(float64); ok { // asserted as float64, then converted with int()
+						maxResultsInt := int(maxResults)
+						webSearchInput.MaxResults = &maxResultsInt
+					}
+					if searchEngine, ok := webSearchData["search_engine"].(string); ok {
+						webSearchInput.SearchEngine = &searchEngine
+					}
+					structuredInput.WebSearch = webSearchInput
+				}
+			}
+		case requesttypes.InputTypeFileSearch:
+			if fileSearchData, ok := inputMap["file_search"].(map[string]any); ok {
+				if query, ok := fileSearchData["query"].(string); ok {
+					fileSearchInput := &requesttypes.FileSearchInput{
+						Query: query,
+					}
+					if fileIDs, ok := fileSearchData["file_ids"].([]any); ok {
+						fileSearchInput.FileIDs = make([]string, len(fileIDs)) // non-string entries are left as ""
+						for i, id := range fileIDs {
+							if idStr, ok := id.(string); ok {
+								fileSearchInput.FileIDs[i] = idStr
+							}
+						}
+					}
+					if maxResults, ok := fileSearchData["max_results"].(float64); ok { // asserted as float64, then converted with int()
+						maxResultsInt := int(maxResults)
+						fileSearchInput.MaxResults = &maxResultsInt
+					}
+					structuredInput.FileSearch = fileSearchInput
+				}
+			}
+		case requesttypes.InputTypeStreaming:
+			if streamingData, ok := inputMap["streaming"].(map[string]any); ok {
+				if url, ok := streamingData["url"].(string); ok {
+					streamingInput := &requesttypes.StreamingInput{
+						URL: url,
+					}
+					if method, ok := streamingData["method"].(string); ok {
+						streamingInput.Method = &method
+					}
+					if body, ok := streamingData["body"].(string); ok {
+						streamingInput.Body = &body
+					}
+					if headers, ok := streamingData["headers"].(map[string]any); ok {
+						streamingInput.Headers = make(map[string]string) // header values that are not strings are dropped
+						for k, v := range headers {
+							if vStr, ok := v.(string); ok {
+								streamingInput.Headers[k] = vStr
+							}
+						}
+					}
+					structuredInput.Streaming = streamingInput
+				}
+			}
+		case requesttypes.InputTypeFunctionCalls:
+			if functionCallsData, ok := inputMap["function_calls"].(map[string]any); ok {
+				if calls, ok := functionCallsData["calls"].([]any); ok {
+					functionCallsInput := &requesttypes.FunctionCallsInput{
+						Calls: make([]requesttypes.FunctionCall, len(calls)), // entries that are not objects with a string name stay zero-valued
+					}
+					for i, call := range calls {
+						if callData, ok := call.(map[string]any); ok {
+							if name, ok := callData["name"].(string); ok {
+								functionCallsInput.Calls[i] = requesttypes.FunctionCall{
+									Name: name,
+								}
+								if args, ok := callData["arguments"].(map[string]any); ok {
+									functionCallsInput.Calls[i].Arguments = args
+								}
+							}
+						}
+					}
+					structuredInput.FunctionCalls = functionCallsInput
+				}
+			}
+		case requesttypes.InputTypeReasoning:
+			if reasoningData, ok := inputMap["reasoning"].(map[string]any); ok {
+				if task, ok := reasoningData["task"].(string); ok {
+					reasoningInput := &requesttypes.ReasoningInput{
+						Task: task,
+					}
+					if context, ok := reasoningData["context"].(string); ok {
+						reasoningInput.Context = &context
+					}
+					structuredInput.Reasoning = reasoningInput
+				}
+			}
+		}
+
+		return structuredInput
+	default:
+		return nil
+	}
+}
+
+// validateMessageObject validates a message object in the input array
+func validateMessageObject(msg map[string]any, index int) *[]ValidationError {
+	var errors []ValidationError
+
+	// Check for required role field
+	role, hasRole := msg["role"]
+	if !hasRole {
+		errors = append(errors, ValidationError{
+			Field:   fmt.Sprintf("input[%d].role", index),
+			Message: "role is required for message objects",
+		})
+	} else if roleStr, ok := role.(string); !ok || roleStr == "" {
+		errors = append(errors, ValidationError{
+			Field:   fmt.Sprintf("input[%d].role", index),
+			Message: "role must be a non-empty string",
+		})
+	} else if roleStr != "system" && roleStr != "user" && roleStr != "assistant" {
+		errors = append(errors, ValidationError{
+			Field:   fmt.Sprintf("input[%d].role", index),
+			Message: "role must be one of: system, user, assistant",
+		})
+	}
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateCreateResponseInput checks a structured CreateResponseInput. The type
+// must be set; the payload matching that type must be present and pass its own
+// validator. Unknown types are reported as invalid. It returns nil when no
+// problem was found, else a pointer to the collected errors.
+func validateCreateResponseInput(input *requesttypes.CreateResponseInput) *[]ValidationError {
+	if input.Type == "" {
+		return &[]ValidationError{{
+			Field:   "input.type",
+			Message: "input.type is required",
+		}}
+	}
+
+	var problems []ValidationError
+	missing := func(field, message string) {
+		problems = append(problems, ValidationError{Field: field, Message: message})
+	}
+	merge := func(nested *[]ValidationError) {
+		if nested != nil {
+			problems = append(problems, *nested...)
+		}
+	}
+
+	// Each type requires its own payload, which is then validated in turn.
+	switch input.Type {
+	case requesttypes.InputTypeText:
+		if input.Text == nil || *input.Text == "" {
+			missing("input.text", "input.text is required for text type")
+		}
+	case requesttypes.InputTypeImage:
+		if input.Image != nil {
+			merge(validateImageInput(input.Image))
+		} else {
+			missing("input.image", "input.image is required for image type")
+		}
+	case requesttypes.InputTypeFile:
+		if input.File != nil {
+			merge(validateFileInput(input.File))
+		} else {
+			missing("input.file", "input.file is required for file type")
+		}
+	case requesttypes.InputTypeWebSearch:
+		if input.WebSearch != nil {
+			merge(validateWebSearchInput(input.WebSearch))
+		} else {
+			missing("input.web_search", "input.web_search is required for web_search type")
+		}
+	case requesttypes.InputTypeFileSearch:
+		if input.FileSearch != nil {
+			merge(validateFileSearchInput(input.FileSearch))
+		} else {
+			missing("input.file_search", "input.file_search is required for file_search type")
+		}
+	case requesttypes.InputTypeStreaming:
+		if input.Streaming != nil {
+			merge(validateStreamingInput(input.Streaming))
+		} else {
+			missing("input.streaming", "input.streaming is required for streaming type")
+		}
+	case requesttypes.InputTypeFunctionCalls:
+		if input.FunctionCalls != nil {
+			merge(validateFunctionCallsInput(input.FunctionCalls))
+		} else {
+			missing("input.function_calls", "input.function_calls is required for function_calls type")
+		}
+	case requesttypes.InputTypeReasoning:
+		if input.Reasoning != nil {
+			merge(validateReasoningInput(input.Reasoning))
+		} else {
+			missing("input.reasoning", "input.reasoning is required for reasoning type")
+		}
+	default:
+		missing("input.type", fmt.Sprintf("invalid input type: %s", input.Type))
+	}
+
+	if len(problems) == 0 {
+		return nil
+	}
+	return &problems
+}
+
+// validateImageInput checks an ImageInput.
+//
+// Exactly one of url and data must be supplied. An empty string counts as not
+// supplied, so a payload such as {"url": ""} is rejected instead of slipping
+// past both the presence check and the format check. A supplied url must start
+// with http:// or https://; supplied data must start with "data:image/". When
+// detail is set it must be low, high or auto. It returns nil when no problem
+// was found, else a pointer to the collected errors.
+func validateImageInput(image *requesttypes.ImageInput) *[]ValidationError {
+	var errors []ValidationError
+
+	hasURL := image.URL != nil && *image.URL != ""
+	hasData := image.Data != nil && *image.Data != ""
+
+	switch {
+	case !hasURL && !hasData:
+		// Either URL or data must be provided
+		errors = append(errors, ValidationError{
+			Field:   "input.image",
+			Message: "either url or data must be provided for image input",
+		})
+	case hasURL && hasData:
+		// Both URL and data cannot be provided
+		errors = append(errors, ValidationError{
+			Field:   "input.image",
+			Message: "either url or data must be provided, not both",
+		})
+	}
+
+	// Validate URL if provided
+	if hasURL && !strings.HasPrefix(*image.URL, "http://") && !strings.HasPrefix(*image.URL, "https://") {
+		errors = append(errors, ValidationError{
+			Field:   "input.image.url",
+			Message: "url must be a valid HTTP or HTTPS URL",
+		})
+	}
+
+	// Validate data if provided
+	if hasData && !strings.HasPrefix(*image.Data, "data:image/") {
+		errors = append(errors, ValidationError{
+			Field:   "input.image.data",
+			Message: "data must be a valid base64-encoded image with data URL format",
+		})
+	}
+
+	// Validate detail if provided
+	if image.Detail != nil {
+		if *image.Detail != "low" && *image.Detail != "high" && *image.Detail != "auto" {
+			errors = append(errors, ValidationError{
+				Field:   "input.image.detail",
+				Message: "detail must be one of: low, high, auto",
+			})
+		}
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateFileInput reports "file_id is required" on input.file.file_id when file.FileID is empty; it returns nil otherwise. file must be non-nil.
+func validateFileInput(file *requesttypes.FileInput) *[]ValidationError {
+	var errors []ValidationError
+
+	if file.FileID == "" {
+		errors = append(errors, ValidationError{
+			Field:   "input.file.file_id",
+			Message: "file_id is required",
+		})
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateWebSearchInput requires a non-empty Query and, when MaxResults is set, a value in [1, 20]; it returns nil when both hold. webSearch must be non-nil.
+func validateWebSearchInput(webSearch *requesttypes.WebSearchInput) *[]ValidationError {
+	var errors []ValidationError
+
+	if webSearch.Query == "" {
+		errors = append(errors, ValidationError{
+			Field:   "input.web_search.query",
+			Message: "query is required",
+		})
+	}
+
+	if webSearch.MaxResults != nil { // MaxResults is optional; nil skips the range check
+		if *webSearch.MaxResults < 1 || *webSearch.MaxResults > 20 {
+			errors = append(errors, ValidationError{
+				Field:   "input.web_search.max_results",
+				Message: "max_results must be between 1 and 20",
+			})
+		}
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateFileSearchInput requires a non-empty Query, at least one entry in FileIDs and, when MaxResults is set, a value in [1, 20]; it returns nil when all hold. fileSearch must be non-nil.
+func validateFileSearchInput(fileSearch *requesttypes.FileSearchInput) *[]ValidationError {
+	var errors []ValidationError
+
+	if fileSearch.Query == "" {
+		errors = append(errors, ValidationError{
+			Field:   "input.file_search.query",
+			Message: "query is required",
+		})
+	}
+
+	if len(fileSearch.FileIDs) == 0 { // covers both a nil and an empty slice
+		errors = append(errors, ValidationError{
+			Field:   "input.file_search.file_ids",
+			Message: "file_ids is required and cannot be empty",
+		})
+	}
+
+	if fileSearch.MaxResults != nil { // MaxResults is optional; nil skips the range check
+		if *fileSearch.MaxResults < 1 || *fileSearch.MaxResults > 20 {
+			errors = append(errors, ValidationError{
+				Field:   "input.file_search.max_results",
+				Message: "max_results must be between 1 and 20",
+			})
+		}
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateStreamingInput requires URL to be non-empty with an http:// or https:// prefix and, when Method is set, one of GET/POST/PUT/DELETE/PATCH; it returns nil when both hold. streaming must be non-nil.
+func validateStreamingInput(streaming *requesttypes.StreamingInput) *[]ValidationError {
+	var errors []ValidationError
+
+	if streaming.URL == "" {
+		errors = append(errors, ValidationError{
+			Field:   "input.streaming.url",
+			Message: "url is required",
+		})
+	} else if !strings.HasPrefix(streaming.URL, "http://") && !strings.HasPrefix(streaming.URL, "https://") { // prefix check only; the URL is not parsed
+		errors = append(errors, ValidationError{
+			Field:   "input.streaming.url",
+			Message: "url must be a valid HTTP or HTTPS URL",
+		})
+	}
+
+	if streaming.Method != nil {
+		method := strings.ToUpper(*streaming.Method) // upper-cased so the comparison below is case-insensitive
+		if method != "GET" && method != "POST" && method != "PUT" && method != "DELETE" && method != "PATCH" {
+			errors = append(errors, ValidationError{
+				Field:   "input.streaming.method",
+				Message: "method must be one of: GET, POST, PUT, DELETE, PATCH",
+			})
+		}
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateFunctionCallsInput requires at least one entry in Calls and a non-empty Name on every entry (reported per index); it returns nil when both hold. functionCalls must be non-nil.
+func validateFunctionCallsInput(functionCalls *requesttypes.FunctionCallsInput) *[]ValidationError {
+	var errors []ValidationError
+
+	if len(functionCalls.Calls) == 0 {
+		errors = append(errors, ValidationError{
+			Field:   "input.function_calls.calls",
+			Message: "calls is required and cannot be empty",
+		})
+	}
+
+	for i, call := range functionCalls.Calls {
+		if call.Name == "" {
+			errors = append(errors, ValidationError{
+				Field:   fmt.Sprintf("input.function_calls.calls[%d].name", i), // index identifies the offending call
+				Message: "name is required",
+			})
+		}
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateReasoningInput reports "task is required" on input.reasoning.task when reasoning.Task is empty; it returns nil otherwise. reasoning must be non-nil.
+func validateReasoningInput(reasoning *requesttypes.ReasoningInput) *[]ValidationError {
+	var errors []ValidationError
+
+	if reasoning.Task == "" {
+		errors = append(errors, ValidationError{
+			Field:   "input.reasoning.task",
+			Message: "task is required",
+		})
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateResponseFormat requires format.Type to be "text" or "json_object" (an empty Type gets its own "type is required" error); it returns nil when valid. format must be non-nil.
+func validateResponseFormat(format *requesttypes.ResponseFormat) *[]ValidationError {
+	var errors []ValidationError
+
+	if format.Type == "" {
+		errors = append(errors, ValidationError{
+			Field:   "response_format.type",
+			Message: "type is required",
+		})
+	} else if format.Type != "text" && format.Type != "json_object" {
+		errors = append(errors, ValidationError{
+			Field:   "response_format.type",
+			Message: "type must be one of: text, json_object",
+		})
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateTools checks every tool: Type must be "function", a function tool must carry a Function, and any attached Function needs a Name; errors are reported per index and nil means all tools passed.
+func validateTools(tools []requesttypes.Tool) *[]ValidationError {
+	var errors []ValidationError
+
+	for i, tool := range tools {
+		if tool.Type == "" {
+			errors = append(errors, ValidationError{
+				Field:   fmt.Sprintf("tools[%d].type", i),
+				Message: "type is required",
+			})
+		} else if tool.Type != "function" {
+			errors = append(errors, ValidationError{
+				Field:   fmt.Sprintf("tools[%d].type", i),
+				Message: "type must be 'function'",
+			})
+		}
+
+		if tool.Type == "function" && tool.Function == nil {
+			errors = append(errors, ValidationError{
+				Field:   fmt.Sprintf("tools[%d].function", i),
+				Message: "function is required for function type tools",
+			})
+		}
+
+		if tool.Function != nil { // the name is checked whenever a function is attached, regardless of Type
+			if tool.Function.Name == "" {
+				errors = append(errors, ValidationError{
+					Field:   fmt.Sprintf("tools[%d].function.name", i),
+					Message: "function name is required",
+				})
+			}
+		}
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// validateToolChoice requires choice.Type to be "none", "auto" or "function", a Function when Type is "function", and a Name on any attached Function; it returns nil when valid. choice must be non-nil.
+func validateToolChoice(choice *requesttypes.ToolChoice) *[]ValidationError {
+	var errors []ValidationError
+
+	if choice.Type == "" {
+		errors = append(errors, ValidationError{
+			Field:   "tool_choice.type",
+			Message: "type is required",
+		})
+	} else if choice.Type != "none" && choice.Type != "auto" && choice.Type != "function" {
+		errors = append(errors, ValidationError{
+			Field:   "tool_choice.type",
+			Message: "type must be one of: none, auto, function",
+		})
+	}
+
+	if choice.Type == "function" && choice.Function == nil {
+		errors = append(errors, ValidationError{
+			Field:   "tool_choice.function",
+			Message: "function is required for function type tool choice",
+		})
+	}
+
+	if choice.Function != nil { // the name is checked whenever a function is attached, regardless of Type
+		if choice.Function.Name == "" {
+			errors = append(errors, ValidationError{
+				Field:   "tool_choice.function.name",
+				Message: "function name is required",
+			})
+		}
+	}
+
+	if len(errors) > 0 {
+		return &errors
+	}
+
+	return nil
+}
+
+// ValidateResponseID returns a ValidationError on field "response_id" when responseID is empty, and nil otherwise.
+func ValidateResponseID(responseID string) *ValidationError {
+	if responseID == "" {
+		return &ValidationError{
+			Field:   "response_id",
+			Message: "response_id is required",
+		}
+	}
+
+	return nil
+}
diff --git a/apps/jan-api-gateway/application/app/domain/service_provider.go b/apps/jan-api-gateway/application/app/domain/service_provider.go
index 9a4668bc..e90e6720 100644
--- a/apps/jan-api-gateway/application/app/domain/service_provider.go
+++ b/apps/jan-api-gateway/application/app/domain/service_provider.go
@@ -8,6 +8,7 @@ import (
 	"menlo.ai/jan-api-gateway/app/domain/mcp/serpermcp"
 	"menlo.ai/jan-api-gateway/app/domain/organization"
 	"menlo.ai/jan-api-gateway/app/domain/project"
+	"menlo.ai/jan-api-gateway/app/domain/response"
 	"menlo.ai/jan-api-gateway/app/domain/user"
 )
 
@@ -18,5 +19,9 @@ var ServiceProvider = wire.NewSet(
 	apikey.NewService,
 	user.NewService,
 	conversation.NewService,
+	response.NewResponseService,
+	response.NewResponseModelService,
+	response.NewStreamModelService,
+	response.NewNonStreamModelService,
 	serpermcp.NewSerperService,
 )
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/conversation.go b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/conversation.go
index 6a8c590b..40f145c3 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/conversation.go
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/conversation.go
@@ -2,6 +2,7 @@ package dbschema
 
 import (
 	"encoding/json"
+	"time"
 
 	"menlo.ai/jan-api-gateway/app/domain/conversation"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database"
@@ -18,25 +19,27 @@ type Conversation struct {
 	PublicID  string `gorm:"type:varchar(50);uniqueIndex;not null"`
 	Title     string `gorm:"type:varchar(255)"`
 	UserID    uint   `gorm:"not null;index"`
-	Status    string `gorm:"type:varchar(20);not null;default:'active'"`
+	Status    string `gorm:"type:varchar(20);not null;default:'active';index"`
 	Metadata  string `gorm:"type:text"`
-	IsPrivate bool   `gorm:"not null;default:true"`
+	IsPrivate bool   `gorm:"not null;default:true;index"`
 	Items     []Item `gorm:"foreignKey:ConversationID"`
 	User      User   `gorm:"foreignKey:UserID"`
 }
 
 type Item struct {
 	BaseModel
-	PublicID          string       `gorm:"type:varchar(50);uniqueIndex;not null"` // OpenAI-compatible string ID
+	PublicID          string       `gorm:"type:varchar(50);uniqueIndex;not null"`
 	ConversationID    uint         `gorm:"not null;index"`
-	Type              string       `gorm:"type:varchar(50);not null"`
-	Role              string       `gorm:"type:varchar(20)"`
+	ResponseID        *uint        `gorm:"index"`
+	Type              string       `gorm:"type:varchar(50);not null;index"`
+	Role              string       `gorm:"type:varchar(20);index"`
 	Content           string       `gorm:"type:text"`
-	Status            string       `gorm:"type:varchar(50)"`
-	IncompleteAt      *int64       `gorm:"type:bigint"`
+	Status            string       `gorm:"type:varchar(50);index"`
+	IncompleteAt      *time.Time   `gorm:"type:timestamp"`
 	IncompleteDetails string       `gorm:"type:text"`
-	CompletedAt       *int64       `gorm:"type:bigint"`
+	CompletedAt       *time.Time   `gorm:"type:timestamp"`
 	Conversation      Conversation `gorm:"foreignKey:ConversationID"`
+	Response          *Response    `gorm:"foreignKey:ResponseID"`
 }
 
 func NewSchemaConversation(c *conversation.Conversation) *Conversation {
@@ -55,7 +58,7 @@ func NewSchemaConversation(c *conversation.Conversation) *Conversation {
 			ID: c.ID,
 		},
 		PublicID:  c.PublicID,
-		Title:     stringPtrToString(c.Title),
+		Title:     ptr.FromString(c.Title),
 		UserID:    c.UserID,
 		Status:    string(c.Status),
 		Metadata:  metadataJSON,
@@ -79,8 +82,8 @@ func (c *Conversation) EtoD() *conversation.Conversation {
 		Status:    conversation.ConversationStatus(c.Status),
 		Metadata:  metadata,
 		IsPrivate: c.IsPrivate,
-		CreatedAt: c.CreatedAt.Unix(),
-		UpdatedAt: c.UpdatedAt.Unix(),
+		CreatedAt: c.CreatedAt,
+		UpdatedAt: c.UpdatedAt,
 	}
 }
 
@@ -103,11 +106,13 @@ func NewSchemaItem(i *conversation.Item) *Item {
 		BaseModel: BaseModel{
 			ID: i.ID,
 		},
-		PublicID:          i.PublicID, // Add PublicID field
+		PublicID:          i.PublicID,
+		ConversationID:    i.ConversationID,
+		ResponseID:        i.ResponseID,
 		Type:              string(i.Type),
-		Role:              stringPtrToString((*string)(i.Role)),
+		Role:              ptr.FromString((*string)(i.Role)), // Role/Status may be nil: string(*p) would panic; assumes ptr.FromString yields "" for nil like the removed stringPtrToString — confirm
 		Content:           contentJSON,
-		Status:            stringPtrToString(i.Status),
+		Status:            ptr.FromString((*string)(i.Status)),
 		IncompleteAt:      i.IncompleteAt,
 		IncompleteDetails: incompleteDetailsJSON,
 		CompletedAt:       i.CompletedAt,
@@ -134,19 +139,12 @@ func (i *Item) EtoD() *conversation.Item {
 		Type:              conversation.ItemType(i.Type),
 		Role:              (*conversation.ItemRole)(ptr.ToString(i.Role)),
 		Content:           content,
-		Status:            ptr.ToString(i.Status),
+		Status:            (*conversation.ItemStatus)(ptr.ToString(i.Status)),
 		IncompleteAt:      i.IncompleteAt,
 		IncompleteDetails: incompleteDetails,
 		CompletedAt:       i.CompletedAt,
-		CreatedAt:         i.CreatedAt.Unix(), // Convert time.Time to Unix timestamp
 		ConversationID:    i.ConversationID,
+		ResponseID:        i.ResponseID,
+		CreatedAt:         i.CreatedAt,
 	}
 }
-
-// Helper functions
-func stringPtrToString(s *string) string {
-	if s == nil {
-		return ""
-	}
-	return *s
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/response.go b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/response.go
new file mode 100644
index 00000000..4907df3e
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/response.go
@@ -0,0 +1,141 @@
+package dbschema
+
+import (
+	"time"
+
+	"menlo.ai/jan-api-gateway/app/domain/response"
+	"menlo.ai/jan-api-gateway/app/infrastructure/database"
+)
+
+// Response is the GORM model for one row of the "responses" table (see TableName); pointer-typed fields are the nullable columns.
+type Response struct {
+	BaseModel
+	PublicID           string  `gorm:"size:255;not null;uniqueIndex"`
+	UserID             uint    `gorm:"not null;index"`
+	ConversationID     *uint   `gorm:"index"`
+	PreviousResponseID *string `gorm:"size:255;index"`
+	Model              string  `gorm:"size:255;not null;index"`
+	Status             string  `gorm:"size:50;not null;default:'pending';index"`
+	Input              string  `gorm:"type:text;not null"`
+	Output             *string `gorm:"type:text"`
+	SystemPrompt       *string `gorm:"type:text"`
+	MaxTokens          *int
+	Temperature        *float64
+	TopP               *float64
+	TopK               *int
+	RepetitionPenalty  *float64
+	Seed               *int
+	Stop               *string `gorm:"type:text"`
+	PresencePenalty    *float64
+	FrequencyPenalty   *float64
+	LogitBias          *string `gorm:"type:text"`
+	ResponseFormat     *string `gorm:"type:text"`
+	Tools              *string `gorm:"type:text"`
+	ToolChoice         *string `gorm:"type:text"`
+	Metadata           *string `gorm:"type:text"`
+	Stream             *bool
+	Background         *bool
+	Timeout            *int
+	User               *string `gorm:"size:255"`
+	Usage              *string `gorm:"type:text"`
+	Error              *string `gorm:"type:text"`
+	CompletedAt        *time.Time
+	CancelledAt        *time.Time
+	FailedAt           *time.Time
+
+	// Associations, joined through the foreign keys named in each tag (UserEntity is so named because User is already a column above)
+	UserEntity   User          `gorm:"foreignKey:UserID;references:ID"`
+	Conversation *Conversation `gorm:"foreignKey:ConversationID;references:ID"`
+	Items        []Item        `gorm:"foreignKey:ResponseID;references:ID"`
+}
+
+// TableName pins the table for the Response model to "responses".
+func (Response) TableName() string {
+	return "responses"
+}
+
+func init() {
+	database.RegisterSchemaForAutoMigrate(Response{}) // package-init side effect: hands the Response schema to the database package's auto-migrate registry
+}
+
+// NewSchemaResponse copies a domain Response field-by-field into a database Response; r must be non-nil, pointer fields are shared (not cloned), and only ID is set on BaseModel (CreatedAt/UpdatedAt are not carried over).
+func NewSchemaResponse(r *response.Response) *Response {
+	return &Response{
+		BaseModel: BaseModel{
+			ID: r.ID,
+		},
+		PublicID:           r.PublicID,
+		UserID:             r.UserID,
+		ConversationID:     r.ConversationID,
+		PreviousResponseID: r.PreviousResponseID,
+		Model:              r.Model,
+		Status:             string(r.Status),
+		Input:              r.Input,
+		Output:             r.Output,
+		SystemPrompt:       r.SystemPrompt,
+		MaxTokens:          r.MaxTokens,
+		Temperature:        r.Temperature,
+		TopP:               r.TopP,
+		TopK:               r.TopK,
+		RepetitionPenalty:  r.RepetitionPenalty,
+		Seed:               r.Seed,
+		Stop:               r.Stop,
+		PresencePenalty:    r.PresencePenalty,
+		FrequencyPenalty:   r.FrequencyPenalty,
+		LogitBias:          r.LogitBias,
+		ResponseFormat:     r.ResponseFormat,
+		Tools:              r.Tools,
+		ToolChoice:         r.ToolChoice,
+		Metadata:           r.Metadata,
+		Stream:             r.Stream,
+		Background:         r.Background,
+		Timeout:            r.Timeout,
+		User:               r.User,
+		Usage:              r.Usage,
+		Error:              r.Error,
+		CompletedAt:        r.CompletedAt,
+		CancelledAt:        r.CancelledAt,
+		FailedAt:           r.FailedAt,
+	}
+}
+
+// EtoD copies this database Response field-by-field into a domain Response, including CreatedAt and UpdatedAt; r must be non-nil, pointer fields are shared (not cloned), and the associations (UserEntity, Conversation, Items) are not mapped.
+func (r *Response) EtoD() *response.Response {
+	return &response.Response{
+		ID:                 r.ID,
+		PublicID:           r.PublicID,
+		UserID:             r.UserID,
+		ConversationID:     r.ConversationID,
+		PreviousResponseID: r.PreviousResponseID,
+		Model:              r.Model,
+		Status:             response.ResponseStatus(r.Status),
+		Input:              r.Input,
+		Output:             r.Output,
+		SystemPrompt:       r.SystemPrompt,
+		MaxTokens:          r.MaxTokens,
+		Temperature:        r.Temperature,
+		TopP:               r.TopP,
+		TopK:               r.TopK,
+		RepetitionPenalty:  r.RepetitionPenalty,
+		Seed:               r.Seed,
+		Stop:               r.Stop,
+		PresencePenalty:    r.PresencePenalty,
+		FrequencyPenalty:   r.FrequencyPenalty,
+		LogitBias:          r.LogitBias,
+		ResponseFormat:     r.ResponseFormat,
+		Tools:              r.Tools,
+		ToolChoice:         r.ToolChoice,
+		Metadata:           r.Metadata,
+		Stream:             r.Stream,
+		Background:         r.Background,
+		Timeout:            r.Timeout,
+		User:               r.User,
+		Usage:              r.Usage,
+		Error:              r.Error,
+		CompletedAt:        r.CompletedAt,
+		CancelledAt:        r.CancelledAt,
+		FailedAt:           r.FailedAt,
+		CreatedAt:          r.CreatedAt,
+		UpdatedAt:          r.UpdatedAt,
+	}
+}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/conversations.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/conversations.gen.go
index 101cde20..fdf8fb89 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/conversations.gen.go
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/conversations.gen.go
@@ -85,6 +85,35 @@ func newConversation(db *gorm.DB, opts ...gen.DOOption) conversation {
 				RelationField: field.NewRelation("Items.Conversation.Items", "dbschema.Item"),
 			},
 		},
+		Response: struct {
+			field.RelationField
+			UserEntity struct {
+				field.RelationField
+			}
+			Conversation struct {
+				field.RelationField
+			}
+			Items struct {
+				field.RelationField
+			}
+		}{
+			RelationField: field.NewRelation("Items.Response", "dbschema.Response"),
+			UserEntity: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Items.Response.UserEntity", "dbschema.User"),
+			},
+			Conversation: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Items.Response.Conversation", "dbschema.Conversation"),
+			},
+			Items: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Items.Response.Items", "dbschema.Item"),
+			},
+		},
 	}
 
 	_conversation.User = conversationBelongsToUser{
@@ -207,6 +236,18 @@ type conversationHasManyItems struct {
 			field.RelationField
 		}
 	}
+	Response struct {
+		field.RelationField
+		UserEntity struct {
+			field.RelationField
+		}
+		Conversation struct {
+			field.RelationField
+		}
+		Items struct {
+			field.RelationField
+		}
+	}
 }
 
 func (a conversationHasManyItems) Where(conds ...field.Expr) *conversationHasManyItems {
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/gen.go
index 76c3b612..edfc12c0 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/gen.go
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/gen.go
@@ -25,6 +25,7 @@ var (
 	OrganizationMember *organizationMember
 	Project            *project
 	ProjectMember      *projectMember
+	Response           *response
 	User               *user
 )
 
@@ -38,6 +39,7 @@ func SetDefault(db *gorm.DB, opts ...gen.DOOption) {
 	OrganizationMember = &Q.OrganizationMember
 	Project = &Q.Project
 	ProjectMember = &Q.ProjectMember
+	Response = &Q.Response
 	User = &Q.User
 }
 
@@ -52,6 +54,7 @@ func Use(db *gorm.DB, opts ...gen.DOOption) *Query {
 		OrganizationMember: newOrganizationMember(db, opts...),
 		Project:            newProject(db, opts...),
 		ProjectMember:      newProjectMember(db, opts...),
+		Response:           newResponse(db, opts...),
 		User:               newUser(db, opts...),
 	}
 }
@@ -67,6 +70,7 @@ type Query struct {
 	OrganizationMember organizationMember
 	Project            project
 	ProjectMember      projectMember
+	Response           response
 	User               user
 }
 
@@ -83,6 +87,7 @@ func (q *Query) clone(db *gorm.DB) *Query {
 		OrganizationMember: q.OrganizationMember.clone(db),
 		Project:            q.Project.clone(db),
 		ProjectMember:      q.ProjectMember.clone(db),
+		Response:           q.Response.clone(db),
 		User:               q.User.clone(db),
 	}
 }
@@ -106,6 +111,7 @@ func (q *Query) ReplaceDB(db *gorm.DB) *Query {
 		OrganizationMember: q.OrganizationMember.replaceDB(db),
 		Project:            q.Project.replaceDB(db),
 		ProjectMember:      q.ProjectMember.replaceDB(db),
+		Response:           q.Response.replaceDB(db),
 		User:               q.User.replaceDB(db),
 	}
 }
@@ -119,6 +125,7 @@ type queryCtx struct {
 	OrganizationMember IOrganizationMemberDo
 	Project            IProjectDo
 	ProjectMember      IProjectMemberDo
+	Response           IResponseDo
 	User               IUserDo
 }
 
@@ -132,6 +139,7 @@ func (q *Query) WithContext(ctx context.Context) *queryCtx {
 		OrganizationMember: q.OrganizationMember.WithContext(ctx),
 		Project:            q.Project.WithContext(ctx),
 		ProjectMember:      q.ProjectMember.WithContext(ctx),
+		Response:           q.Response.WithContext(ctx),
 		User:               q.User.WithContext(ctx),
 	}
 }
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/items.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/items.gen.go
index 7acbf68d..0f495980 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/items.gen.go
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/items.gen.go
@@ -34,13 +34,14 @@ func newItem(db *gorm.DB, opts ...gen.DOOption) item {
 	_item.DeletedAt = field.NewField(tableName, "deleted_at")
 	_item.PublicID = field.NewString(tableName, "public_id")
 	_item.ConversationID = field.NewUint(tableName, "conversation_id")
+	_item.ResponseID = field.NewUint(tableName, "response_id")
 	_item.Type = field.NewString(tableName, "type")
 	_item.Role = field.NewString(tableName, "role")
 	_item.Content = field.NewString(tableName, "content")
 	_item.Status = field.NewString(tableName, "status")
-	_item.IncompleteAt = field.NewInt64(tableName, "incomplete_at")
+	_item.IncompleteAt = field.NewTime(tableName, "incomplete_at")
 	_item.IncompleteDetails = field.NewString(tableName, "incomplete_details")
-	_item.CompletedAt = field.NewInt64(tableName, "completed_at")
+	_item.CompletedAt = field.NewTime(tableName, "completed_at")
 	_item.Conversation = itemBelongsToConversation{
 		db: db.Session(&gorm.Session{}),
 
@@ -71,6 +72,18 @@ func newItem(db *gorm.DB, opts ...gen.DOOption) item {
 			Conversation struct {
 				field.RelationField
 			}
+			Response struct {
+				field.RelationField
+				UserEntity struct {
+					field.RelationField
+				}
+				Conversation struct {
+					field.RelationField
+				}
+				Items struct {
+					field.RelationField
+				}
+			}
 		}{
 			RelationField: field.NewRelation("Conversation.Items", "dbschema.Item"),
 			Conversation: struct {
@@ -78,9 +91,44 @@ func newItem(db *gorm.DB, opts ...gen.DOOption) item {
 			}{
 				RelationField: field.NewRelation("Conversation.Items.Conversation", "dbschema.Conversation"),
 			},
+			Response: struct {
+				field.RelationField
+				UserEntity struct {
+					field.RelationField
+				}
+				Conversation struct {
+					field.RelationField
+				}
+				Items struct {
+					field.RelationField
+				}
+			}{
+				RelationField: field.NewRelation("Conversation.Items.Response", "dbschema.Response"),
+				UserEntity: struct {
+					field.RelationField
+				}{
+					RelationField: field.NewRelation("Conversation.Items.Response.UserEntity", "dbschema.User"),
+				},
+				Conversation: struct {
+					field.RelationField
+				}{
+					RelationField: field.NewRelation("Conversation.Items.Response.Conversation", "dbschema.Conversation"),
+				},
+				Items: struct {
+					field.RelationField
+				}{
+					RelationField: field.NewRelation("Conversation.Items.Response.Items", "dbschema.Item"),
+				},
+			},
 		},
 	}
 
+	_item.Response = itemBelongsToResponse{
+		db: db.Session(&gorm.Session{}),
+
+		RelationField: field.NewRelation("Response", "dbschema.Response"),
+	}
+
 	_item.fillFieldMap()
 
 	return _item
@@ -96,15 +144,18 @@ type item struct {
 	DeletedAt         field.Field
 	PublicID          field.String
 	ConversationID    field.Uint
+	ResponseID        field.Uint
 	Type              field.String
 	Role              field.String
 	Content           field.String
 	Status            field.String
-	IncompleteAt      field.Int64
+	IncompleteAt      field.Time
 	IncompleteDetails field.String
-	CompletedAt       field.Int64
+	CompletedAt       field.Time
 	Conversation      itemBelongsToConversation
 
+	Response itemBelongsToResponse
+
 	fieldMap map[string]field.Expr
 }
 
@@ -126,13 +177,14 @@ func (i *item) updateTableName(table string) *item {
 	i.DeletedAt = field.NewField(table, "deleted_at")
 	i.PublicID = field.NewString(table, "public_id")
 	i.ConversationID = field.NewUint(table, "conversation_id")
+	i.ResponseID = field.NewUint(table, "response_id")
 	i.Type = field.NewString(table, "type")
 	i.Role = field.NewString(table, "role")
 	i.Content = field.NewString(table, "content")
 	i.Status = field.NewString(table, "status")
-	i.IncompleteAt = field.NewInt64(table, "incomplete_at")
+	i.IncompleteAt = field.NewTime(table, "incomplete_at")
 	i.IncompleteDetails = field.NewString(table, "incomplete_details")
-	i.CompletedAt = field.NewInt64(table, "completed_at")
+	i.CompletedAt = field.NewTime(table, "completed_at")
 
 	i.fillFieldMap()
 
@@ -149,13 +201,14 @@ func (i *item) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
 }
 
 func (i *item) fillFieldMap() {
-	i.fieldMap = make(map[string]field.Expr, 14)
+	i.fieldMap = make(map[string]field.Expr, 16)
 	i.fieldMap["id"] = i.ID
 	i.fieldMap["created_at"] = i.CreatedAt
 	i.fieldMap["updated_at"] = i.UpdatedAt
 	i.fieldMap["deleted_at"] = i.DeletedAt
 	i.fieldMap["public_id"] = i.PublicID
 	i.fieldMap["conversation_id"] = i.ConversationID
+	i.fieldMap["response_id"] = i.ResponseID
 	i.fieldMap["type"] = i.Type
 	i.fieldMap["role"] = i.Role
 	i.fieldMap["content"] = i.Content
@@ -170,12 +223,15 @@ func (i item) clone(db *gorm.DB) item {
 	i.itemDo.ReplaceConnPool(db.Statement.ConnPool)
 	i.Conversation.db = db.Session(&gorm.Session{Initialized: true})
 	i.Conversation.db.Statement.ConnPool = db.Statement.ConnPool
+	i.Response.db = db.Session(&gorm.Session{Initialized: true})
+	i.Response.db.Statement.ConnPool = db.Statement.ConnPool
 	return i
 }
 
 func (i item) replaceDB(db *gorm.DB) item {
 	i.itemDo.ReplaceDB(db)
 	i.Conversation.db = db.Session(&gorm.Session{})
+	i.Response.db = db.Session(&gorm.Session{})
 	return i
 }
 
@@ -198,6 +254,18 @@ type itemBelongsToConversation struct {
 		Conversation struct {
 			field.RelationField
 		}
+		Response struct {
+			field.RelationField
+			UserEntity struct {
+				field.RelationField
+			}
+			Conversation struct {
+				field.RelationField
+			}
+			Items struct {
+				field.RelationField
+			}
+		}
 	}
 }
 
@@ -276,6 +344,87 @@ func (a itemBelongsToConversationTx) Unscoped() *itemBelongsToConversationTx {
 	return &a
 }
 
+type itemBelongsToResponse struct {
+	db *gorm.DB
+
+	field.RelationField
+}
+
+func (a itemBelongsToResponse) Where(conds ...field.Expr) *itemBelongsToResponse {
+	if len(conds) == 0 {
+		return &a
+	}
+
+	exprs := make([]clause.Expression, 0, len(conds))
+	for _, cond := range conds {
+		exprs = append(exprs, cond.BeCond().(clause.Expression))
+	}
+	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
+	return &a
+}
+
+func (a itemBelongsToResponse) WithContext(ctx context.Context) *itemBelongsToResponse {
+	a.db = a.db.WithContext(ctx)
+	return &a
+}
+
+func (a itemBelongsToResponse) Session(session *gorm.Session) *itemBelongsToResponse {
+	a.db = a.db.Session(session)
+	return &a
+}
+
+func (a itemBelongsToResponse) Model(m *dbschema.Item) *itemBelongsToResponseTx {
+	return &itemBelongsToResponseTx{a.db.Model(m).Association(a.Name())}
+}
+
+func (a itemBelongsToResponse) Unscoped() *itemBelongsToResponse {
+	a.db = a.db.Unscoped()
+	return &a
+}
+
+type itemBelongsToResponseTx struct{ tx *gorm.Association }
+
+func (a itemBelongsToResponseTx) Find() (result *dbschema.Response, err error) {
+	return result, a.tx.Find(&result)
+}
+
+func (a itemBelongsToResponseTx) Append(values ...*dbschema.Response) (err error) {
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Append(targetValues...)
+}
+
+func (a itemBelongsToResponseTx) Replace(values ...*dbschema.Response) (err error) {
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Replace(targetValues...)
+}
+
+func (a itemBelongsToResponseTx) Delete(values ...*dbschema.Response) (err error) {
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Delete(targetValues...)
+}
+
+func (a itemBelongsToResponseTx) Clear() error {
+	return a.tx.Clear()
+}
+
+func (a itemBelongsToResponseTx) Count() int64 {
+	return a.tx.Count()
+}
+
+func (a itemBelongsToResponseTx) Unscoped() *itemBelongsToResponseTx {
+	a.tx = a.tx.Unscoped()
+	return &a
+}
+
 type itemDo struct{ gen.DO }
 
 type IItemDo interface {
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/responses.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/responses.gen.go
new file mode 100644
index 00000000..2f2d8f40
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/responses.gen.go
@@ -0,0 +1,898 @@
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+
+package gormgen
+
+import (
+	"context"
+	"database/sql"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+	"gorm.io/gorm/schema"
+
+	"gorm.io/gen"
+	"gorm.io/gen/field"
+
+	"gorm.io/plugin/dbresolver"
+
+	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
+)
+
+func newResponse(db *gorm.DB, opts ...gen.DOOption) response { // newResponse builds the response query object: DO bound to db and dbschema.Response, one field expression per column, and the three relation descriptors.
+	_response := response{}
+
+	_response.responseDo.UseDB(db, opts...)
+	_response.responseDo.UseModel(&dbschema.Response{})
+
+	tableName := _response.responseDo.TableName() // every column expression in this function is qualified with this table name
+	_response.ALL = field.NewAsterisk(tableName)
+	_response.ID = field.NewUint(tableName, "id")
+	_response.CreatedAt = field.NewTime(tableName, "created_at")
+	_response.UpdatedAt = field.NewTime(tableName, "updated_at")
+	_response.DeletedAt = field.NewField(tableName, "deleted_at")
+	_response.PublicID = field.NewString(tableName, "public_id")
+	_response.UserID = field.NewUint(tableName, "user_id")
+	_response.ConversationID = field.NewUint(tableName, "conversation_id")
+	_response.PreviousResponseID = field.NewString(tableName, "previous_response_id")
+	_response.Model = field.NewString(tableName, "model")
+	_response.Status = field.NewString(tableName, "status")
+	_response.Input = field.NewString(tableName, "input")
+	_response.Output = field.NewString(tableName, "output")
+	_response.SystemPrompt = field.NewString(tableName, "system_prompt")
+	_response.MaxTokens = field.NewInt(tableName, "max_tokens")
+	_response.Temperature = field.NewFloat64(tableName, "temperature")
+	_response.TopP = field.NewFloat64(tableName, "top_p")
+	_response.TopK = field.NewInt(tableName, "top_k")
+	_response.RepetitionPenalty = field.NewFloat64(tableName, "repetition_penalty")
+	_response.Seed = field.NewInt(tableName, "seed")
+	_response.Stop = field.NewString(tableName, "stop")
+	_response.PresencePenalty = field.NewFloat64(tableName, "presence_penalty")
+	_response.FrequencyPenalty = field.NewFloat64(tableName, "frequency_penalty")
+	_response.LogitBias = field.NewString(tableName, "logit_bias")
+	_response.ResponseFormat = field.NewString(tableName, "response_format")
+	_response.Tools = field.NewString(tableName, "tools")
+	_response.ToolChoice = field.NewString(tableName, "tool_choice")
+	_response.Metadata = field.NewString(tableName, "metadata")
+	_response.Stream = field.NewBool(tableName, "stream")
+	_response.Background = field.NewBool(tableName, "background")
+	_response.Timeout = field.NewInt(tableName, "timeout")
+	_response.User = field.NewString(tableName, "user")
+	_response.Usage = field.NewString(tableName, "usage")
+	_response.Error = field.NewString(tableName, "error")
+	_response.CompletedAt = field.NewTime(tableName, "completed_at")
+	_response.CancelledAt = field.NewTime(tableName, "cancelled_at")
+	_response.FailedAt = field.NewTime(tableName, "failed_at")
+	_response.Items = responseHasManyItems{ // "Items" relation descriptor plus the nested relation paths rooted at "Items."
+		db: db.Session(&gorm.Session{}),
+
+		RelationField: field.NewRelation("Items", "dbschema.Item"),
+		Conversation: struct {
+			field.RelationField
+			User struct {
+				field.RelationField
+				Organizations struct {
+					field.RelationField
+				}
+				Projects struct {
+					field.RelationField
+				}
+			}
+			Items struct {
+				field.RelationField
+			}
+		}{
+			RelationField: field.NewRelation("Items.Conversation", "dbschema.Conversation"),
+			User: struct {
+				field.RelationField
+				Organizations struct {
+					field.RelationField
+				}
+				Projects struct {
+					field.RelationField
+				}
+			}{
+				RelationField: field.NewRelation("Items.Conversation.User", "dbschema.User"),
+				Organizations: struct {
+					field.RelationField
+				}{
+					RelationField: field.NewRelation("Items.Conversation.User.Organizations", "dbschema.OrganizationMember"),
+				},
+				Projects: struct {
+					field.RelationField
+				}{
+					RelationField: field.NewRelation("Items.Conversation.User.Projects", "dbschema.ProjectMember"),
+				},
+			},
+			Items: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Items.Conversation.Items", "dbschema.Item"),
+			},
+		},
+		Response: struct {
+			field.RelationField
+			UserEntity struct {
+				field.RelationField
+			}
+			Conversation struct {
+				field.RelationField
+			}
+			Items struct {
+				field.RelationField
+			}
+		}{
+			RelationField: field.NewRelation("Items.Response", "dbschema.Response"),
+			UserEntity: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Items.Response.UserEntity", "dbschema.User"),
+			},
+			Conversation: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Items.Response.Conversation", "dbschema.Conversation"),
+			},
+			Items: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Items.Response.Items", "dbschema.Item"),
+			},
+		},
+	}
+
+	_response.UserEntity = responseBelongsToUserEntity{ // "UserEntity" relation descriptor with its own session of db
+		db: db.Session(&gorm.Session{}),
+
+		RelationField: field.NewRelation("UserEntity", "dbschema.User"),
+	}
+
+	_response.Conversation = responseBelongsToConversation{ // "Conversation" relation descriptor with its own session of db
+		db: db.Session(&gorm.Session{}),
+
+		RelationField: field.NewRelation("Conversation", "dbschema.Conversation"),
+	}
+
+	_response.fillFieldMap() // index the column expressions by column name
+
+	return _response
+}
+
+type response struct { // response bundles the responseDo DAO with one field expression per column, three relation descriptors, and a column-name lookup map.
+	responseDo
+
+	ALL                field.Asterisk
+	ID                 field.Uint
+	CreatedAt          field.Time
+	UpdatedAt          field.Time
+	DeletedAt          field.Field
+	PublicID           field.String
+	UserID             field.Uint
+	ConversationID     field.Uint
+	PreviousResponseID field.String
+	Model              field.String
+	Status             field.String
+	Input              field.String
+	Output             field.String
+	SystemPrompt       field.String
+	MaxTokens          field.Int
+	Temperature        field.Float64
+	TopP               field.Float64
+	TopK               field.Int
+	RepetitionPenalty  field.Float64
+	Seed               field.Int
+	Stop               field.String
+	PresencePenalty    field.Float64
+	FrequencyPenalty   field.Float64
+	LogitBias          field.String
+	ResponseFormat     field.String
+	Tools              field.String
+	ToolChoice         field.String
+	Metadata           field.String
+	Stream             field.Bool
+	Background         field.Bool
+	Timeout            field.Int
+	User               field.String
+	Usage              field.String
+	Error              field.String
+	CompletedAt        field.Time
+	CancelledAt        field.Time
+	FailedAt           field.Time
+	Items              responseHasManyItems
+
+	UserEntity responseBelongsToUserEntity
+
+	Conversation responseBelongsToConversation
+
+	fieldMap map[string]field.Expr // column name -> expression; populated by fillFieldMap
+}
+
+func (r response) Table(newTableName string) *response { // Table points the copy's DO at newTableName and rebuilds its column expressions for that name.
+	r.responseDo.UseTable(newTableName)
+	return r.updateTableName(newTableName)
+}
+
+func (r response) As(alias string) *response { // As replaces the copy's DO with its aliased form and rebuilds the column expressions against alias.
+	r.responseDo.DO = *(r.responseDo.As(alias).(*gen.DO))
+	return r.updateTableName(alias)
+}
+
+func (r *response) updateTableName(table string) *response { // updateTableName rebuilds every column expression against table, refreshes the field map, and returns r; relation fields are left as they are.
+	r.ALL = field.NewAsterisk(table)
+	r.ID = field.NewUint(table, "id")
+	r.CreatedAt = field.NewTime(table, "created_at")
+	r.UpdatedAt = field.NewTime(table, "updated_at")
+	r.DeletedAt = field.NewField(table, "deleted_at")
+	r.PublicID = field.NewString(table, "public_id")
+	r.UserID = field.NewUint(table, "user_id")
+	r.ConversationID = field.NewUint(table, "conversation_id")
+	r.PreviousResponseID = field.NewString(table, "previous_response_id")
+	r.Model = field.NewString(table, "model")
+	r.Status = field.NewString(table, "status")
+	r.Input = field.NewString(table, "input")
+	r.Output = field.NewString(table, "output")
+	r.SystemPrompt = field.NewString(table, "system_prompt")
+	r.MaxTokens = field.NewInt(table, "max_tokens")
+	r.Temperature = field.NewFloat64(table, "temperature")
+	r.TopP = field.NewFloat64(table, "top_p")
+	r.TopK = field.NewInt(table, "top_k")
+	r.RepetitionPenalty = field.NewFloat64(table, "repetition_penalty")
+	r.Seed = field.NewInt(table, "seed")
+	r.Stop = field.NewString(table, "stop")
+	r.PresencePenalty = field.NewFloat64(table, "presence_penalty")
+	r.FrequencyPenalty = field.NewFloat64(table, "frequency_penalty")
+	r.LogitBias = field.NewString(table, "logit_bias")
+	r.ResponseFormat = field.NewString(table, "response_format")
+	r.Tools = field.NewString(table, "tools")
+	r.ToolChoice = field.NewString(table, "tool_choice")
+	r.Metadata = field.NewString(table, "metadata")
+	r.Stream = field.NewBool(table, "stream")
+	r.Background = field.NewBool(table, "background")
+	r.Timeout = field.NewInt(table, "timeout")
+	r.User = field.NewString(table, "user")
+	r.Usage = field.NewString(table, "usage")
+	r.Error = field.NewString(table, "error")
+	r.CompletedAt = field.NewTime(table, "completed_at")
+	r.CancelledAt = field.NewTime(table, "cancelled_at")
+	r.FailedAt = field.NewTime(table, "failed_at")
+
+	r.fillFieldMap() // the map holds copies of the expressions, so it must be rebuilt after they change
+
+	return r
+}
+
+func (r *response) GetFieldByName(fieldName string) (field.OrderExpr, bool) { // GetFieldByName looks fieldName up in r.fieldMap and returns it as a field.OrderExpr.
+	_f, ok := r.fieldMap[fieldName]
+	if !ok || _f == nil { // unknown name or nil entry
+		return nil, false
+	}
+	_oe, ok := _f.(field.OrderExpr) // ok is false when the expression does not implement field.OrderExpr
+	return _oe, ok
+}
+
+func (r *response) fillFieldMap() { // fillFieldMap replaces r.fieldMap with a new map from column name to the current column expression; relation fields are not added.
+	r.fieldMap = make(map[string]field.Expr, 39)
+	r.fieldMap["id"] = r.ID
+	r.fieldMap["created_at"] = r.CreatedAt
+	r.fieldMap["updated_at"] = r.UpdatedAt
+	r.fieldMap["deleted_at"] = r.DeletedAt
+	r.fieldMap["public_id"] = r.PublicID
+	r.fieldMap["user_id"] = r.UserID
+	r.fieldMap["conversation_id"] = r.ConversationID
+	r.fieldMap["previous_response_id"] = r.PreviousResponseID
+	r.fieldMap["model"] = r.Model
+	r.fieldMap["status"] = r.Status
+	r.fieldMap["input"] = r.Input
+	r.fieldMap["output"] = r.Output
+	r.fieldMap["system_prompt"] = r.SystemPrompt
+	r.fieldMap["max_tokens"] = r.MaxTokens
+	r.fieldMap["temperature"] = r.Temperature
+	r.fieldMap["top_p"] = r.TopP
+	r.fieldMap["top_k"] = r.TopK
+	r.fieldMap["repetition_penalty"] = r.RepetitionPenalty
+	r.fieldMap["seed"] = r.Seed
+	r.fieldMap["stop"] = r.Stop
+	r.fieldMap["presence_penalty"] = r.PresencePenalty
+	r.fieldMap["frequency_penalty"] = r.FrequencyPenalty
+	r.fieldMap["logit_bias"] = r.LogitBias
+	r.fieldMap["response_format"] = r.ResponseFormat
+	r.fieldMap["tools"] = r.Tools
+	r.fieldMap["tool_choice"] = r.ToolChoice
+	r.fieldMap["metadata"] = r.Metadata
+	r.fieldMap["stream"] = r.Stream
+	r.fieldMap["background"] = r.Background
+	r.fieldMap["timeout"] = r.Timeout
+	r.fieldMap["user"] = r.User
+	r.fieldMap["usage"] = r.Usage
+	r.fieldMap["error"] = r.Error
+	r.fieldMap["completed_at"] = r.CompletedAt
+	r.fieldMap["cancelled_at"] = r.CancelledAt
+	r.fieldMap["failed_at"] = r.FailedAt
+
+}
+
+func (r response) clone(db *gorm.DB) response { // clone returns a copy whose DO and three relations all use db.Statement.ConnPool, each relation on its own new session of db.
+	r.responseDo.ReplaceConnPool(db.Statement.ConnPool)
+	r.Items.db = db.Session(&gorm.Session{Initialized: true})
+	r.Items.db.Statement.ConnPool = db.Statement.ConnPool
+	r.UserEntity.db = db.Session(&gorm.Session{Initialized: true})
+	r.UserEntity.db.Statement.ConnPool = db.Statement.ConnPool
+	r.Conversation.db = db.Session(&gorm.Session{Initialized: true})
+	r.Conversation.db.Statement.ConnPool = db.Statement.ConnPool
+	return r
+}
+
+func (r response) replaceDB(db *gorm.DB) response { // replaceDB returns a copy whose DO is pointed at db and whose three relations each get a new session of db.
+	r.responseDo.ReplaceDB(db)
+	r.Items.db = db.Session(&gorm.Session{})
+	r.UserEntity.db = db.Session(&gorm.Session{})
+	r.Conversation.db = db.Session(&gorm.Session{})
+	return r
+}
+
+type responseHasManyItems struct { // responseHasManyItems describes the "Items" relation of a response: its own db handle, the relation field, and nested relation fields.
+	db *gorm.DB
+
+	field.RelationField
+
+	Conversation struct {
+		field.RelationField
+		User struct {
+			field.RelationField
+			Organizations struct {
+				field.RelationField
+			}
+			Projects struct {
+				field.RelationField
+			}
+		}
+		Items struct {
+			field.RelationField
+		}
+	}
+	Response struct {
+		field.RelationField
+		UserEntity struct {
+			field.RelationField
+		}
+		Conversation struct {
+			field.RelationField
+		}
+		Items struct {
+			field.RelationField
+		}
+	}
+}
+
+func (a responseHasManyItems) Where(conds ...field.Expr) *responseHasManyItems { // Where returns a pointer to a copy whose db carries conds as a clause.Where; with no conds the copy is returned unchanged.
+	if len(conds) == 0 {
+		return &a
+	}
+
+	exprs := make([]clause.Expression, 0, len(conds))
+	for _, cond := range conds {
+		exprs = append(exprs, cond.BeCond().(clause.Expression))
+	}
+	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
+	return &a
+}
+
+func (a responseHasManyItems) WithContext(ctx context.Context) *responseHasManyItems { // WithContext returns a pointer to a copy whose db is a.db.WithContext(ctx).
+	a.db = a.db.WithContext(ctx)
+	return &a
+}
+
+func (a responseHasManyItems) Session(session *gorm.Session) *responseHasManyItems { // Session returns a pointer to a copy whose db is a.db.Session(session).
+	a.db = a.db.Session(session)
+	return &a
+}
+
+func (a responseHasManyItems) Model(m *dbschema.Response) *responseHasManyItemsTx { // Model wraps the gorm association named a.Name() on m in a responseHasManyItemsTx.
+	return &responseHasManyItemsTx{a.db.Model(m).Association(a.Name())}
+}
+
+func (a responseHasManyItems) Unscoped() *responseHasManyItems { // Unscoped returns a pointer to a copy whose db is a.db.Unscoped().
+	a.db = a.db.Unscoped()
+	return &a
+}
+
+type responseHasManyItemsTx struct{ tx *gorm.Association } // responseHasManyItemsTx wraps a *gorm.Association behind methods typed on *dbschema.Item.
+
+func (a responseHasManyItemsTx) Find() (result []*dbschema.Item, err error) { // Find passes &result to the association's Find. NOTE(review): relies on the call running before result is read; the Go spec leaves that order unspecified.
+	return result, a.tx.Find(&result)
+}
+
+func (a responseHasManyItemsTx) Append(values ...*dbschema.Item) (err error) { // Append boxes each value as interface{} and forwards them to the association's Append.
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Append(targetValues...)
+}
+
+func (a responseHasManyItemsTx) Replace(values ...*dbschema.Item) (err error) { // Replace boxes each value as interface{} and forwards them to the association's Replace.
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Replace(targetValues...)
+}
+
+func (a responseHasManyItemsTx) Delete(values ...*dbschema.Item) (err error) { // Delete boxes each value as interface{} and forwards them to the association's Delete.
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Delete(targetValues...)
+}
+
+func (a responseHasManyItemsTx) Clear() error { // Clear returns the result of the association's Clear.
+	return a.tx.Clear()
+}
+
+func (a responseHasManyItemsTx) Count() int64 { // Count returns the result of the association's Count.
+	return a.tx.Count()
+}
+
+func (a responseHasManyItemsTx) Unscoped() *responseHasManyItemsTx { // Unscoped returns a pointer to a copy whose association is a.tx.Unscoped().
+	a.tx = a.tx.Unscoped()
+	return &a
+}
+
+type responseBelongsToUserEntity struct { // responseBelongsToUserEntity describes the "UserEntity" relation of a response: its own db handle plus the relation field.
+	db *gorm.DB
+
+	field.RelationField
+}
+
+func (a responseBelongsToUserEntity) Where(conds ...field.Expr) *responseBelongsToUserEntity { // Where returns a pointer to a copy whose db carries conds as a clause.Where; with no conds the copy is returned unchanged.
+	if len(conds) == 0 {
+		return &a
+	}
+
+	exprs := make([]clause.Expression, 0, len(conds))
+	for _, cond := range conds {
+		exprs = append(exprs, cond.BeCond().(clause.Expression))
+	}
+	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
+	return &a
+}
+
+func (a responseBelongsToUserEntity) WithContext(ctx context.Context) *responseBelongsToUserEntity { // WithContext returns a pointer to a copy whose db is a.db.WithContext(ctx).
+	a.db = a.db.WithContext(ctx)
+	return &a
+}
+
+func (a responseBelongsToUserEntity) Session(session *gorm.Session) *responseBelongsToUserEntity { // Session returns a pointer to a copy whose db is a.db.Session(session).
+	a.db = a.db.Session(session)
+	return &a
+}
+
+func (a responseBelongsToUserEntity) Model(m *dbschema.Response) *responseBelongsToUserEntityTx { // Model wraps the gorm association named a.Name() on m in a responseBelongsToUserEntityTx.
+	return &responseBelongsToUserEntityTx{a.db.Model(m).Association(a.Name())}
+}
+
+func (a responseBelongsToUserEntity) Unscoped() *responseBelongsToUserEntity { // Unscoped returns a pointer to a copy whose db is a.db.Unscoped().
+	a.db = a.db.Unscoped()
+	return &a
+}
+
+type responseBelongsToUserEntityTx struct{ tx *gorm.Association } // responseBelongsToUserEntityTx wraps a *gorm.Association behind methods typed on *dbschema.User.
+
+func (a responseBelongsToUserEntityTx) Find() (result *dbschema.User, err error) { // Find passes &result to the association's Find. NOTE(review): relies on the call running before result is read; the Go spec leaves that order unspecified.
+	return result, a.tx.Find(&result)
+}
+
+func (a responseBelongsToUserEntityTx) Append(values ...*dbschema.User) (err error) { // Append boxes each value as interface{} and forwards them to the association's Append.
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Append(targetValues...)
+}
+
+func (a responseBelongsToUserEntityTx) Replace(values ...*dbschema.User) (err error) { // Replace boxes each value as interface{} and forwards them to the association's Replace.
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Replace(targetValues...)
+}
+
+func (a responseBelongsToUserEntityTx) Delete(values ...*dbschema.User) (err error) { // Delete boxes each value as interface{} and forwards them to the association's Delete.
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Delete(targetValues...)
+}
+
+func (a responseBelongsToUserEntityTx) Clear() error { // Clear returns the result of the association's Clear.
+	return a.tx.Clear()
+}
+
+func (a responseBelongsToUserEntityTx) Count() int64 { // Count returns the result of the association's Count.
+	return a.tx.Count()
+}
+
+func (a responseBelongsToUserEntityTx) Unscoped() *responseBelongsToUserEntityTx { // Unscoped returns a pointer to a copy whose association is a.tx.Unscoped().
+	a.tx = a.tx.Unscoped()
+	return &a
+}
+
+type responseBelongsToConversation struct { // responseBelongsToConversation describes the "Conversation" relation of a response: its own db handle plus the relation field.
+	db *gorm.DB
+
+	field.RelationField
+}
+
+func (a responseBelongsToConversation) Where(conds ...field.Expr) *responseBelongsToConversation { // Where returns a pointer to a copy whose db carries conds as a clause.Where; with no conds the copy is returned unchanged.
+	if len(conds) == 0 {
+		return &a
+	}
+
+	exprs := make([]clause.Expression, 0, len(conds))
+	for _, cond := range conds {
+		exprs = append(exprs, cond.BeCond().(clause.Expression))
+	}
+	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
+	return &a
+}
+
+func (a responseBelongsToConversation) WithContext(ctx context.Context) *responseBelongsToConversation { // WithContext returns a pointer to a copy whose db is a.db.WithContext(ctx).
+	a.db = a.db.WithContext(ctx)
+	return &a
+}
+
+func (a responseBelongsToConversation) Session(session *gorm.Session) *responseBelongsToConversation { // Session returns a pointer to a copy whose db is a.db.Session(session).
+	a.db = a.db.Session(session)
+	return &a
+}
+
+func (a responseBelongsToConversation) Model(m *dbschema.Response) *responseBelongsToConversationTx { // Model wraps the gorm association named a.Name() on m in a responseBelongsToConversationTx.
+	return &responseBelongsToConversationTx{a.db.Model(m).Association(a.Name())}
+}
+
+func (a responseBelongsToConversation) Unscoped() *responseBelongsToConversation { // Unscoped returns a pointer to a copy whose db is a.db.Unscoped().
+	a.db = a.db.Unscoped()
+	return &a
+}
+
+type responseBelongsToConversationTx struct{ tx *gorm.Association } // responseBelongsToConversationTx wraps a *gorm.Association behind methods typed on *dbschema.Conversation.
+
+func (a responseBelongsToConversationTx) Find() (result *dbschema.Conversation, err error) { // Find passes &result to the association's Find. NOTE(review): relies on the call running before result is read; the Go spec leaves that order unspecified.
+	return result, a.tx.Find(&result)
+}
+
+func (a responseBelongsToConversationTx) Append(values ...*dbschema.Conversation) (err error) { // Append boxes each value as interface{} and forwards them to the association's Append.
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Append(targetValues...)
+}
+
+func (a responseBelongsToConversationTx) Replace(values ...*dbschema.Conversation) (err error) { // Replace boxes each value as interface{} and forwards them to the association's Replace.
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Replace(targetValues...)
+}
+
+func (a responseBelongsToConversationTx) Delete(values ...*dbschema.Conversation) (err error) { // Delete boxes each value as interface{} and forwards them to the association's Delete.
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Delete(targetValues...)
+}
+
+func (a responseBelongsToConversationTx) Clear() error { // Clear returns the result of the association's Clear.
+	return a.tx.Clear()
+}
+
+func (a responseBelongsToConversationTx) Count() int64 { // Count returns the result of the association's Count.
+	return a.tx.Count()
+}
+
+func (a responseBelongsToConversationTx) Unscoped() *responseBelongsToConversationTx { // Unscoped returns a pointer to a copy whose association is a.tx.Unscoped().
+	a.tx = a.tx.Unscoped()
+	return &a
+}
+
+type responseDo struct{ gen.DO } // responseDo embeds gen.DO; its methods re-type gen.DO results in terms of dbschema.Response and IResponseDo.
+
+type IResponseDo interface { // IResponseDo is the typed DAO surface for dbschema.Response: chainable builders return IResponseDo, finishers return *dbschema.Response values or gen result info.
+	gen.SubQuery
+	Debug() IResponseDo
+	WithContext(ctx context.Context) IResponseDo
+	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
+	ReplaceDB(db *gorm.DB)
+	ReadDB() IResponseDo
+	WriteDB() IResponseDo
+	As(alias string) gen.Dao
+	Session(config *gorm.Session) IResponseDo
+	Columns(cols ...field.Expr) gen.Columns
+	Clauses(conds ...clause.Expression) IResponseDo
+	Not(conds ...gen.Condition) IResponseDo
+	Or(conds ...gen.Condition) IResponseDo
+	Select(conds ...field.Expr) IResponseDo
+	Where(conds ...gen.Condition) IResponseDo
+	Order(conds ...field.Expr) IResponseDo
+	Distinct(cols ...field.Expr) IResponseDo
+	Omit(cols ...field.Expr) IResponseDo
+	Join(table schema.Tabler, on ...field.Expr) IResponseDo
+	LeftJoin(table schema.Tabler, on ...field.Expr) IResponseDo
+	RightJoin(table schema.Tabler, on ...field.Expr) IResponseDo
+	Group(cols ...field.Expr) IResponseDo
+	Having(conds ...gen.Condition) IResponseDo
+	Limit(limit int) IResponseDo
+	Offset(offset int) IResponseDo
+	Count() (count int64, err error)
+	Scopes(funcs ...func(gen.Dao) gen.Dao) IResponseDo
+	Unscoped() IResponseDo
+	Create(values ...*dbschema.Response) error
+	CreateInBatches(values []*dbschema.Response, batchSize int) error
+	Save(values ...*dbschema.Response) error
+	First() (*dbschema.Response, error)
+	Take() (*dbschema.Response, error)
+	Last() (*dbschema.Response, error)
+	Find() ([]*dbschema.Response, error)
+	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Response, err error)
+	FindInBatches(result *[]*dbschema.Response, batchSize int, fc func(tx gen.Dao, batch int) error) error
+	Pluck(column field.Expr, dest interface{}) error
+	Delete(...*dbschema.Response) (info gen.ResultInfo, err error)
+	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	Updates(value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
+	UpdateFrom(q gen.SubQuery) gen.Dao
+	Attrs(attrs ...field.AssignExpr) IResponseDo
+	Assign(attrs ...field.AssignExpr) IResponseDo
+	Joins(fields ...field.RelationField) IResponseDo
+	Preload(fields ...field.RelationField) IResponseDo
+	FirstOrInit() (*dbschema.Response, error)
+	FirstOrCreate() (*dbschema.Response, error)
+	FindByPage(offset int, limit int) (result []*dbschema.Response, count int64, err error)
+	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
+	Rows() (*sql.Rows, error)
+	Row() *sql.Row
+	Scan(result interface{}) (err error)
+	Returning(value interface{}, columns ...string) IResponseDo
+	UnderlyingDB() *gorm.DB
+	schema.Tabler
+}
+
+func (r responseDo) Debug() IResponseDo { // Debug returns a copy of r whose DO is r.DO.Debug().
+	return r.withDO(r.DO.Debug())
+}
+
+func (r responseDo) WithContext(ctx context.Context) IResponseDo { // WithContext returns a copy of r whose DO is r.DO.WithContext(ctx).
+	return r.withDO(r.DO.WithContext(ctx))
+}
+
+func (r responseDo) ReadDB() IResponseDo { // ReadDB returns r.Clauses(dbresolver.Read).
+	return r.Clauses(dbresolver.Read)
+}
+
+func (r responseDo) WriteDB() IResponseDo { // WriteDB returns r.Clauses(dbresolver.Write).
+	return r.Clauses(dbresolver.Write)
+}
+
+func (r responseDo) Session(config *gorm.Session) IResponseDo { // Session returns a copy of r whose DO is r.DO.Session(config).
+	return r.withDO(r.DO.Session(config))
+}
+
+func (r responseDo) Clauses(conds ...clause.Expression) IResponseDo { // Clauses returns a copy of r whose DO is r.DO.Clauses(conds...).
+	return r.withDO(r.DO.Clauses(conds...))
+}
+
+func (r responseDo) Returning(value interface{}, columns ...string) IResponseDo { // Returning returns a copy of r whose DO is r.DO.Returning(value, columns...).
+	return r.withDO(r.DO.Returning(value, columns...))
+}
+
+func (r responseDo) Not(conds ...gen.Condition) IResponseDo { // Not returns a copy of r whose DO is r.DO.Not(conds...).
+	return r.withDO(r.DO.Not(conds...))
+}
+
+func (r responseDo) Or(conds ...gen.Condition) IResponseDo { // Or returns a copy of r whose DO is r.DO.Or(conds...).
+	return r.withDO(r.DO.Or(conds...))
+}
+
+func (r responseDo) Select(conds ...field.Expr) IResponseDo { // Select returns a copy of r whose DO is r.DO.Select(conds...).
+	return r.withDO(r.DO.Select(conds...))
+}
+
+func (r responseDo) Where(conds ...gen.Condition) IResponseDo { // Where returns a copy of r whose DO is r.DO.Where(conds...).
+	return r.withDO(r.DO.Where(conds...))
+}
+
+func (r responseDo) Order(conds ...field.Expr) IResponseDo { // Order returns a copy of r whose DO is r.DO.Order(conds...).
+	return r.withDO(r.DO.Order(conds...))
+}
+
+func (r responseDo) Distinct(cols ...field.Expr) IResponseDo { // Distinct returns a copy of r whose DO is r.DO.Distinct(cols...).
+	return r.withDO(r.DO.Distinct(cols...))
+}
+
+func (r responseDo) Omit(cols ...field.Expr) IResponseDo { // Omit returns a copy of r whose DO is r.DO.Omit(cols...).
+	return r.withDO(r.DO.Omit(cols...))
+}
+
+func (r responseDo) Join(table schema.Tabler, on ...field.Expr) IResponseDo { // Join returns a copy of r whose DO is r.DO.Join(table, on...).
+	return r.withDO(r.DO.Join(table, on...))
+}
+
+func (r responseDo) LeftJoin(table schema.Tabler, on ...field.Expr) IResponseDo { // LeftJoin returns a copy of r whose DO is r.DO.LeftJoin(table, on...).
+	return r.withDO(r.DO.LeftJoin(table, on...))
+}
+
+func (r responseDo) RightJoin(table schema.Tabler, on ...field.Expr) IResponseDo { // RightJoin returns a copy of r whose DO is r.DO.RightJoin(table, on...).
+	return r.withDO(r.DO.RightJoin(table, on...))
+}
+
+func (r responseDo) Group(cols ...field.Expr) IResponseDo { // Group returns a copy of r whose DO is r.DO.Group(cols...).
+	return r.withDO(r.DO.Group(cols...))
+}
+
+func (r responseDo) Having(conds ...gen.Condition) IResponseDo { // Having returns a copy of r whose DO is r.DO.Having(conds...).
+	return r.withDO(r.DO.Having(conds...))
+}
+
+func (r responseDo) Limit(limit int) IResponseDo { // Limit returns a copy of r whose DO is r.DO.Limit(limit).
+	return r.withDO(r.DO.Limit(limit))
+}
+
+func (r responseDo) Offset(offset int) IResponseDo { // Offset returns a copy of r whose DO is r.DO.Offset(offset).
+	return r.withDO(r.DO.Offset(offset))
+}
+
+func (r responseDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IResponseDo { // Scopes returns a copy of r whose DO is r.DO.Scopes(funcs...).
+	return r.withDO(r.DO.Scopes(funcs...))
+}
+
+func (r responseDo) Unscoped() IResponseDo { // Unscoped returns a copy of r whose DO is r.DO.Unscoped().
+	return r.withDO(r.DO.Unscoped())
+}
+
+func (r responseDo) Create(values ...*dbschema.Response) error { // Create forwards values to r.DO.Create.
+	if len(values) == 0 { // nothing to insert: return nil without calling the DO
+		return nil
+	}
+	return r.DO.Create(values)
+}
+
+func (r responseDo) CreateInBatches(values []*dbschema.Response, batchSize int) error { // CreateInBatches forwards values and batchSize to r.DO.CreateInBatches.
+	return r.DO.CreateInBatches(values, batchSize)
+}
+
+// Save : !!! the underlying implementation differs from GORM's Save.
+// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
+func (r responseDo) Save(values ...*dbschema.Response) error {
+	if len(values) == 0 { // nothing to save: return nil without calling the DO
+		return nil
+	}
+	return r.DO.Save(values)
+}
+
+func (r responseDo) First() (*dbschema.Response, error) { // First returns r.DO.First()'s result asserted to *dbschema.Response, or nil and the error.
+	if result, err := r.DO.First(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.Response), nil
+	}
+}
+
+func (r responseDo) Take() (*dbschema.Response, error) { // Take returns r.DO.Take()'s result asserted to *dbschema.Response, or nil and the error.
+	if result, err := r.DO.Take(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.Response), nil
+	}
+}
+
+func (r responseDo) Last() (*dbschema.Response, error) { // Last returns r.DO.Last()'s result asserted to *dbschema.Response, or nil and the error.
+	if result, err := r.DO.Last(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.Response), nil
+	}
+}
+
+func (r responseDo) Find() ([]*dbschema.Response, error) { // Find returns r.DO.Find()'s result asserted to []*dbschema.Response, together with its error.
+	result, err := r.DO.Find()
+	return result.([]*dbschema.Response), err // the assertion runs even when err is non-nil
+}
+
+func (r responseDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Response, err error) { // FindInBatch runs fc per batch via r.DO.FindInBatches and accumulates every batch into results.
+	buf := make([]*dbschema.Response, 0, batchSize)
+	err = r.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
+		defer func() { results = append(results, buf...) }() // buf is appended after fc returns, whether or not fc reported an error
+		return fc(tx, batch)
+	})
+	return results, err
+}
+
+func (r responseDo) FindInBatches(result *[]*dbschema.Response, batchSize int, fc func(tx gen.Dao, batch int) error) error { // FindInBatches forwards its arguments to r.DO.FindInBatches.
+	return r.DO.FindInBatches(result, batchSize, fc)
+}
+
+func (r responseDo) Attrs(attrs ...field.AssignExpr) IResponseDo { // Attrs returns a copy of r whose DO is r.DO.Attrs(attrs...).
+	return r.withDO(r.DO.Attrs(attrs...))
+}
+
+func (r responseDo) Assign(attrs ...field.AssignExpr) IResponseDo { // Assign returns a copy of r whose DO is r.DO.Assign(attrs...).
+	return r.withDO(r.DO.Assign(attrs...))
+}
+
+func (r responseDo) Joins(fields ...field.RelationField) IResponseDo { // Joins applies r.DO.Joins once per relation field, in order, and returns a pointer to the resulting copy.
+	for _, _f := range fields {
+		r = *r.withDO(r.DO.Joins(_f))
+	}
+	return &r
+}
+
+func (r responseDo) Preload(fields ...field.RelationField) IResponseDo { // Preload applies r.DO.Preload once per relation field, in order, and returns a pointer to the resulting copy.
+	for _, _f := range fields {
+		r = *r.withDO(r.DO.Preload(_f))
+	}
+	return &r
+}
+
+func (r responseDo) FirstOrInit() (*dbschema.Response, error) { // FirstOrInit returns r.DO.FirstOrInit()'s result asserted to *dbschema.Response, or nil and the error.
+	if result, err := r.DO.FirstOrInit(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.Response), nil
+	}
+}
+
+func (r responseDo) FirstOrCreate() (*dbschema.Response, error) { // FirstOrCreate returns r.DO.FirstOrCreate()'s result asserted to *dbschema.Response, or nil and the error.
+	if result, err := r.DO.FirstOrCreate(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.Response), nil
+	}
+}
+
+func (r responseDo) FindByPage(offset int, limit int) (result []*dbschema.Response, count int64, err error) { // FindByPage returns the rows at offset/limit plus a total count.
+	result, err = r.Offset(offset).Limit(limit).Find()
+	if err != nil {
+		return
+	}
+
+	if size := len(result); 0 < limit && 0 < size && size < limit { // non-empty page shorter than limit: total is size+offset, so no count query is issued
+		count = int64(size + offset)
+		return
+	}
+
+	count, err = r.Offset(-1).Limit(-1).Count() // otherwise count with offset and limit both set to -1
+	return
+}
+
+func (r responseDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) { // ScanByPage counts first, then scans the offset/limit page into result.
+	count, err = r.Count()
+	if err != nil { // a failed count returns before any scan is attempted
+		return
+	}
+
+	err = r.Offset(offset).Limit(limit).Scan(result)
+	return
+}
+
+func (r responseDo) Scan(result interface{}) (err error) { // Scan forwards result to r.DO.Scan.
+	return r.DO.Scan(result)
+}
+
+func (r responseDo) Delete(models ...*dbschema.Response) (result gen.ResultInfo, err error) { // Delete forwards models to r.DO.Delete.
+	return r.DO.Delete(models)
+}
+
+func (r *responseDo) withDO(do gen.Dao) *responseDo { // withDO stores do, asserted to *gen.DO, into r.DO and returns r.
+	r.DO = *do.(*gen.DO)
+	return r
+}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/conversationrepo/conversation_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/conversationrepo/conversation_repository.go
index 2905d52b..612f1f0d 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/conversationrepo/conversation_repository.go
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/repository/conversationrepo/conversation_repository.go
@@ -16,6 +16,8 @@ type ConversationGormRepository struct {
 	db *transaction.Database
 }
 
+var _ domain.ConversationRepository = (*ConversationGormRepository)(nil) // compile-time check that *ConversationGormRepository satisfies domain.ConversationRepository
+
 func NewConversationGormRepository(db *transaction.Database) domain.ConversationRepository {
 	return &ConversationGormRepository{
 		db: db,
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/itemrepo/item_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/itemrepo/item_repository.go
index a8733823..29a9c92b 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/itemrepo/item_repository.go
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/repository/itemrepo/item_repository.go
@@ -191,5 +191,11 @@ func (repo *ItemGormRepository) applyFilter(
 	if filter.ConversationID != nil {
 		sql = sql.Where(query.Item.ConversationID.Eq(*filter.ConversationID))
 	}
+	if filter.Role != nil {
+		sql = sql.Where(query.Item.Role.Eq(string(*filter.Role)))
+	}
+	if filter.ResponseID != nil {
+		sql = sql.Where(query.Item.ResponseID.Eq(*filter.ResponseID))
+	}
 	return sql
 }
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/repository_provider.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/repository_provider.go
index 03e49ffd..fd63691d 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/repository_provider.go
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/repository/repository_provider.go
@@ -7,6 +7,7 @@ import (
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/itemrepo"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/organizationrepo"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/projectrepo"
+	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/responserepo"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/userrepo"
 )
@@ -18,5 +19,6 @@ var RepositoryProvider = wire.NewSet(
 	userrepo.NewUserGormRepository,
 	conversationrepo.NewConversationGormRepository,
 	itemrepo.NewItemGormRepository,
+	responserepo.NewResponseGormRepository,
 	transaction.NewDatabase,
 )
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/responserepo/response_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/responserepo/response_repository.go
new file mode 100644
index 00000000..20ab28fb
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/infrastructure/database/repository/responserepo/response_repository.go
@@ -0,0 +1,165 @@
+package responserepo
+
+import (
+	"context"
+
+	"menlo.ai/jan-api-gateway/app/domain/query"
+	"menlo.ai/jan-api-gateway/app/domain/response"
+	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
+	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
+	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
+	"menlo.ai/jan-api-gateway/app/utils/functional"
+	"menlo.ai/jan-api-gateway/app/utils/idgen"
+)
+
+type ResponseGormRepository struct { // GORM-backed implementation of response.ResponseRepository
+	db *transaction.Database // supplies the *gormgen.Query used for database calls via GetQuery(ctx)
+}
+
+var _ response.ResponseRepository = (*ResponseGormRepository)(nil) // compile-time check that the interface is satisfied
+
+// NewResponseGormRepository wraps db in a ResponseGormRepository, returned as the domain interface.
+func NewResponseGormRepository(db *transaction.Database) response.ResponseRepository {
+	repo := &ResponseGormRepository{db: db}
+	return repo
+}
+
+// Create inserts resp as a new row. An empty resp.PublicID is first replaced by the
+// result of idgen.GenerateSecureID("resp", 42); on success resp.ID is copied from the row.
+func (r *ResponseGormRepository) Create(ctx context.Context, resp *response.Response) error {
+	if len(resp.PublicID) == 0 {
+		publicID, genErr := idgen.GenerateSecureID("resp", 42)
+		if genErr != nil {
+			return genErr
+		}
+		resp.PublicID = publicID
+	}
+	row := dbschema.NewSchemaResponse(resp)
+	responseDo := r.db.GetQuery(ctx).Response.WithContext(ctx)
+	if createErr := responseDo.Create(row); createErr != nil {
+		return createErr
+	}
+	resp.ID = row.ID
+	return nil
+}
+
+// Update applies resp's fields to the row whose ID equals resp.ID and returns the query error.
+// NOTE(review): Updates is given a struct; GORM normally skips zero-value fields then — confirm.
+func (r *ResponseGormRepository) Update(ctx context.Context, resp *response.Response) error {
+	row := dbschema.NewSchemaResponse(resp)
+	row.ID = resp.ID
+	q := r.db.GetQuery(ctx)
+	_, err := q.Response.WithContext(ctx).Where(q.Response.ID.Eq(resp.ID)).Updates(row)
+	return err
+}
+
+// DeleteByID issues a delete for responses whose ID equals id; only the error is reported.
+func (r *ResponseGormRepository) DeleteByID(ctx context.Context, id uint) error {
+	q := r.db.GetQuery(ctx)
+	_, err := q.Response.WithContext(ctx).Where(q.Response.ID.Eq(id)).Delete()
+	return err
+}
+
+// FindByID loads the first response whose ID equals id and converts it to the domain type.
+// A query error is returned unchanged together with a nil response.
+func (r *ResponseGormRepository) FindByID(ctx context.Context, id uint) (*response.Response, error) {
+	q := r.db.GetQuery(ctx)
+	row, err := q.Response.WithContext(ctx).Where(q.Response.ID.Eq(id)).First()
+	if err != nil {
+		return nil, err
+	}
+	return row.EtoD(), nil
+}
+
+// FindByPublicID loads the first response whose PublicID equals publicID and converts it
+// to the domain type. A query error is returned unchanged together with a nil response.
+func (r *ResponseGormRepository) FindByPublicID(ctx context.Context, publicID string) (*response.Response, error) {
+	q := r.db.GetQuery(ctx)
+	row, err := q.Response.WithContext(ctx).Where(q.Response.PublicID.Eq(publicID)).First()
+	if err != nil {
+		return nil, err
+	}
+	return row.EtoD(), nil
+}
+
+// FindByFilter returns the responses matching filter, converted to domain objects.
+// When p is non-nil it also applies a positive limit, an ID cursor (p.After) and ID
+// ordering; p.Order "desc" means descending, any other value ascending.
+func (r *ResponseGormRepository) FindByFilter(ctx context.Context, filter response.ResponseFilter, p *query.Pagination) ([]*response.Response, error) {
+	q := r.db.GetQuery(ctx)
+	stmt := r.applyFilter(q, q.Response.WithContext(ctx), filter)
+	if p != nil {
+		descending := p.Order == "desc"
+		if p.Limit != nil && *p.Limit > 0 {
+			stmt = stmt.Limit(*p.Limit)
+		}
+		switch {
+		case p.After != nil && descending:
+			stmt = stmt.Where(q.Response.ID.Lt(*p.After))
+		case p.After != nil:
+			stmt = stmt.Where(q.Response.ID.Gt(*p.After))
+		}
+		if descending {
+			stmt = stmt.Order(q.Response.ID.Desc())
+		} else {
+			stmt = stmt.Order(q.Response.ID.Asc())
+		}
+	}
+	rows, err := stmt.Find()
+	if err != nil {
+		return nil, err
+	}
+	return functional.Map(rows, func(row *dbschema.Response) *response.Response {
+		return row.EtoD()
+	}), nil
+}
+
+// Count reports how many responses satisfy filter (same conditions as FindByFilter, no pagination).
+func (r *ResponseGormRepository) Count(ctx context.Context, filter response.ResponseFilter) (int64, error) {
+	gen := r.db.GetQuery(ctx)
+	filtered := r.applyFilter(gen, gen.Response.WithContext(ctx), filter)
+	total, err := filtered.Count()
+	return total, err
+}
+
+// FindByUserID lists responses whose UserID equals userID; it is FindByFilter with
+// only the UserID criterion set.
+func (r *ResponseGormRepository) FindByUserID(ctx context.Context, userID uint, pagination *query.Pagination) ([]*response.Response, error) {
+	return r.FindByFilter(ctx, response.ResponseFilter{UserID: &userID}, pagination)
+}
+
+// FindByConversationID lists responses whose ConversationID equals conversationID; it is
+// FindByFilter with only the ConversationID criterion set.
+func (r *ResponseGormRepository) FindByConversationID(ctx context.Context, conversationID uint, pagination *query.Pagination) ([]*response.Response, error) {
+	return r.FindByFilter(ctx, response.ResponseFilter{ConversationID: &conversationID}, pagination)
+}
+
+// applyFilter adds one Where per non-nil filter field; both CreatedAt bounds are inclusive
+func (r *ResponseGormRepository) applyFilter(
+	q *gormgen.Query,
+	do gormgen.IResponseDo,
+	filter response.ResponseFilter,
+) gormgen.IResponseDo {
+	if v := filter.PublicID; v != nil {
+		do = do.Where(q.Response.PublicID.Eq(*v))
+	}
+	if v := filter.UserID; v != nil {
+		do = do.Where(q.Response.UserID.Eq(*v))
+	}
+	if v := filter.ConversationID; v != nil {
+		do = do.Where(q.Response.ConversationID.Eq(*v))
+	}
+	if v := filter.Model; v != nil {
+		do = do.Where(q.Response.Model.Eq(*v))
+	}
+	if v := filter.Status; v != nil {
+		do = do.Where(q.Response.Status.Eq(string(*v)))
+	}
+	if v := filter.CreatedAfter; v != nil {
+		do = do.Where(q.Response.CreatedAt.Gte(*v))
+	}
+	if v := filter.CreatedBefore; v != nil {
+		do = do.Where(q.Response.CreatedAt.Lte(*v))
+	}
+	return do
+}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/inference/jan_inference_provider.go b/apps/jan-api-gateway/application/app/infrastructure/inference/jan_inference_provider.go
new file mode 100644
index 00000000..fbbc640d
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/infrastructure/inference/jan_inference_provider.go
@@ -0,0 +1,88 @@
+package inference
+
+import (
+	"context"
+	"io"
+
+	openai "github.com/sashabaranov/go-openai"
+	"menlo.ai/jan-api-gateway/app/domain/inference"
+	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
+)
+
+// JanInferenceProvider adapts a JanInferenceClient to the inference.InferenceProvider interface
+type JanInferenceProvider struct {
+	client *janinference.JanInferenceClient // used by CreateCompletion and GetModels
+}
+
+// NewJanInferenceProvider wraps client in a JanInferenceProvider and returns it
+// typed as the inference.InferenceProvider interface.
+func NewJanInferenceProvider(client *janinference.JanInferenceClient) inference.InferenceProvider {
+	provider := &JanInferenceProvider{client: client}
+	return provider
+}
+
+// CreateCompletion delegates a non-streaming chat completion to the client, passing ctx, apiKey and request through unchanged
+func (p *JanInferenceProvider) CreateCompletion(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*openai.ChatCompletionResponse, error) {
+	return p.client.CreateChatCompletion(ctx, apiKey, request)
+}
+
+// CreateCompletionStream starts the upstream POST in a goroutine and returns the read end of a pipe carrying the raw response body
+func (p *JanInferenceProvider) CreateCompletionStream(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (io.ReadCloser, error) {
+	// The returned error is always nil; request and copy failures reach the caller as errors from reading the pipe
+	reader, writer := io.Pipe()
+
+	go func() {
+		defer writer.Close()
+
+		// NOTE(review): this uses the package-level resty client rather than p.client, and apiKey is never read here — confirm intended
+		req := janinference.JanInferenceRestyClient.R().SetBody(request)
+		resp, err := req.
+			SetContext(ctx).
+			SetDoNotParseResponse(true).
+			Post("/v1/chat/completions")
+		if err != nil {
+			writer.CloseWithError(err)
+			return
+		}
+		defer resp.RawResponse.Body.Close()
+
+		// Copy the body verbatim; the HTTP status is not inspected, so an upstream error body is streamed like any other
+		_, err = io.Copy(writer, resp.RawResponse.Body)
+		if err != nil {
+			writer.CloseWithError(err)
+		}
+	}()
+
+	return reader, nil
+}
+
+// GetModels fetches the model list from the client and copies each entry, in order,
+// into the domain representation. A client error is returned with a nil result.
+func (p *JanInferenceProvider) GetModels(ctx context.Context) (*inference.ModelsResponse, error) {
+	upstream, err := p.client.GetModels(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	out := &inference.ModelsResponse{
+		Object: upstream.Object,
+		Data:   make([]inference.Model, 0, len(upstream.Data)),
+	}
+	for idx := range upstream.Data {
+		src := upstream.Data[idx]
+		out.Data = append(out.Data, inference.Model{
+			ID:      src.ID,
+			Object:  src.Object,
+			Created: src.Created,
+			OwnedBy: src.OwnedBy,
+		})
+	}
+	return out, nil
+}
+
+// ValidateModel currently accepts every model name: the argument is ignored and nil is always returned
+func (p *JanInferenceProvider) ValidateModel(model string) error {
+	// For now, assume all models are supported by Jan Inference
+	// In the future, this could check against a list of supported models
+	return nil
+}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/infrastructure_provider.go b/apps/jan-api-gateway/application/app/infrastructure/infrastructure_provider.go
new file mode 100644
index 00000000..c4eeca88
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/infrastructure/infrastructure_provider.go
@@ -0,0 +1,12 @@
+package infrastructure
+
+import (
+	"github.com/google/wire"
+	"menlo.ai/jan-api-gateway/app/infrastructure/inference"
+	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
+)
+
+var InfrastructureProvider = wire.NewSet( // wire provider set for the inference infrastructure
+	janinference.NewJanInferenceClient, // constructor for the Jan Inference client
+	inference.NewJanInferenceProvider,  // constructor for the provider built on that client
+)
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/requests/response_requests.go b/apps/jan-api-gateway/application/app/interfaces/http/requests/response_requests.go
new file mode 100644
index 00000000..2aaf6df1
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/interfaces/http/requests/response_requests.go
@@ -0,0 +1,244 @@
+package requests
+
+// CreateResponseRequest is the JSON body for creating a response; only model and input are tagged binding:"required"
+// Reference: https://platform.openai.com/docs/api-reference/responses/create
+type CreateResponseRequest struct {
+	// The ID of the model to use for this response.
+	Model string `json:"model" binding:"required"`
+
+	// The input to the model. Can be a string or array of strings.
+	Input any `json:"input" binding:"required"`
+
+	// The system prompt to use for this response.
+	SystemPrompt *string `json:"system_prompt,omitempty"`
+
+	// The maximum number of tokens to generate.
+	MaxTokens *int `json:"max_tokens,omitempty"`
+
+	// The temperature to use for this response.
+	Temperature *float64 `json:"temperature,omitempty"`
+
+	// The top_p to use for this response.
+	TopP *float64 `json:"top_p,omitempty"`
+
+	// The top_k to use for this response.
+	TopK *int `json:"top_k,omitempty"`
+
+	// The repetition penalty to use for this response.
+	RepetitionPenalty *float64 `json:"repetition_penalty,omitempty"`
+
+	// The seed to use for this response.
+	Seed *int `json:"seed,omitempty"`
+
+	// The stop sequences to use for this response.
+	Stop []string `json:"stop,omitempty"`
+
+	// The presence penalty to use for this response.
+	PresencePenalty *float64 `json:"presence_penalty,omitempty"`
+
+	// The frequency penalty to use for this response.
+	FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
+
+	// The logit bias to use for this response.
+	LogitBias map[string]float64 `json:"logit_bias,omitempty"`
+
+	// The response format to use for this response.
+	ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
+
+	// The tools to use for this response.
+	Tools []Tool `json:"tools,omitempty"`
+
+	// The tool choice to use for this response.
+	ToolChoice *ToolChoice `json:"tool_choice,omitempty"`
+
+	// The metadata to use for this response.
+	Metadata map[string]any `json:"metadata,omitempty"`
+
+	// Whether to stream the response.
+	Stream *bool `json:"stream,omitempty"`
+
+	// Whether to run the response in the background.
+	Background *bool `json:"background,omitempty"`
+
+	// The timeout in seconds for this response.
+	Timeout *int `json:"timeout,omitempty"`
+
+	// The user to use for this response.
+	User *string `json:"user,omitempty"`
+
+	// The conversation ID to append items to. If not set or set to ClientCreatedRootConversationID, a new conversation will be created.
+	Conversation *string `json:"conversation,omitempty"`
+
+	// The ID of the previous response to continue from. If set, the conversation will be loaded from the previous response.
+	PreviousResponseID *string `json:"previous_response_id,omitempty"`
+
+	// Whether to store the conversation. If false, no conversation will be created or used.
+	Store *bool `json:"store,omitempty"`
+}
+
+// CreateResponseInput is a typed input item; only Type is tagged required, the per-type payloads are optional pointers
+type CreateResponseInput struct {
+	// The type of input.
+	Type InputType `json:"type" binding:"required"`
+
+	// The text input (required for text type).
+	Text *string `json:"text,omitempty"`
+
+	// The image input (required for image type).
+	Image *ImageInput `json:"image,omitempty"`
+
+	// The file input (required for file type).
+	File *FileInput `json:"file,omitempty"`
+
+	// The web search input (required for web_search type).
+	WebSearch *WebSearchInput `json:"web_search,omitempty"`
+
+	// The file search input (required for file_search type).
+	FileSearch *FileSearchInput `json:"file_search,omitempty"`
+
+	// The streaming input (required for streaming type).
+	Streaming *StreamingInput `json:"streaming,omitempty"`
+
+	// The function calls input (required for function_calls type).
+	FunctionCalls *FunctionCallsInput `json:"function_calls,omitempty"`
+
+	// The reasoning input (required for reasoning type).
+	Reasoning *ReasoningInput `json:"reasoning,omitempty"`
+}
+
+// InputType names the kind of payload an input carries; the constants below are the values defined here
+type InputType string
+
+const (
+	InputTypeText          InputType = "text"
+	InputTypeImage         InputType = "image"
+	InputTypeFile          InputType = "file"
+	InputTypeWebSearch     InputType = "web_search"
+	InputTypeFileSearch    InputType = "file_search"
+	InputTypeStreaming     InputType = "streaming"
+	InputTypeFunctionCalls InputType = "function_calls"
+	InputTypeReasoning     InputType = "reasoning"
+)
+
+// ImageInput describes an image by URL or base64 data plus a detail level; every field is optional
+type ImageInput struct {
+	// The URL of the image.
+	URL *string `json:"url,omitempty"`
+
+	// The base64 encoded image data.
+	Data *string `json:"data,omitempty"`
+
+	// The detail level for the image.
+	Detail *string `json:"detail,omitempty"`
+}
+
+// FileInput references a file by its ID, which is tagged binding:"required"
+type FileInput struct {
+	// The ID of the file.
+	FileID string `json:"file_id" binding:"required"`
+}
+
+// WebSearchInput carries a required search query with optional result-count and search-engine settings
+type WebSearchInput struct {
+	// The query to search for.
+	Query string `json:"query" binding:"required"`
+
+	// The number of results to return.
+	MaxResults *int `json:"max_results,omitempty"`
+
+	// The search engine to use.
+	SearchEngine *string `json:"search_engine,omitempty"`
+}
+
+// FileSearchInput carries a required query and required list of file IDs, with an optional result count
+type FileSearchInput struct {
+	// The query to search for.
+	Query string `json:"query" binding:"required"`
+
+	// The IDs of the files to search in.
+	FileIDs []string `json:"file_ids" binding:"required"`
+
+	// The number of results to return.
+	MaxResults *int `json:"max_results,omitempty"`
+}
+
+// StreamingInput describes a request to stream from: a required URL plus optional headers, method and body
+type StreamingInput struct {
+	// The URL to stream from.
+	URL string `json:"url" binding:"required"`
+
+	// The headers to send with the request.
+	Headers map[string]string `json:"headers,omitempty"`
+
+	// The method to use for the request.
+	Method *string `json:"method,omitempty"`
+
+	// The body to send with the request.
+	Body *string `json:"body,omitempty"`
+}
+
+// FunctionCallsInput wraps the list of function calls; the calls field is tagged binding:"required"
+type FunctionCallsInput struct {
+	// The function calls to make.
+	Calls []FunctionCall `json:"calls" binding:"required"`
+}
+
+// FunctionCall names one function to call (required) together with an optional argument map
+type FunctionCall struct {
+	// The name of the function to call.
+	Name string `json:"name" binding:"required"`
+
+	// The arguments to pass to the function.
+	Arguments map[string]any `json:"arguments,omitempty"`
+}
+
+// ReasoningInput carries a required reasoning task and an optional context string
+type ReasoningInput struct {
+	// The reasoning task to perform.
+	Task string `json:"task" binding:"required"`
+
+	// The context for the reasoning task.
+	Context *string `json:"context,omitempty"`
+}
+
+// ResponseFormat selects the response format by its type string, the only field it carries
+type ResponseFormat struct {
+	// The type of response format.
+	Type string `json:"type" binding:"required"`
+}
+
+// Tool describes a tool offered to the model: a required type plus an optional function definition
+type Tool struct {
+	// The type of tool.
+	Type string `json:"type" binding:"required"`
+
+	// The function definition for function tools.
+	Function *FunctionDefinition `json:"function,omitempty"`
+}
+
+// FunctionDefinition declares a function tool: a required name, an optional description and a free-form parameters map
+type FunctionDefinition struct {
+	// The name of the function.
+	Name string `json:"name" binding:"required"`
+
+	// The description of the function.
+	Description *string `json:"description,omitempty"`
+
+	// The parameters of the function.
+	Parameters map[string]any `json:"parameters,omitempty"`
+}
+
+// ToolChoice expresses the tool choice: a required type plus an optional function selector
+type ToolChoice struct {
+	// The type of tool choice.
+	Type string `json:"type" binding:"required"`
+
+	// The function to use for function tool choice.
+	Function *FunctionChoice `json:"function,omitempty"`
+}
+
+// FunctionChoice identifies the chosen function by its name, which is tagged binding:"required"
+type FunctionChoice struct {
+	// The name of the function.
+	Name string `json:"name" binding:"required"`
+}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/responses/response.go b/apps/jan-api-gateway/application/app/interfaces/http/responses/response.go
index acdf7eef..a3d87b32 100644
--- a/apps/jan-api-gateway/application/app/interfaces/http/responses/response.go
+++ b/apps/jan-api-gateway/application/app/interfaces/http/responses/response.go
@@ -20,4 +20,24 @@ type ListResponse[T any] struct {
 	HasMore bool    `json:"has_more"`
 }
 
+// OpenAIListResponse is a list envelope: optional pagination metadata plus the items of type T
+// NOTE(review): encoding/json has no "inline" tag option, so items are emitted under the "data" key, not promoted
+// Every pointer field except Object uses omitempty; Object has none, so a nil Object is serialized as null
+type OpenAIListResponse[T any] struct {
+	JanStatus *string     `json:"jan_status,omitempty"`
+	Object    *ObjectType `json:"object"`
+	FirstID   *string     `json:"first_id,omitempty"`
+	LastID    *string     `json:"last_id,omitempty"`
+	HasMore   *bool       `json:"has_more,omitempty"`
+	T         []T         `json:"data,inline"` // The list items; with encoding/json these appear as the "data" array
+}
+
+// ObjectType is the string value of the "object" field in responses; two values are defined below
+type ObjectType string
+
+const (
+	ObjectTypeResponse ObjectType = "response"
+	ObjectTypeList     ObjectType = "list"
+)
+
 const ResponseCodeOk = "000000"
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/responses/response_nonstreaming.go b/apps/jan-api-gateway/application/app/interfaces/http/responses/response_nonstreaming.go
new file mode 100644
index 00000000..718fe23c
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/interfaces/http/responses/response_nonstreaming.go
@@ -0,0 +1,477 @@
+package responses
+
+import (
+	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
+)
+
+// Response is the JSON shape of a model response object; most fields are optional and omitted when unset
+// Reference: https://platform.openai.com/docs/api-reference/responses
+type Response struct {
+	// The unique identifier for the response.
+	ID string `json:"id"`
+
+	// The object type, which is always "response".
+	Object string `json:"object"`
+
+	// The Unix timestamp (in seconds) when the response was created.
+	Created int64 `json:"created"`
+
+	// The ID of the model used for this response.
+	Model string `json:"model"`
+
+	// The status of the response.
+	Status ResponseStatus `json:"status"`
+
+	// The input that was provided to the model. Can be a string or array of strings.
+	Input any `json:"input"`
+
+	// The output generated by the model.
+	Output any `json:"output,omitempty"`
+
+	// The system prompt that was used for this response.
+	SystemPrompt *string `json:"system_prompt,omitempty"`
+
+	// The maximum number of tokens that were generated.
+	MaxTokens *int `json:"max_tokens,omitempty"`
+
+	// The temperature that was used for this response.
+	Temperature *float64 `json:"temperature,omitempty"`
+
+	// The top_p that was used for this response.
+	TopP *float64 `json:"top_p,omitempty"`
+
+	// The top_k that was used for this response.
+	TopK *int `json:"top_k,omitempty"`
+
+	// The repetition penalty that was used for this response.
+	RepetitionPenalty *float64 `json:"repetition_penalty,omitempty"`
+
+	// The seed that was used for this response.
+	Seed *int `json:"seed,omitempty"`
+
+	// The stop sequences that were used for this response.
+	Stop []string `json:"stop,omitempty"`
+
+	// The presence penalty that was used for this response.
+	PresencePenalty *float64 `json:"presence_penalty,omitempty"`
+
+	// The frequency penalty that was used for this response.
+	FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
+
+	// The logit bias that was used for this response.
+	LogitBias map[string]float64 `json:"logit_bias,omitempty"`
+
+	// The response format that was used for this response.
+	ResponseFormat *requesttypes.ResponseFormat `json:"response_format,omitempty"`
+
+	// The tools that were used for this response.
+	Tools []requesttypes.Tool `json:"tools,omitempty"`
+
+	// The tool choice that was used for this response.
+	ToolChoice *requesttypes.ToolChoice `json:"tool_choice,omitempty"`
+
+	// The metadata that was provided for this response.
+	Metadata map[string]any `json:"metadata,omitempty"`
+
+	// Whether the response was streamed.
+	Stream *bool `json:"stream,omitempty"`
+
+	// Whether the response was run in the background.
+	Background *bool `json:"background,omitempty"`
+
+	// The timeout in seconds that was used for this response.
+	Timeout *int `json:"timeout,omitempty"`
+
+	// The user that was provided for this response.
+	User *string `json:"user,omitempty"`
+
+	// The conversation that this response belongs to.
+	Conversation *ConversationInfo `json:"conversation,omitempty"`
+
+	// The usage statistics for this response.
+	Usage *DetailedUsage `json:"usage,omitempty"`
+
+	// The error that occurred during processing, if any.
+	Error *ResponseError `json:"error,omitempty"`
+
+	// The Unix timestamp (in seconds) when the response was completed.
+	CompletedAt *int64 `json:"completed_at,omitempty"`
+
+	// The Unix timestamp (in seconds) when the response was cancelled.
+	CancelledAt *int64 `json:"cancelled_at,omitempty"`
+
+	// The Unix timestamp (in seconds) when the response failed.
+	FailedAt *int64 `json:"failed_at,omitempty"`
+
+	// Additional OpenAI-style fields. The plain bool fields use omitempty, so a false value is left out of the JSON.
+	IncompleteDetails  any         `json:"incomplete_details,omitempty"`
+	Instructions       any         `json:"instructions,omitempty"`
+	MaxOutputTokens    *int        `json:"max_output_tokens,omitempty"`
+	ParallelToolCalls  bool        `json:"parallel_tool_calls,omitempty"`
+	PreviousResponseID *string     `json:"previous_response_id,omitempty"`
+	Reasoning          *Reasoning  `json:"reasoning,omitempty"`
+	Store              bool        `json:"store,omitempty"`
+	Text               *TextFormat `json:"text,omitempty"`
+	Truncation         string      `json:"truncation,omitempty"`
+}
+
+// ResponseStatus is the string status carried by Response.Status; the constants below are the values defined here
+type ResponseStatus string
+
+const (
+	ResponseStatusPending   ResponseStatus = "pending"
+	ResponseStatusRunning   ResponseStatus = "running"
+	ResponseStatusCompleted ResponseStatus = "completed"
+	ResponseStatusCancelled ResponseStatus = "cancelled"
+	ResponseStatusFailed    ResponseStatus = "failed"
+)
+
+// ResponseOutput is a typed output item: Type is always emitted, each per-type payload is an optional pointer
+type ResponseOutput struct {
+	// The type of output.
+	Type OutputType `json:"type"`
+
+	// The text output (for text type).
+	Text *TextOutput `json:"text,omitempty"`
+
+	// The image output (for image type).
+	Image *ImageOutput `json:"image,omitempty"`
+
+	// The file output (for file type).
+	File *FileOutput `json:"file,omitempty"`
+
+	// The web search output (for web_search type).
+	WebSearch *WebSearchOutput `json:"web_search,omitempty"`
+
+	// The file search output (for file_search type).
+	FileSearch *FileSearchOutput `json:"file_search,omitempty"`
+
+	// The streaming output (for streaming type).
+	Streaming *StreamingOutput `json:"streaming,omitempty"`
+
+	// The function calls output (for function_calls type).
+	FunctionCalls *FunctionCallsOutput `json:"function_calls,omitempty"`
+
+	// The reasoning output (for reasoning type).
+	Reasoning *ReasoningOutput `json:"reasoning,omitempty"`
+}
+
+// OutputType names the kind of payload a ResponseOutput carries; the constants below are the values defined here
+type OutputType string
+
+const (
+	OutputTypeText          OutputType = "text"
+	OutputTypeImage         OutputType = "image"
+	OutputTypeFile          OutputType = "file"
+	OutputTypeWebSearch     OutputType = "web_search"
+	OutputTypeFileSearch    OutputType = "file_search"
+	OutputTypeStreaming     OutputType = "streaming"
+	OutputTypeFunctionCalls OutputType = "function_calls"
+	OutputTypeReasoning     OutputType = "reasoning"
+)
+
+// TextOutput holds generated text and an optional list of annotations on it
+type TextOutput struct {
+	// The generated text.
+	Value string `json:"value"`
+
+	// The annotations for the text.
+	Annotations []Annotation `json:"annotations,omitempty"`
+}
+
+// ImageOutput describes a generated image by URL and format, with an optional pixel size
+type ImageOutput struct {
+	// The URL of the generated image.
+	URL string `json:"url"`
+
+	// The format of the image.
+	Format string `json:"format"`
+
+	// The size of the image.
+	Size *ImageSize `json:"size,omitempty"`
+}
+
+// FileOutput describes a generated file: its ID, name, size in bytes and MIME type (all always emitted)
+type FileOutput struct {
+	// The ID of the generated file.
+	FileID string `json:"file_id"`
+
+	// The name of the file.
+	Name string `json:"name"`
+
+	// The size of the file in bytes.
+	Size int64 `json:"size"`
+
+	// The MIME type of the file.
+	MimeType string `json:"mime_type"`
+}
+
+// WebSearchOutput pairs the list of web search results with the query that was used
+type WebSearchOutput struct {
+	// The search results.
+	Results []WebSearchResult `json:"results"`
+
+	// The search query that was used.
+	Query string `json:"query"`
+}
+
+// WebSearchResult is one web search hit: title, URL and snippet, plus an optional source
+type WebSearchResult struct {
+	// The title of the result.
+	Title string `json:"title"`
+
+	// The URL of the result.
+	URL string `json:"url"`
+
+	// The snippet of the result.
+	Snippet string `json:"snippet"`
+
+	// The source of the result.
+	Source *string `json:"source,omitempty"`
+}
+
+// FileSearchOutput pairs the list of file search results with the query that was used
+type FileSearchOutput struct {
+	// The search results.
+	Results []FileSearchResult `json:"results"`
+
+	// The search query that was used.
+	Query string `json:"query"`
+}
+
+// FileSearchResult is one file search hit: the file's ID and name, a snippet and a float score
+type FileSearchResult struct {
+	// The ID of the file.
+	FileID string `json:"file_id"`
+
+	// The name of the file.
+	Name string `json:"name"`
+
+	// The snippet of the result.
+	Snippet string `json:"snippet"`
+
+	// The score of the result.
+	Score float64 `json:"score"`
+}
+
+// StreamingOutput records what a streamed request returned: source URL, data, status code and headers
+type StreamingOutput struct {
+	// The URL that was streamed from.
+	URL string `json:"url"`
+
+	// The data that was received.
+	Data string `json:"data"`
+
+	// The status code of the response.
+	StatusCode int `json:"status_code"`
+
+	// The headers of the response.
+	Headers map[string]string `json:"headers"`
+}
+
+// FunctionCallsOutput wraps the list of function call results under the "calls" key
+type FunctionCallsOutput struct {
+	// The function calls that were made.
+	Calls []FunctionCallResult `json:"calls"`
+}
+
+// FunctionCallResult records one function call: its name, arguments and result, plus an optional error string
+type FunctionCallResult struct {
+	// The name of the function that was called.
+	Name string `json:"name"`
+
+	// The arguments that were passed to the function.
+	Arguments map[string]any `json:"arguments"`
+
+	// The result of the function call.
+	Result any `json:"result"`
+
+	// The error that occurred during the function call, if any.
+	Error *string `json:"error,omitempty"`
+}
+
+// ReasoningOutput holds a reasoning task, its result and an optional list of the steps taken
+type ReasoningOutput struct {
+	// The reasoning task that was performed.
+	Task string `json:"task"`
+
+	// The result of the reasoning task.
+	Result string `json:"result"`
+
+	// The steps taken during reasoning.
+	Steps []ReasoningStep `json:"steps,omitempty"`
+}
+
+// ReasoningStep is one numbered reasoning step with its description and result
+type ReasoningStep struct {
+	// The step number.
+	Step int `json:"step"`
+
+	// The description of the step.
+	Description string `json:"description"`
+
+	// The result of the step.
+	Result string `json:"result"`
+}
+
+// Annotation marks a span of text: a type, start and end indexes, the text itself and optional metadata
+type Annotation struct {
+	// The type of annotation.
+	Type string `json:"type"`
+
+	// The start index of the annotation.
+	StartIndex int `json:"start_index"`
+
+	// The end index of the annotation.
+	EndIndex int `json:"end_index"`
+
+	// The text of the annotation.
+	Text string `json:"text"`
+
+	// The metadata for the annotation.
+	Metadata map[string]any `json:"metadata,omitempty"`
+}
+
+// ImageSize gives an image's width and height in pixels
+type ImageSize struct {
+	// The width of the image in pixels.
+	Width int `json:"width"`
+
+	// The height of the image in pixels.
+	Height int `json:"height"`
+}
+
+// Usage holds prompt, completion and total token counts; all three are always emitted
+type Usage struct {
+	// The number of tokens in the prompt.
+	PromptTokens int `json:"prompt_tokens"`
+
+	// The number of tokens in the completion.
+	CompletionTokens int `json:"completion_tokens"`
+
+	// The total number of tokens used.
+	TotalTokens int `json:"total_tokens"`
+}
+
+// ResponseError describes a processing error: a code, a message and an optional details map
+type ResponseError struct {
+	// The error code.
+	Code string `json:"code"`
+
+	// The error message.
+	Message string `json:"message"`
+
+	// The error details.
+	Details map[string]any `json:"details,omitempty"`
+}
+
+// InputItem is a stored input item: identity fields plus the same per-type payloads as the request input types
+type InputItem struct {
+	// The unique identifier for the input item.
+	ID string `json:"id"`
+
+	// The object type, which is always "input_item".
+	Object string `json:"object"`
+
+	// The Unix timestamp (in seconds) when the input item was created.
+	Created int64 `json:"created"`
+
+	// The type of input item.
+	Type requesttypes.InputType `json:"type"`
+
+	// The text content (for text type).
+	Text *string `json:"text,omitempty"`
+
+	// The image content (for image type).
+	Image *requesttypes.ImageInput `json:"image,omitempty"`
+
+	// The file content (for file type).
+	File *requesttypes.FileInput `json:"file,omitempty"`
+
+	// The web search content (for web_search type).
+	WebSearch *requesttypes.WebSearchInput `json:"web_search,omitempty"`
+
+	// The file search content (for file_search type).
+	FileSearch *requesttypes.FileSearchInput `json:"file_search,omitempty"`
+
+	// The streaming content (for streaming type).
+	Streaming *requesttypes.StreamingInput `json:"streaming,omitempty"`
+
+	// The function calls content (for function_calls type).
+	FunctionCalls *requesttypes.FunctionCallsInput `json:"function_calls,omitempty"`
+
+	// The reasoning content (for reasoning type).
+	Reasoning *requesttypes.ReasoningInput `json:"reasoning,omitempty"`
+}
+
+// ListInputItemsResponse is the list envelope for input items: the items plus first/last IDs and a has_more flag
+type ListInputItemsResponse struct {
+	// The object type, which is always "list".
+	Object string `json:"object"`
+
+	// The list of input items.
+	Data []InputItem `json:"data"`
+
+	// The first ID in the list.
+	FirstID *string `json:"first_id,omitempty"`
+
+	// The last ID in the list.
+	LastID *string `json:"last_id,omitempty"`
+
+	// Whether there are more items available.
+	HasMore bool `json:"has_more"`
+}
+
+// ConversationInfo identifies the conversation a response belongs to; it carries only the conversation ID
+type ConversationInfo struct {
+	// The unique ID of the conversation.
+	ID string `json:"id"`
+}
+
+// DetailedUsage holds input, output and total token counts with optional per-direction token details
+type DetailedUsage struct {
+	// The number of tokens in the prompt.
+	InputTokens int `json:"input_tokens"`
+
+	// The number of tokens in the completion.
+	OutputTokens int `json:"output_tokens"`
+
+	// The total number of tokens used.
+	TotalTokens int `json:"total_tokens"`
+
+	// Details about input tokens.
+	InputTokensDetails *TokenDetails `json:"input_tokens_details,omitempty"`
+
+	// Details about output tokens.
+	OutputTokensDetails *TokenDetails `json:"output_tokens_details,omitempty"`
+}
+
+// TokenDetails breaks a token count into cached and reasoning tokens; both use omitempty, so zero counts are omitted
+type TokenDetails struct {
+	// The number of cached tokens.
+	CachedTokens int `json:"cached_tokens,omitempty"`
+
+	// The number of reasoning tokens.
+	ReasoningTokens int `json:"reasoning_tokens,omitempty"`
+}
+
+// Reasoning carries optional reasoning information: an effort level and a summary, both omitted when nil
+type Reasoning struct {
+	// The effort level for reasoning.
+	Effort *string `json:"effort,omitempty"`
+
+	// The summary of reasoning.
+	Summary *string `json:"summary,omitempty"`
+}
+
+// TextFormat wraps an optional FormatType under the "format" key
+type TextFormat struct {
+	// The format type.
+	Format *FormatType `json:"format,omitempty"`
+}
+
+// FormatType names a format by its type string, the only field it carries
+type FormatType struct {
+	// The type of format.
+	Type string `json:"type"`
+}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/responses/response_streaming.go b/apps/jan-api-gateway/application/app/interfaces/http/responses/response_streaming.go
new file mode 100644
index 00000000..897bfd3d
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/interfaces/http/responses/response_streaming.go
@@ -0,0 +1,505 @@
+package responses
+
+// BaseStreamingEvent holds the type and sequence_number fields shared by every event that embeds it
+type BaseStreamingEvent struct {
+	// The type of event.
+	Type string `json:"type"`
+
+	// The sequence number of the event.
+	SequenceNumber int `json:"sequence_number"`
+}
+
+// ResponseCreatedEvent represents a response.created event: the base event fields plus a typed Response
+type ResponseCreatedEvent struct {
+	BaseStreamingEvent
+	Response Response `json:"response"`
+}
+
+// ResponseInProgressEvent represents a response.in_progress event; its response payload is an untyped map
+type ResponseInProgressEvent struct {
+	BaseStreamingEvent
+	Response map[string]any `json:"response"`
+}
+
+// ResponseOutputItemAddedEvent represents a response.output_item.added event for the item at output_index
+type ResponseOutputItemAddedEvent struct {
+	BaseStreamingEvent
+	OutputIndex int                `json:"output_index"`
+	Item        ResponseOutputItem `json:"item"`
+}
+
+// ResponseContentPartAddedEvent represents a response.content_part.added event located by item_id, output_index and content_index
+type ResponseContentPartAddedEvent struct {
+	BaseStreamingEvent
+	ItemID       string              `json:"item_id"`
+	OutputIndex  int                 `json:"output_index"`
+	ContentIndex int                 `json:"content_index"`
+	Part         ResponseContentPart `json:"part"`
+}
+
+// ResponseOutputTextDeltaEvent represents a response.output_text.delta event carrying one text delta for a content part
+type ResponseOutputTextDeltaEvent struct {
+	BaseStreamingEvent
+	ItemID       string    `json:"item_id"`
+	OutputIndex  int       `json:"output_index"`
+	ContentIndex int       `json:"content_index"`
+	Delta        string    `json:"delta"`
+	Logprobs     []Logprob `json:"logprobs"`
+	Obfuscation  string    `json:"obfuscation"`
+}
+
+// ResponseOutputItem represents an output item: id, type, status, role and its content parts
+type ResponseOutputItem struct {
+	ID      string                `json:"id"`
+	Type    string                `json:"type"`
+	Status  string                `json:"status"`
+	Content []ResponseContentPart `json:"content"`
+	Role    string                `json:"role"`
+}
+
+// ResponseContentPart represents a content part: its type, text, annotations and logprobs
+type ResponseContentPart struct {
+	Type        string       `json:"type"`
+	Annotations []Annotation `json:"annotations"`
+	Logprobs    []Logprob    `json:"logprobs"`
+	Text        string       `json:"text"`
+}
+
+// Logprob represents log probability data for one token; bytes and top_logprobs are omitted from JSON when empty
+type Logprob struct {
+	Token       string       `json:"token"`
+	Logprob     float64      `json:"logprob"`
+	Bytes       []int        `json:"bytes,omitempty"`
+	TopLogprobs []TopLogprob `json:"top_logprobs,omitempty"`
+}
+
+// TopLogprob represents one entry of Logprob.TopLogprobs; bytes is omitted from JSON when empty
+type TopLogprob struct {
+	Token   string  `json:"token"`
+	Logprob float64 `json:"logprob"`
+	Bytes   []int   `json:"bytes,omitempty"`
+}
+
+// TextDelta represents a delta for text output (legacy)
+type TextDelta struct {
+	// The delta text.
+	Delta string `json:"delta"`
+
+	// The annotations for the delta; omitted from JSON when empty.
+	Annotations []Annotation `json:"annotations,omitempty"`
+}
+
+// ResponseOutputTextDoneEvent represents a response.output_text.done event carrying the text of a content part
+type ResponseOutputTextDoneEvent struct {
+	BaseStreamingEvent
+	ItemID       string    `json:"item_id"`
+	OutputIndex  int       `json:"output_index"`
+	ContentIndex int       `json:"content_index"`
+	Text         string    `json:"text"`
+	Logprobs     []Logprob `json:"logprobs"`
+}
+
+// ResponseContentPartDoneEvent represents a response.content_part.done event; same fields as ResponseContentPartAddedEvent
+type ResponseContentPartDoneEvent struct {
+	BaseStreamingEvent
+	ItemID       string              `json:"item_id"`
+	OutputIndex  int                 `json:"output_index"`
+	ContentIndex int                 `json:"content_index"`
+	Part         ResponseContentPart `json:"part"`
+}
+
+// ResponseOutputItemDoneEvent represents a response.output_item.done event; same fields as ResponseOutputItemAddedEvent
+type ResponseOutputItemDoneEvent struct {
+	BaseStreamingEvent
+	OutputIndex int                `json:"output_index"`
+	Item        ResponseOutputItem `json:"item"`
+}
+
+// ResponseCompletedEvent represents a response.completed event: the base event fields plus a typed Response
+type ResponseCompletedEvent struct {
+	BaseStreamingEvent
+	Response Response `json:"response"`
+}
+
+// TextCompletion represents the completion of text output
+type TextCompletion struct {
+	// The final text (serialized as "value").
+	Value string `json:"value"`
+
+	// The annotations for the text; omitted from JSON when empty.
+	Annotations []Annotation `json:"annotations,omitempty"`
+}
+
+// ResponseOutputImageDeltaEvent represents a response.output_image.delta event (event/created/response_id envelope)
+type ResponseOutputImageDeltaEvent struct {
+	// The type of event, always "response.output_image.delta".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The delta payload, wrapped in an ImageDelta.
+	Data ImageDelta `json:"data"`
+}
+
+// ImageDelta is the data payload of ResponseOutputImageDeltaEvent
+type ImageDelta struct {
+	// The delta image data (serialized as "delta").
+	Delta ImageOutput `json:"delta"`
+}
+
+// ResponseOutputImageDoneEvent represents a response.output_image.done event (event/created/response_id envelope)
+type ResponseOutputImageDoneEvent struct {
+	// The type of event, always "response.output_image.done".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The completion payload, wrapped in an ImageCompletion.
+	Data ImageCompletion `json:"data"`
+}
+
+// ImageCompletion is the data payload of ResponseOutputImageDoneEvent
+type ImageCompletion struct {
+	// The final image data (serialized as "value").
+	Value ImageOutput `json:"value"`
+}
+
+// ResponseOutputFileDeltaEvent represents a response.output_file.delta event (event/created/response_id envelope)
+type ResponseOutputFileDeltaEvent struct {
+	// The type of event, always "response.output_file.delta".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The delta payload, wrapped in a FileDelta.
+	Data FileDelta `json:"data"`
+}
+
+// FileDelta is the data payload of ResponseOutputFileDeltaEvent
+type FileDelta struct {
+	// The delta file data (serialized as "delta").
+	Delta FileOutput `json:"delta"`
+}
+
+// ResponseOutputFileDoneEvent represents a response.output_file.done event (event/created/response_id envelope)
+type ResponseOutputFileDoneEvent struct {
+	// The type of event, always "response.output_file.done".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The completion payload, wrapped in a FileCompletion.
+	Data FileCompletion `json:"data"`
+}
+
+// FileCompletion is the data payload of ResponseOutputFileDoneEvent
+type FileCompletion struct {
+	// The final file data (serialized as "value").
+	Value FileOutput `json:"value"`
+}
+
+// ResponseOutputWebSearchDeltaEvent represents a response.output_web_search.delta event (event/created/response_id envelope)
+type ResponseOutputWebSearchDeltaEvent struct {
+	// The type of event, always "response.output_web_search.delta".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The delta payload, wrapped in a WebSearchDelta.
+	Data WebSearchDelta `json:"data"`
+}
+
+// WebSearchDelta is the data payload of ResponseOutputWebSearchDeltaEvent
+type WebSearchDelta struct {
+	// The delta web search data (serialized as "delta").
+	Delta WebSearchOutput `json:"delta"`
+}
+
+// ResponseOutputWebSearchDoneEvent represents a response.output_web_search.done event (event/created/response_id envelope)
+type ResponseOutputWebSearchDoneEvent struct {
+	// The type of event, always "response.output_web_search.done".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The completion payload, wrapped in a WebSearchCompletion.
+	Data WebSearchCompletion `json:"data"`
+}
+
+// WebSearchCompletion is the data payload of ResponseOutputWebSearchDoneEvent
+type WebSearchCompletion struct {
+	// The final web search data (serialized as "value").
+	Value WebSearchOutput `json:"value"`
+}
+
+// ResponseOutputFileSearchDeltaEvent represents a response.output_file_search.delta event (event/created/response_id envelope)
+type ResponseOutputFileSearchDeltaEvent struct {
+	// The type of event, always "response.output_file_search.delta".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The delta payload, wrapped in a FileSearchDelta.
+	Data FileSearchDelta `json:"data"`
+}
+
+// FileSearchDelta is the data payload of ResponseOutputFileSearchDeltaEvent
+type FileSearchDelta struct {
+	// The delta file search data (serialized as "delta").
+	Delta FileSearchOutput `json:"delta"`
+}
+
+// ResponseOutputFileSearchDoneEvent represents a response.output_file_search.done event (event/created/response_id envelope)
+type ResponseOutputFileSearchDoneEvent struct {
+	// The type of event, always "response.output_file_search.done".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The completion payload, wrapped in a FileSearchCompletion.
+	Data FileSearchCompletion `json:"data"`
+}
+
+// FileSearchCompletion is the data payload of ResponseOutputFileSearchDoneEvent
+type FileSearchCompletion struct {
+	// The final file search data (serialized as "value").
+	Value FileSearchOutput `json:"value"`
+}
+
+// ResponseOutputStreamingDeltaEvent represents a response.output_streaming.delta event (event/created/response_id envelope)
+type ResponseOutputStreamingDeltaEvent struct {
+	// The type of event, always "response.output_streaming.delta".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The delta payload, wrapped in a StreamingDelta.
+	Data StreamingDelta `json:"data"`
+}
+
+// StreamingDelta is the data payload of ResponseOutputStreamingDeltaEvent
+type StreamingDelta struct {
+	// The delta streaming data (serialized as "delta").
+	Delta StreamingOutput `json:"delta"`
+}
+
+// ResponseOutputStreamingDoneEvent represents a response.output_streaming.done event (event/created/response_id envelope)
+type ResponseOutputStreamingDoneEvent struct {
+	// The type of event, always "response.output_streaming.done".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The completion payload, wrapped in a StreamingCompletion.
+	Data StreamingCompletion `json:"data"`
+}
+
+// StreamingCompletion is the data payload of ResponseOutputStreamingDoneEvent
+type StreamingCompletion struct {
+	// The final streaming data (serialized as "value").
+	Value StreamingOutput `json:"value"`
+}
+
+// ResponseOutputFunctionCallsDeltaEvent represents a response.output_function_calls.delta event carrying one FunctionCallDelta
+type ResponseOutputFunctionCallsDeltaEvent struct {
+	BaseStreamingEvent
+	ItemID       string            `json:"item_id"`
+	OutputIndex  int               `json:"output_index"`
+	ContentIndex int               `json:"content_index"`
+	Delta        FunctionCallDelta `json:"delta"`
+	Logprobs     []Logprob         `json:"logprobs"`
+}
+
+// FunctionCallDelta is the delta of one function call: its name and an argument map
+type FunctionCallDelta struct {
+	Name      string         `json:"name"`
+	Arguments map[string]any `json:"arguments"`
+}
+
+// FunctionCallsDelta wraps a FunctionCallsOutput delta for function calls output
+type FunctionCallsDelta struct {
+	// The delta function calls data (serialized as "delta").
+	Delta FunctionCallsOutput `json:"delta"`
+}
+
+// ResponseOutputFunctionCallsDoneEvent represents a response.output_function_calls.done event (event/created/response_id envelope)
+type ResponseOutputFunctionCallsDoneEvent struct {
+	// The type of event, always "response.output_function_calls.done".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The completion payload, wrapped in a FunctionCallsCompletion.
+	Data FunctionCallsCompletion `json:"data"`
+}
+
+// FunctionCallsCompletion is the data payload of ResponseOutputFunctionCallsDoneEvent
+type FunctionCallsCompletion struct {
+	// The final function calls data (serialized as "value").
+	Value FunctionCallsOutput `json:"value"`
+}
+
+// ResponseOutputReasoningDeltaEvent represents a response.output_reasoning.delta event (event/created/response_id envelope)
+type ResponseOutputReasoningDeltaEvent struct {
+	// The type of event, always "response.output_reasoning.delta".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The delta payload, wrapped in a ReasoningDelta.
+	Data ReasoningDelta `json:"data"`
+}
+
+// ReasoningDelta is the data payload of ResponseOutputReasoningDeltaEvent
+type ReasoningDelta struct {
+	// The delta reasoning data (serialized as "delta").
+	Delta ReasoningOutput `json:"delta"`
+}
+
+// ResponseOutputReasoningDoneEvent represents a response.output_reasoning.done event (event/created/response_id envelope)
+type ResponseOutputReasoningDoneEvent struct {
+	// The type of event, always "response.output_reasoning.done".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The completion payload, wrapped in a ReasoningCompletion.
+	Data ReasoningCompletion `json:"data"`
+}
+
+// ReasoningCompletion is the data payload of ResponseOutputReasoningDoneEvent
+type ReasoningCompletion struct {
+	// The final reasoning data (serialized as "value").
+	Value ReasoningOutput `json:"value"`
+}
+
+// ResponseDoneEvent represents a response.done event (event/created/response_id envelope)
+type ResponseDoneEvent struct {
+	// The type of event, always "response.done".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The completion payload, wrapped in a ResponseCompletion.
+	Data ResponseCompletion `json:"data"`
+}
+
+// ResponseCompletion is the data payload of ResponseDoneEvent
+type ResponseCompletion struct {
+	// The final response data (serialized as "value").
+	Value Response `json:"value"`
+}
+
+// ResponseErrorEvent represents a response.error event (event/created/response_id envelope)
+type ResponseErrorEvent struct {
+	// The type of event, always "response.error".
+	Event string `json:"event"`
+
+	// The Unix timestamp (in seconds) when the event was created.
+	Created int64 `json:"created"`
+
+	// The ID of the response this event belongs to.
+	ResponseID string `json:"response_id"`
+
+	// The error payload, carried directly as a ResponseError.
+	Data ResponseError `json:"data"`
+}
+
+// ResponseReasoningSummaryPartAddedEvent represents a response.reasoning_summary_part.added event with an inline type/text part
+type ResponseReasoningSummaryPartAddedEvent struct {
+	BaseStreamingEvent
+	ItemID       string `json:"item_id"`
+	OutputIndex  int    `json:"output_index"`
+	SummaryIndex int    `json:"summary_index"`
+	Part         struct {
+		Type string `json:"type"`
+		Text string `json:"text"`
+	} `json:"part"`
+}
+
+// ResponseReasoningSummaryTextDeltaEvent represents a response.reasoning_summary_text.delta event carrying one text delta
+type ResponseReasoningSummaryTextDeltaEvent struct {
+	BaseStreamingEvent
+	ItemID       string `json:"item_id"`
+	OutputIndex  int    `json:"output_index"`
+	SummaryIndex int    `json:"summary_index"`
+	Delta        string `json:"delta"`
+	Obfuscation  string `json:"obfuscation"`
+}
+
+// ResponseReasoningSummaryTextDoneEvent represents a response.reasoning_summary_text.done event carrying the summary text
+type ResponseReasoningSummaryTextDoneEvent struct {
+	BaseStreamingEvent
+	ItemID       string `json:"item_id"`
+	OutputIndex  int    `json:"output_index"`
+	SummaryIndex int    `json:"summary_index"`
+	Text         string `json:"text"`
+}
+
+// ResponseReasoningSummaryPartDoneEvent represents a response.reasoning_summary_part.done event with an inline type/text part
+type ResponseReasoningSummaryPartDoneEvent struct {
+	BaseStreamingEvent
+	ItemID       string `json:"item_id"`
+	OutputIndex  int    `json:"output_index"`
+	SummaryIndex int    `json:"summary_index"`
+	Part         struct {
+		Type string `json:"type"`
+		Text string `json:"text"`
+	} `json:"part"`
+}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/routes_provider.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/routes_provider.go
index 16046c90..f7c28803 100644
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/routes_provider.go
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/routes_provider.go
@@ -11,7 +11,8 @@ import (
 	mcp_impl "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp/mcp_impl"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects"
-	projectApikeyRoute "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects/api_keys"
+	api_keys "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects/api_keys"
+	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/responses"
 )
 
 var RouteProvider = wire.NewSet(
@@ -25,7 +26,8 @@ var RouteProvider = wire.NewSet(
 	chat.NewChatRoute,
 	mcp.NewMCPAPI,
 	v1.NewModelAPI,
+	responses.NewResponseRoute,
 	v1.NewV1Route,
 	conversations.NewConversationAPI,
-	projectApikeyRoute.NewProjectApiKeyRoute,
+	api_keys.NewProjectApiKeyRoute,
 )
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/auth.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/auth.go
index 665a5f19..e571204d 100644
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/auth.go
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/auth.go
@@ -52,6 +52,8 @@ type AccessTokenResponseObjectType string
 
 const AccessTokenResponseObjectTypeObject = "access.token"
 
+const AccessTokenExpirationDuration = 15 * time.Minute
+
 type AccessTokenResponse struct {
 	Object      AccessTokenResponseObjectType `json:"object"`
 	AccessToken string                        `json:"access_token"`
@@ -162,7 +164,7 @@ func (authRoute *AuthRoute) RefreshToken(reqCtx *gin.Context) {
 		userClaim.ID = user.PublicID
 	}
 
-	accessTokenExp := time.Now().Add(15 * time.Minute)
+	accessTokenExp := time.Now().Add(AccessTokenExpirationDuration)
 	accessTokenString, err := auth.CreateJwtSignedString(auth.UserClaim{
 		Email: userClaim.Email,
 		Name:  userClaim.Name,
@@ -251,7 +253,7 @@ func (authRoute *AuthRoute) GuestLogin(reqCtx *gin.Context) {
 		id = userClaim.ID
 	}
 
-	accessTokenExp := time.Now().Add(15 * time.Minute)
+	accessTokenExp := time.Now().Add(AccessTokenExpirationDuration)
 	accessTokenString, err := auth.CreateJwtSignedString(auth.UserClaim{
 		Email: email,
 		Name:  name,
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/google/google.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/google/google.go
index e7b0df1d..5a70b512 100644
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/google/google.go
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/google/google.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/base64"
+	"fmt"
 	"net/http"
 	"time"
 
@@ -59,11 +60,17 @@ type GoogleCallbackRequest struct {
 	State string `json:"state"`
 }
 
+type TokenResponse struct {
+	ExpiresIn int `json:"expires_in"` // token lifetime in seconds; handleGoogleToken rejects values <= 0
+}
+
 // @Enum(access.token)
 type AccessTokenResponseObjectType string
 
 const AccessTokenResponseObjectTypeObject = "access.token"
 
+const AccessTokenExpirationDuration = 15 * time.Minute
+
 type AccessTokenResponse struct {
 	Object      AccessTokenResponseObjectType `json:"object"`
 	AccessToken string                        `json:"access_token"`
@@ -78,6 +85,15 @@ func generateState() (string, error) {
 	return base64.URLEncoding.EncodeToString(b), nil
 }
 
+// handleGoogleToken validates tokenResp.ExpiresIn and returns the expiry for the gateway access token.
+func handleGoogleToken(tokenResp TokenResponse) (time.Time, error) {
+	if tokenResp.ExpiresIn <= 0 {
+		return time.Time{}, fmt.Errorf("invalid expires_in value")
+	}
+	// expires_in is only validated; the expiry is always now + AccessTokenExpirationDuration.
+	return time.Now().Add(AccessTokenExpirationDuration), nil
+}
+
 // @Summary Google OAuth2 Callback
 // @Description Handles the callback from the Google OAuth2 provider to exchange the authorization code for a token, verify the user, and issue access and refresh tokens.
 // @Tags Authentication
@@ -121,6 +137,11 @@ func (googleAuthAPI *GoogleAuthAPI) HandleGoogleCallback(reqCtx *gin.Context) {
 		return
 	}
 
+	// Derive expires_in as the whole seconds remaining until the OAuth token's expiry
+	tokenResp := TokenResponse{
+		ExpiresIn: int(time.Until(token.Expiry).Seconds()),
+	}
+
 	rawIDToken, ok := token.Extra("id_token").(string)
 	if !ok {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
@@ -161,7 +182,7 @@ func (googleAuthAPI *GoogleAuthAPI) HandleGoogleCallback(reqCtx *gin.Context) {
 		return
 	}
 	if exists == nil {
-		exists, err = googleAuthAPI.authService.RegisterUser(reqCtx.Request.Context(), &user.User{
+		exists, err = googleAuthAPI.userService.RegisterUser(reqCtx.Request.Context(), &user.User{
 			Name:    claims.Name,
 			Email:   claims.Email,
 			Enabled: true,
@@ -175,7 +196,17 @@ func (googleAuthAPI *GoogleAuthAPI) HandleGoogleCallback(reqCtx *gin.Context) {
 		}
 	}
 
-	accessTokenExp := time.Now().Add(15 * time.Minute)
+	// Use handleGoogleToken to reject a Google token with no remaining lifetime and to compute the expiry.
+	// Note: the access token still expires after the fixed AccessTokenExpirationDuration, not after Google's expires_in.
+	accessTokenExp, err := handleGoogleToken(tokenResp)
+	if err != nil {
+		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
+			Code:  "c6d6bafd-b9f3-4ebb-9c90-a21b07308ebc",
+			Error: err.Error(),
+		})
+		return
+	}
+
 	accessTokenString, err := auth.CreateJwtSignedString(auth.UserClaim{
 		Email: exists.Email,
 		Name:  exists.Name,
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/chat_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/chat_route.go
index 9526f530..47931f79 100644
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/chat_route.go
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/chat_route.go
@@ -1,20 +1,29 @@
 package chat
 
-import "github.com/gin-gonic/gin"
+import (
+	"github.com/gin-gonic/gin"
+	"menlo.ai/jan-api-gateway/app/domain/auth"
+)
 
 type ChatRoute struct {
 	completionAPI *CompletionAPI
+	authService   *auth.AuthService
 }
 
 func NewChatRoute(
 	completionAPI *CompletionAPI,
+	authService *auth.AuthService,
 ) *ChatRoute {
 	return &ChatRoute{
-		completionAPI,
+		completionAPI: completionAPI,
+		authService:   authService,
 	}
 }
 
 func (chatRoute *ChatRoute) RegisterRouter(router gin.IRouter) {
-	chatRouter := router.Group("/chat")
+	chatRouter := router.Group("/chat",
+		chatRoute.authService.AppUserAuthMiddleware(),
+		chatRoute.authService.RegisteredUserMiddleware(),
+	)
 	chatRoute.completionAPI.RegisterRouter(chatRouter)
 }
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_nonstream_handler.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_nonstream_handler.go
new file mode 100644
index 00000000..0c2b961c
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_nonstream_handler.go
@@ -0,0 +1,229 @@
+package chat
+
+import (
+	"context"
+
+	openai "github.com/sashabaranov/go-openai"
+	"menlo.ai/jan-api-gateway/app/domain/common"
+	"menlo.ai/jan-api-gateway/app/domain/conversation"
+	"menlo.ai/jan-api-gateway/app/domain/inference"
+)
+
+// CompletionNonStreamHandler runs non-streaming completions and saves their messages to conversations
+type CompletionNonStreamHandler struct {
+	inferenceProvider   inference.InferenceProvider
+	conversationService *conversation.ConversationService
+}
+
+// NewCompletionNonStreamHandler builds a handler from the given inference provider and conversation service
+func NewCompletionNonStreamHandler(inferenceProvider inference.InferenceProvider, conversationService *conversation.ConversationService) *CompletionNonStreamHandler {
+	handler := new(CompletionNonStreamHandler)
+	handler.inferenceProvider = inferenceProvider
+	handler.conversationService = conversationService
+	return handler
+}
+
+// CreateCompletion runs a non-streaming completion and converts the result to a CompletionResponse
+func (uc *CompletionNonStreamHandler) CreateCompletion(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*CompletionResponse, *common.Error) {
+	// Delegate the request to the configured inference provider.
+	providerResp, providerErr := uc.inferenceProvider.CreateCompletion(ctx, apiKey, request)
+	if providerErr != nil {
+		// Wrap the provider failure with this call site's error code.
+		return nil, common.NewError(providerErr, "c7d8e9f0-g1h2-3456-cdef-789012345678")
+	}
+
+	converted := uc.convertResponse(providerResp)
+	return converted, nil
+}
+
+// convertResponse maps an OpenAI chat completion response onto the gateway's CompletionResponse
+func (uc *CompletionNonStreamHandler) convertResponse(response *openai.ChatCompletionResponse) *CompletionResponse {
+	converted := &CompletionResponse{
+		ID:      response.ID,
+		Object:  response.Object,
+		Created: response.Created,
+		Model:   response.Model,
+		Choices: make([]CompletionChoice, len(response.Choices)),
+		Usage: Usage{
+			PromptTokens:     response.Usage.PromptTokens,
+			CompletionTokens: response.Usage.CompletionTokens,
+			TotalTokens:      response.Usage.TotalTokens,
+		},
+	}
+
+	// Only index, role, content and finish reason are carried over from each choice.
+	for idx := range response.Choices {
+		src := &response.Choices[idx]
+		converted.Choices[idx] = CompletionChoice{
+			Index:        src.Index,
+			Message:      CompletionMessage{Role: src.Message.Role, Content: src.Message.Content},
+			FinishReason: string(src.FinishReason),
+		}
+	}
+
+	return converted
+}
+
+// SaveMessagesToConversation persists the request messages to conv without an extra assistant reply
+func (c *CompletionNonStreamHandler) SaveMessagesToConversation(ctx context.Context, conv *conversation.Conversation, userID uint, messages []openai.ChatCompletionMessage) *common.Error {
+	_, saveErr := c.saveMessagesToConversationWithAssistant(ctx, conv, userID, messages, "")
+	return saveErr
+}
+
+// SaveMessagesToConversationWithAssistant is the exported wrapper around saveMessagesToConversationWithAssistant; it forwards all arguments unchanged
+func (c *CompletionNonStreamHandler) SaveMessagesToConversationWithAssistant(ctx context.Context, conv *conversation.Conversation, userID uint, messages []openai.ChatCompletionMessage, assistantContent string) (*conversation.Item, *common.Error) {
+	return c.saveMessagesToConversationWithAssistant(ctx, conv, userID, messages, assistantContent)
+}
+
+// saveMessagesToConversationWithAssistant persists messages to conv as conversation items.
+//
+// Each message becomes one item: system, user and assistant roles map to their item roles,
+// any other role is stored as user, and only text parts (or the plain string content) are kept.
+// When assistantContent is non-empty and no input message had the assistant role, it is saved
+// as one more assistant item.
+//
+// It returns the last assistant item saved (nil if there was none), or a *common.Error wrapping
+// the first AddItem failure. A nil conv is a no-op that returns (nil, nil).
+func (c *CompletionNonStreamHandler) saveMessagesToConversationWithAssistant(ctx context.Context, conv *conversation.Conversation, userID uint, messages []openai.ChatCompletionMessage, assistantContent string) (*conversation.Item, *common.Error) {
+	if conv == nil {
+		return nil, nil
+	}
+
+	// textContent wraps a plain string as a single "text" content entry.
+	textContent := func(value string) conversation.Content {
+		return conversation.Content{Type: "text", Text: &conversation.Text{Value: value}}
+	}
+
+	// lastAssistant is the most recently saved item whose source message had the assistant role.
+	var lastAssistant *conversation.Item
+
+	for i := range messages {
+		msg := &messages[i]
+
+		// Translate the OpenAI role; unrecognized roles fall back to user.
+		var itemRole conversation.ItemRole
+		switch msg.Role {
+		case openai.ChatMessageRoleSystem:
+			itemRole = conversation.ItemRoleSystem
+		case openai.ChatMessageRoleAssistant:
+			itemRole = conversation.ItemRoleAssistant
+		case openai.ChatMessageRoleUser:
+			itemRole = conversation.ItemRoleUser
+		default:
+			itemRole = conversation.ItemRoleUser
+		}
+
+		// Keep only the text parts of multi-part content.
+		parts := make([]conversation.Content, 0, len(msg.MultiContent))
+		for _, part := range msg.MultiContent {
+			if part.Type != openai.ChatMessagePartTypeText {
+				continue
+			}
+			parts = append(parts, textContent(part.Text))
+		}
+
+		// Fall back to the plain string content when no text parts were collected.
+		if len(parts) == 0 && msg.Content != "" {
+			parts = append(parts, textContent(msg.Content))
+		}
+
+		// Persist the message as a conversation item.
+		saved, addErr := c.conversationService.AddItem(ctx, conv, userID, conversation.ItemTypeMessage, &itemRole, parts)
+		if addErr != nil {
+			return nil, common.NewError(addErr, "b2c3d4e5-f6g7-8901-bcde-f23456789012")
+		}
+
+		// Remember assistant items so the latest one can be returned.
+		if msg.Role == openai.ChatMessageRoleAssistant {
+			lastAssistant = saved
+		}
+	}
+
+	// An assistant item from the input takes precedence over assistantContent.
+	if assistantContent == "" || lastAssistant != nil {
+		return lastAssistant, nil
+	}
+
+	// Store the provided reply as a new assistant item.
+	replyRole := conversation.ItemRoleAssistant
+	replyContent := []conversation.Content{textContent(assistantContent)}
+	reply, addErr := c.conversationService.AddItem(ctx, conv, userID, conversation.ItemTypeMessage, &replyRole, replyContent)
+	if addErr != nil {
+		return nil, common.NewError(addErr, "c3d4e5f6-g7h8-9012-cdef-345678901234")
+	}
+
+	return reply, nil
+}
+
+// CompletionResponse is the JSON payload returned for a chat completion; metadata is omitted when empty
+type CompletionResponse struct {
+	ID       string             `json:"id"`
+	Object   string             `json:"object"`
+	Created  int64              `json:"created"`
+	Model    string             `json:"model"`
+	Choices  []CompletionChoice `json:"choices"`
+	Usage    Usage              `json:"usage"`
+	Metadata map[string]any     `json:"metadata,omitempty"`
+}
+
+type CompletionChoice struct {
+	Index        int               `json:"index"`         // index copied from the source choice
+	Message      CompletionMessage `json:"message"`       // role and text content of the choice
+	FinishReason string            `json:"finish_reason"` // string form of the source finish reason
+}
+
+type CompletionMessage struct {
+	Role    string `json:"role"`    // role copied from the source choice's message
+	Content string `json:"content"` // plain text content copied from the source choice's message
+}
+
+type Usage struct {
+	PromptTokens     int `json:"prompt_tokens"`     // copied from the source response's Usage.PromptTokens
+	CompletionTokens int `json:"completion_tokens"` // copied from the source response's Usage.CompletionTokens
+	TotalTokens      int `json:"total_tokens"`      // copied from the source response's Usage.TotalTokens
+}
+
+// ModifyCompletionResponse returns a copy of response annotated with conversation data.
+//
+// The input response is left untouched. In the copy:
+//   - ID is replaced by assistantItem.PublicID when assistantItem is non-nil;
+//   - Metadata holds conversation_id, conversation_created and conversation_title when
+//     conv is non-nil, and stays nil (omitted from JSON) otherwise.
+// It dereferences response, so response must not be nil.
+func (uc *CompletionNonStreamHandler) ModifyCompletionResponse(response *CompletionResponse, conv *conversation.Conversation, conversationCreated bool, assistantItem *conversation.Item) *CompletionResponse {
+	// CompletionChoice contains only value fields, so copy() yields independent choices.
+	choices := make([]CompletionChoice, len(response.Choices))
+	copy(choices, response.Choices)
+
+	annotated := &CompletionResponse{
+		ID:      response.ID,
+		Object:  response.Object,
+		Created: response.Created,
+		Model:   response.Model,
+		Choices: choices,
+		Usage: Usage{
+			PromptTokens:     response.Usage.PromptTokens,
+			CompletionTokens: response.Usage.CompletionTokens,
+			TotalTokens:      response.Usage.TotalTokens,
+		},
+	}
+
+	// Prefer the stored assistant item's public ID over the original response ID.
+	if assistantItem != nil {
+		annotated.ID = assistantItem.PublicID
+	}
+
+	// Without a conversation there is no metadata to attach.
+	if conv == nil {
+		return annotated
+	}
+
+	// Describe the conversation this completion belongs to.
+	annotated.Metadata = map[string]interface{}{
+		"conversation_id":      conv.PublicID,
+		"conversation_created": conversationCreated,
+		"conversation_title":   conv.Title,
+	}
+
+	return annotated
+}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_route.go
new file mode 100644
index 00000000..e04c93cc
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_route.go
@@ -0,0 +1,253 @@
+package chat
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+	openai "github.com/sashabaranov/go-openai"
+	"menlo.ai/jan-api-gateway/app/domain/auth"
+	"menlo.ai/jan-api-gateway/app/domain/common"
+	"menlo.ai/jan-api-gateway/app/domain/conversation"
+	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
+)
+
+type CompletionAPI struct { // CompletionAPI groups the dependencies of the chat completion route.
+	completionNonStreamHandler *CompletionNonStreamHandler       // used by PostCompletion when request.Stream is false
+	completionStreamHandler    *CompletionStreamHandler          // used by PostCompletion when request.Stream is true
+	conversationService        *conversation.ConversationService // loads and creates conversations
+	authService                *auth.AuthService                 // stored by NewCompletionAPI; not referenced by the CompletionAPI methods in this file
+}
+
+// NewCompletionAPI assembles a CompletionAPI from the non-streaming and streaming completion
+// handlers and the conversation and auth services. The arguments are stored as given.
+func NewCompletionAPI(completionNonStreamHandler *CompletionNonStreamHandler, completionStreamHandler *CompletionStreamHandler, conversationService *conversation.ConversationService, authService *auth.AuthService) *CompletionAPI {
+	api := new(CompletionAPI)
+	api.completionNonStreamHandler, api.completionStreamHandler = completionNonStreamHandler, completionStreamHandler
+	api.conversationService, api.authService = conversationService, authService
+	return api
+}
+
+func (completionAPI *CompletionAPI) RegisterRouter(router *gin.RouterGroup) { // RegisterRouter adds the completions endpoint to router.
+	router.POST("/completions", completionAPI.PostCompletion) // the path is relative to the group's base path
+}
+
+// ExtendedChatCompletionRequest is the body bound by PostCompletion: an OpenAI chat completion request plus an optional conversation reference.
+type ExtendedChatCompletionRequest struct {
+	openai.ChatCompletionRequest
+	Conversation string `json:"conversation,omitempty"` // public ID of an existing conversation; when empty the handler creates a new one
+}
+
+// PostCompletion binds the request, resolves or creates the conversation, then runs the streaming or non-streaming handler.
+// @Summary Create a chat completion
+// @Description Generates a model response for the given chat conversation. If `stream` is true, the response is sent as a stream of events. If `stream` is false or omitted, a single JSON response is returned.
+// @Tags Chat
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Produce text/event-stream
+// @Param request body ExtendedChatCompletionRequest true "Extended chat completion request payload"
+// @Success 200 {object} CompletionResponse "Successful non-streaming response"
+// @Success 200 {string} string "Successful streaming response (SSE format, event: 'data', data: JSON object per chunk)"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/chat/completions [post]
+func (api *CompletionAPI) PostCompletion(reqCtx *gin.Context) {
+	var request ExtendedChatCompletionRequest
+	if err := reqCtx.ShouldBindJSON(&request); err != nil {
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code:  "cf237451-8932-48d1-9cf6-42c4db2d4805",
+			Error: err.Error(),
+		})
+		return
+	}
+
+	// Get user ID for saving messages
+	user, ok := auth.GetUserFromContext(reqCtx)
+	if !ok {
+		reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
+			Code:  "0199506b-314d-70e2-a8aa-d5fde1569d1d",
+			Error: "user not found",
+		})
+		return
+	}
+	// TODO: Implement admin API key check
+
+	// Handle conversation management
+	conv, conversationCreated, convErr := api.handleConversationManagement(reqCtx, request.Conversation, request.Messages)
+	if convErr != nil {
+		// Conversation doesn't exist, return error
+		reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
+			Code:  convErr.GetCode(),
+			Error: convErr.GetMessage(),
+		})
+		return
+	}
+
+	if request.Stream {
+		// Always send conversation metadata event for streaming requests
+		api.sendConversationMetadata(reqCtx, conv, conversationCreated)
+
+		err := api.completionStreamHandler.StreamCompletion(reqCtx, "", request.ChatCompletionRequest, conv, user)
+		if err == nil {
+			return
+		}
+		if reqCtx.Writer.Written() {
+			// Events were already sent: a JSON error can no longer set the status code
+			// and would only append a malformed frame to the SSE stream.
+			reqCtx.Abort()
+			return
+		}
+		// Classify by the returned error. StreamCompletion cancels the request context it
+		// installed before returning, so that context's Err() is non-nil for every failure.
+		switch err.Err {
+		case context.DeadlineExceeded:
+			reqCtx.AbortWithStatusJSON(http.StatusRequestTimeout, responses.ErrorResponse{
+				Code: "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+			})
+		case context.Canceled:
+			reqCtx.AbortWithStatusJSON(http.StatusRequestTimeout, responses.ErrorResponse{
+				Code: "b2c3d4e5-f6g7-8901-bcde-f23456789012",
+			})
+		default:
+			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+				Code:  err.GetCode(),
+				Error: err.GetMessage(),
+			})
+		}
+		return
+	}
+
+	response, err := api.completionNonStreamHandler.CreateCompletion(reqCtx.Request.Context(), "", request.ChatCompletionRequest)
+	if err != nil {
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code:  err.GetCode(),
+			Error: err.GetMessage(),
+		})
+		return
+	}
+
+	// Only the newest request message is saved alongside the assistant reply.
+	var latestMessage []openai.ChatCompletionMessage
+	if len(request.Messages) > 0 {
+		latestMessage = []openai.ChatCompletionMessage{request.Messages[len(request.Messages)-1]}
+	}
+
+	// A response without choices has no assistant text to save, and indexing Choices[0]
+	// would panic; the response is then returned with its original ID.
+	var assistantItem *conversation.Item
+	if len(response.Choices) > 0 {
+		// Saving is best effort: the error is ignored and the completion is returned regardless.
+		assistantItem, _ = api.completionNonStreamHandler.SaveMessagesToConversationWithAssistant(reqCtx.Request.Context(), conv, user.ID, latestMessage, response.Choices[0].Message.Content)
+	}
+
+	reqCtx.JSON(http.StatusOK, api.completionNonStreamHandler.ModifyCompletionResponse(response, conv, conversationCreated, assistantItem))
+}
+
+// handleConversationManagement resolves the conversation for a completion request. A non-empty
+// conversationID is loaded for the current user and a failed load is returned as an error. An
+// empty one creates a conversation: the bool reports success and conv may be nil with a nil error.
+func (api *CompletionAPI) handleConversationManagement(reqCtx *gin.Context, conversationID string, messages []openai.ChatCompletionMessage) (*conversation.Conversation, bool, *common.Error) {
+	if conversationID == "" {
+		created, ok := api.createNewConversation(reqCtx, messages)
+		return created, ok, nil
+	}
+
+	existing, loadErr := api.loadConversation(reqCtx, conversationID)
+	if loadErr != nil {
+		return nil, false, loadErr
+	}
+	return existing, false, nil
+}
+
+// loadConversation fetches the conversation with public ID conversationID for the user stored
+// in the request context. It returns a *common.Error when no user is present, when the lookup
+// returns an error, or when the lookup returns no conversation.
+func (api *CompletionAPI) loadConversation(reqCtx *gin.Context, conversationID string) (*conversation.Conversation, *common.Error) {
+	currentUser, authenticated := auth.GetUserFromContext(reqCtx)
+	if !authenticated {
+		return nil, common.NewErrorWithMessage("User not authenticated", "c1d2e3f4-g5h6-7890-cdef-123456789012")
+	}
+
+	// A failed lookup and an empty result share one message but keep separate error codes.
+	notFound := fmt.Sprintf("Conversation with ID '%s' not found", conversationID)
+	found, lookupErr := api.conversationService.GetConversationByPublicIDAndUserID(reqCtx.Request.Context(), conversationID, currentUser.ID)
+	switch {
+	case lookupErr != nil:
+		return nil, common.NewErrorWithMessage(notFound, "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
+	case found == nil:
+		return nil, common.NewErrorWithMessage(notFound, "b2c3d4e5-f6g7-8901-bcde-f23456789012")
+	}
+
+	return found, nil
+}
+
+// createNewConversation creates a conversation for the user stored in the request context,
+// titled via generateTitleFromMessages. The bool result is true only when a conversation was
+// created; it returns (nil, false) when no user is present or when creation fails, and the
+// creation error is not propagated.
+func (api *CompletionAPI) createNewConversation(reqCtx *gin.Context, messages []openai.ChatCompletionMessage) (*conversation.Conversation, bool) {
+	owner, ok := auth.GetUserFromContext(reqCtx)
+	if !ok {
+		return nil, false
+	}
+
+	title := api.generateTitleFromMessages(messages)
+	// The "model" metadata is a fixed default rather than the model named in the request.
+	metadata := map[string]string{"model": "jan-v1-4b"}
+
+	created, createErr := api.conversationService.CreateConversation(reqCtx.Request.Context(), owner.ID, &title, true, metadata)
+	if createErr != nil {
+		// If creation fails, return nil
+		return nil, false
+	}
+
+	return created, true
+}
+
+// TODO: generate the title with a model; for now it is taken from the first user message.
+// generateTitleFromMessages returns the trimmed text of the first user message with non-blank content, cut to 50
+// runes plus "..." when longer (rune-based so multi-byte text stays valid UTF-8), else "New Conversation".
+func (api *CompletionAPI) generateTitleFromMessages(messages []openai.ChatCompletionMessage) string {
+	const maxTitleRunes = 50
+	for _, msg := range messages {
+		if msg.Role != openai.ChatMessageRoleUser {
+			continue
+		}
+		title := strings.TrimSpace(msg.Content)
+		if title == "" {
+			continue // blank or whitespace-only content cannot serve as a title
+		}
+		if runes := []rune(title); len(runes) > maxTitleRunes {
+			return string(runes[:maxTitleRunes]) + "..."
+		}
+		return title
+	}
+	return "New Conversation"
+}
+
+// sendConversationMetadata writes the conversation's ID, title and created flag as one SSE data event and flushes it; it is a no-op when conv is nil or the metadata cannot be marshalled.
+func (api *CompletionAPI) sendConversationMetadata(reqCtx *gin.Context, conv *conversation.Conversation, conversationCreated bool) {
+	if conv == nil {
+		return
+	}
+	jsonData, err := json.Marshal(map[string]any{
+		"object":               "chat.completion.metadata",
+		"conversation_id":      conv.PublicID,
+		"conversation_created": conversationCreated,
+		"conversation_title":   conv.Title,
+	})
+	if err != nil {
+		return
+	}
+	// PostCompletion calls this before StreamCompletion, so this write commits the headers: set the SSE ones first.
+	reqCtx.Header("Content-Type", "text/event-stream")
+	reqCtx.Header("Cache-Control", "no-cache")
+	reqCtx.Header("Connection", "keep-alive")
+	fmt.Fprintf(reqCtx.Writer, "data: %s\n\n", jsonData)
+	reqCtx.Writer.Flush()
+}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_streaming_handler.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_streaming_handler.go
new file mode 100644
index 00000000..c4a12044
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_streaming_handler.go
@@ -0,0 +1,285 @@
+package chat
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	openai "github.com/sashabaranov/go-openai"
+	"menlo.ai/jan-api-gateway/app/domain/common"
+	"menlo.ai/jan-api-gateway/app/domain/conversation"
+	"menlo.ai/jan-api-gateway/app/domain/inference"
+	"menlo.ai/jan-api-gateway/app/domain/user"
+)
+
+// Constants for streaming configuration
+const (
+	RequestTimeout    = 120 * time.Second // timeout applied to the request context by StreamCompletion
+	DataPrefix        = "data: "          // prefix of the upstream lines that carry a payload
+	DoneMarker        = "[DONE]"          // payload at which reading the upstream stream stops
+	ChannelBufferSize = 100               // capacity of the data channel created by StreamCompletion
+	ErrorBufferSize   = 10                // capacity of the error channel created by StreamCompletion
+)
+
+// CompletionStreamHandler handles streaming chat completions
+type CompletionStreamHandler struct {
+	inferenceProvider   inference.InferenceProvider       // opens the upstream completion stream
+	conversationService *conversation.ConversationService // used to add user and assistant items to the conversation
+}
+
+// NewCompletionStreamHandler returns a handler that streams from inferenceProvider
+// and records messages through conversationService.
+func NewCompletionStreamHandler(inferenceProvider inference.InferenceProvider, conversationService *conversation.ConversationService) *CompletionStreamHandler {
+	handler := &CompletionStreamHandler{inferenceProvider: inferenceProvider}
+	handler.conversationService = conversationService
+	return handler
+}
+
+// StreamCompletion relays an upstream streaming completion to the client. A producer goroutine
+// (streamResponseToChannel) fills dataChan and errChan while this goroutine writes and flushes each
+// line under a RequestTimeout-bound context. It returns nil on a clean finish, else a *common.Error.
+func (s *CompletionStreamHandler) StreamCompletion(reqCtx *gin.Context, apiKey string, request openai.ChatCompletionRequest, conv *conversation.Conversation, user *user.User) *common.Error {
+	ctx, cancel := context.WithTimeout(reqCtx.Request.Context(), RequestTimeout)
+	defer cancel()
+	reqCtx.Request = reqCtx.Request.WithContext(ctx)
+
+	reqCtx.Header("Content-Type", "text/event-stream")
+	reqCtx.Header("Cache-Control", "no-cache")
+	reqCtx.Header("Connection", "keep-alive")
+	reqCtx.Header("Access-Control-Allow-Origin", "*")
+	reqCtx.Header("Access-Control-Allow-Headers", "Cache-Control")
+
+	dataChan := make(chan string, ChannelBufferSize)
+	errChan := make(chan error, ErrorBufferSize)
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go s.streamResponseToChannel(reqCtx, request, dataChan, errChan, conv, user, &wg)
+	go func() {
+		wg.Wait()
+		close(dataChan)
+		close(errChan)
+	}()
+
+	for {
+		select {
+		case line, ok := <-dataChan:
+			if !ok {
+				// select may see the closed dataChan before a queued error; errChan closes right after, so this cannot hang.
+				if err := <-errChan; err != nil {
+					return common.NewError(err, "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
+				}
+				return nil
+			}
+			if _, err := reqCtx.Writer.Write([]byte(line)); err != nil {
+				// Drain so a producer blocked on a full dataChan can finish and the channels get closed.
+				go func() {
+					for range dataChan {
+					}
+				}()
+				return common.NewError(err, "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
+			}
+			reqCtx.Writer.Flush()
+		case err := <-errChan:
+			if err != nil {
+				return common.NewError(err, "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
+			}
+		}
+	}
+}
+
+// streamResponseToChannel is the producer for StreamCompletion. It saves the newest request
+// message (when conv is non-nil), opens the upstream stream, forwards each "data: " payload
+// before the [DONE] marker to dataChan and then saves the accumulated assistant text.
+// Failures are sent on errChan; wg.Done is called on every return path.
+func (s *CompletionStreamHandler) streamResponseToChannel(reqCtx *gin.Context, request openai.ChatCompletionRequest, dataChan chan<- string, errChan chan<- error, conv *conversation.Conversation, user *user.User, wg *sync.WaitGroup) {
+	defer wg.Done()
+	ctx := reqCtx.Request.Context()
+
+	// Save input messages to conversation first
+	if conv != nil {
+		var latestMessage []openai.ChatCompletionMessage
+		if len(request.Messages) > 0 {
+			latestMessage = []openai.ChatCompletionMessage{request.Messages[len(request.Messages)-1]}
+		}
+		s.saveInputMessagesToConversation(ctx, conv, user.ID, latestMessage)
+	}
+
+	reader, err := s.inferenceProvider.CreateCompletionStream(ctx, "", request)
+	if err != nil {
+		errChan <- err
+		return
+	}
+	defer reader.Close()
+
+	// Collects the streamed content; a Builder avoids re-copying the text on every chunk.
+	var fullResponse strings.Builder
+
+	scanner := bufio.NewScanner(reader)
+	// Allow lines up to 1 MiB: the default 64 KiB limit aborts the stream on a large chunk.
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+	for scanner.Scan() {
+		if s.checkContextCancellation(ctx, errChan) {
+			return
+		}
+		data, found := strings.CutPrefix(scanner.Text(), DataPrefix)
+		if !found {
+			continue
+		}
+		if data == DoneMarker {
+			break
+		}
+		processedData, contentChunk := s.processStreamChunkForChannel(data)
+		// Give up instead of blocking forever if the consumer stopped reading dataChan.
+		select {
+		case dataChan <- processedData:
+		case <-ctx.Done():
+			errChan <- ctx.Err()
+			return
+		}
+		fullResponse.WriteString(contentChunk)
+	}
+	if err := scanner.Err(); err != nil {
+		errChan <- err
+		return
+	}
+
+	if conv != nil && fullResponse.Len() > 0 {
+		assistantItemID := s.generateAssistantItemID()
+		s.saveAssistantMessageToConversation(ctx, conv, user, assistantItemID, fullResponse.String())
+	}
+}
+
+// processStreamChunkForChannel converts one upstream payload into its SSE frame and its text.
+// The first result is always "data: <payload>\n\n". The second is the concatenation of
+// delta.content over all choices, or "" when the payload cannot be decoded into the
+// chunk shape declared below.
+func (s *CompletionStreamHandler) processStreamChunkForChannel(data string) (string, string) {
+	// The frame is the same whether or not the payload parses.
+	sseLine := fmt.Sprintf("data: %s\n\n", data)
+
+	// Only the fields declared here are decoded; any other key in the payload is
+	// ignored by json.Unmarshal.
+	var chunk struct {
+		Choices []struct {
+			Delta struct {
+				Content          string `json:"content"`
+				ReasoningContent string `json:"reasoning_content"`
+				ToolCalls        []struct {
+					Function struct {
+						Arguments string `json:"arguments"`
+					} `json:"function"`
+				} `json:"tool_calls"`
+			} `json:"delta"`
+		} `json:"choices"`
+	}
+
+	if err := json.Unmarshal([]byte(data), &chunk); err != nil {
+		// If JSON parsing fails, still send raw data but with empty content
+		return sseLine, ""
+	}
+
+	// reasoning_content and tool_calls are decoded but left out of the returned text,
+	// so they do not end up in the content collected by the caller.
+	var content string
+	for i := range chunk.Choices {
+		if text := chunk.Choices[i].Delta.Content; text != "" {
+			content += text
+		}
+	}
+
+	return sseLine, content
+}
+
+// checkContextCancellation reports whether ctx is done at the time of the call.
+// When it is, ctx.Err() is sent on errChan before true is returned.
+func (s *CompletionStreamHandler) checkContextCancellation(ctx context.Context, errChan chan<- error) bool {
+	cause := ctx.Err()
+	if cause == nil {
+		return false
+	}
+	errChan <- cause
+	return true
+}
+
+// generateAssistantItemID returns an ID of the form "msg_<unix-nanoseconds>".
+func (s *CompletionStreamHandler) generateAssistantItemID() string {
+	// The ID is a timestamp, not a UUID: calls that read the same nanosecond value return the same ID.
+	// TODO: Use conversation service's ID generation when method is made public
+	return fmt.Sprintf("msg_%d", time.Now().UnixNano())
+}
+
+// saveInputMessagesToConversation adds each message to conv as a message item for userID.
+// System, user and assistant roles map one-to-one; any other role is stored as user. The
+// item content is the text parts of MultiContent or, when there are none, the non-empty
+// Content string. Nothing happens when conv is nil, and AddItem results are not checked.
+func (s *CompletionStreamHandler) saveInputMessagesToConversation(ctx context.Context, conv *conversation.Conversation, userID uint, messages []openai.ChatCompletionMessage) {
+	if conv == nil {
+		return
+	}
+
+	for _, msg := range messages {
+		// Unrecognised roles fall back to the user role.
+		var role conversation.ItemRole = conversation.ItemRoleUser
+		switch msg.Role {
+		case openai.ChatMessageRoleSystem:
+			role = conversation.ItemRoleSystem
+		case openai.ChatMessageRoleAssistant:
+			role = conversation.ItemRoleAssistant
+		}
+
+		// Keep only the text parts of a multi-part message.
+		parts := make([]conversation.Content, 0, len(msg.MultiContent))
+		for _, part := range msg.MultiContent {
+			if part.Type != openai.ChatMessagePartTypeText {
+				continue
+			}
+			parts = append(parts, conversation.Content{
+				Type: "text",
+				Text: &conversation.Text{
+					Value: part.Text,
+				},
+			})
+		}
+
+		// No text parts were collected, so fall back to the plain Content string.
+		if len(parts) == 0 && msg.Content != "" {
+			parts = append(parts, conversation.Content{
+				Type: "text",
+				Text: &conversation.Text{
+					Value: msg.Content,
+				},
+			})
+		}
+
+		// Add item to conversation
+		// NOTE(review): the result of AddItem is discarded, so a failed save goes unnoticed.
+		s.conversationService.AddItem(ctx, conv, userID, conversation.ItemTypeMessage, &role, parts)
+	}
+}
+
+// saveAssistantMessageToConversation adds content to conv as one text item with the
+// assistant role, attributed to user.ID. It does nothing when conv is nil or content is
+// empty. The itemID argument is accepted but not used, and the result of AddItem is not
+// checked.
+func (s *CompletionStreamHandler) saveAssistantMessageToConversation(ctx context.Context, conv *conversation.Conversation, user *user.User, itemID string, content string) {
+	if conv == nil || content == "" {
+		return
+	}
+
+	role := conversation.ItemRoleAssistant
+	body := []conversation.Content{{
+		Type: "text",
+		Text: &conversation.Text{
+			Value: content,
+		},
+	}}
+
+	// Add the assistant message to conversation
+	s.conversationService.AddItem(ctx, conv, user.ID, conversation.ItemTypeMessage, &role, body)
+}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completions.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completions.go
deleted file mode 100644
index 971a8be2..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completions.go
+++ /dev/null
@@ -1,182 +0,0 @@
-package chat
-
-import (
-	"fmt"
-	"net/http"
-
-	"github.com/gin-gonic/gin"
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/apikey"
-	inferencemodelregistry "menlo.ai/jan-api-gateway/app/domain/inference_model_registry"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-)
-
-type CompletionAPI struct {
-	apikeyService *apikey.ApiKeyService
-}
-
-func NewCompletionAPI(apikeyService *apikey.ApiKeyService) *CompletionAPI {
-	return &CompletionAPI{
-		apikeyService,
-	}
-}
-
-func (completionAPI *CompletionAPI) RegisterRouter(router *gin.RouterGroup) {
-	router.POST("/completions", completionAPI.PostCompletion)
-}
-
-// ChatCompletionResponseSwagger is a doc-only version without http.Header
-type ChatCompletionResponseSwagger struct {
-	ID      string                        `json:"id"`
-	Object  string                        `json:"object"`
-	Created int64                         `json:"created"`
-	Model   string                        `json:"model"`
-	Choices []openai.ChatCompletionChoice `json:"choices"`
-	Usage   openai.Usage                  `json:"usage"`
-}
-
-// CreateChatCompletion
-// @Summary Create a chat completion
-// @Description Generates a model response for the given chat conversation.
-// @Tags Chat
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param request body openai.ChatCompletionRequest true "Chat completion request payload"
-// @Success 200 {object} ChatCompletionResponseSwagger "Successful response"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request payload"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/chat/completions [post]
-func (api *CompletionAPI) PostCompletion(reqCtx *gin.Context) {
-	var request openai.ChatCompletionRequest
-	if err := reqCtx.ShouldBindJSON(&request); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "cf237451-8932-48d1-9cf6-42c4db2d4805",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	key := ""
-	// if environment_variables.EnvironmentVariables.ENABLE_ADMIN_API {
-	// 	key, ok := requests.GetTokenFromBearer(reqCtx)
-	// 	if !ok {
-	// 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-	// 			Code:  "4284adb3-7af4-428b-8064-7073cb9ca2ca",
-	// 			Error: "invalid apikey",
-	// 		})
-	// 		return
-	// 	}
-	// 	hashed := api.apikeyService.HashKey(reqCtx, key)
-	// 	apikeyEntity, err := api.apikeyService.FindByKeyHash(reqCtx, hashed)
-	// 	if err != nil {
-	// 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-	// 			Code:  "d14ab75b-586b-4b55-ba65-e520a76d6559",
-	// 			Error: "invalid apikey",
-	// 		})
-	// 		return
-	// 	}
-	// 	if !apikeyEntity.Enabled {
-	// 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-	// 			Code:  "42bd6104-28a1-45bd-a164-8e32d12b0378",
-	// 			Error: "invalid apikey",
-	// 		})
-	// 		return
-	// 	}
-	// 	if apikeyEntity.ExpiresAt != nil && apikeyEntity.ExpiresAt.Before(time.Now()) {
-	// 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-	// 			Code:  "f8f2733d-c76f-40e4-95b1-584a5d054225",
-	// 			Error: "apikey expired",
-	// 		})
-	// 		return
-	// 	}
-	// }
-
-	modelRegistry := inferencemodelregistry.GetInstance()
-	mToE := modelRegistry.GetModelToEndpoints()
-	endpoints, ok := mToE[request.Model]
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "59253517-df33-44bf-9333-c927402e4e2e",
-			Error: fmt.Sprintf("Model: %s does not exist", request.Model),
-		})
-		return
-	}
-
-	janInferenceClient := janinference.NewJanInferenceClient(reqCtx)
-	for _, endpoint := range endpoints {
-		if endpoint == janInferenceClient.BaseURL {
-			if request.Stream {
-				err := janInferenceClient.CreateChatCompletionStream(reqCtx, key, request)
-				if err != nil {
-					reqCtx.AbortWithStatusJSON(
-						http.StatusBadRequest,
-						responses.ErrorResponse{
-							Code:  "c3af973c-eada-4e8b-96d9-e92546588cd3",
-							Error: err.Error(),
-						})
-					return
-				}
-				return
-			} else {
-				response, err := janInferenceClient.CreateChatCompletion(reqCtx.Request.Context(), key, request)
-				if err != nil {
-					reqCtx.AbortWithStatusJSON(
-						http.StatusBadRequest,
-						responses.ErrorResponse{
-							Code:  "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4",
-							Error: err.Error(),
-						})
-					return
-				}
-				reqCtx.JSON(http.StatusOK, response)
-				return
-			}
-		}
-	}
-
-	reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-		Code:  "6c6e4ea0-53d2-4c6c-8617-3a645af59f43",
-		Error: "Client does not exist",
-	})
-}
-
-type Message struct {
-	Role    string `json:"role"`
-	Content string `json:"content"`
-}
-
-type PostChatCompletionRequest struct {
-	Model       string    `json:"model"`
-	Messages    []Message `json:"messages"`
-	Temperature float32   `json:"temperature"`
-	MaxTokens   int       `json:"max_tokens"`
-}
-
-type ResponseMessage struct {
-	Role    string `json:"role"`
-	Content string `json:"content"`
-}
-
-type Choice struct {
-	Index        int             `json:"index"`
-	Message      ResponseMessage `json:"message"`
-	FinishReason string          `json:"finish_reason"`
-}
-
-type Usage struct {
-	PromptTokens     int `json:"prompt_tokens"`
-	CompletionTokens int `json:"completion_tokens"`
-	TotalTokens      int `json:"total_tokens"`
-}
-
-type PostChatCompletionResponse struct {
-	ID      string   `json:"id"`
-	Object  string   `json:"object"`
-	Created int64    `json:"created"`
-	Model   string   `json:"model"`
-	Choices []Choice `json:"choices"`
-	Usage   Usage    `json:"usage"`
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conversations/conversations_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conversations/conversations_route.go
index 31f6609c..19bad2a2 100644
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conversations/conversations_route.go
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conversations/conversations_route.go
@@ -21,6 +21,99 @@ type ConversationAPI struct {
 	authService         *auth.AuthService
 }
 
+// Request structs: JSON bodies for the conversation endpoints (the handlers that bind them are outside this excerpt).
+type CreateConversationRequest struct { // title plus optional metadata and items; presumably bound by CreateConversationHandler — confirm
+	Title    string                    `json:"title"`
+	Metadata map[string]string         `json:"metadata,omitempty"`
+	Items    []ConversationItemRequest `json:"items,omitempty"`
+}
+
+type UpdateConversationRequest struct { // pointer fields stay nil when the JSON key is absent; presumably nil means "leave unchanged" — confirm
+	Title    *string            `json:"title"`
+	Metadata *map[string]string `json:"metadata"`
+}
+
+type ConversationItemRequest struct { // one item to add; type and content carry binding:"required", role is optional
+	Type    string                       `json:"type" binding:"required"`
+	Role    conversation.ItemRole        `json:"role,omitempty"`
+	Content []ConversationContentRequest `json:"content" binding:"required"`
+}
+
+type ConversationContentRequest struct { // one content part; type carries binding:"required", text is optional
+	Type string `json:"type" binding:"required"`
+	Text string `json:"text,omitempty"`
+}
+
+type CreateItemsRequest struct { // wraps a required list of ConversationItemRequest
+	Items []ConversationItemRequest `json:"items" binding:"required"`
+}
+
+// Response structs: JSON shapes for the conversation endpoints.
+type ExtendedConversationResponse struct { // element type named in the List Conversations @Success annotation
+	ID        string            `json:"id"`
+	Title     string            `json:"title"`
+	Object    string            `json:"object"`
+	CreatedAt int64             `json:"created_at"`
+	Metadata  map[string]string `json:"metadata"`
+}
+
+type DeletedConversationResponse struct { // id, object and a deleted flag; presumably returned by DeleteConversationHandler — confirm
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Deleted bool   `json:"deleted"`
+}
+
+type ConversationItemResponse struct { // role, status and content are dropped from the JSON when nil or empty (omitempty)
+	ID        string            `json:"id"`
+	Object    string            `json:"object"`
+	Type      string            `json:"type"`
+	Role      *string           `json:"role,omitempty"`
+	Status    *string           `json:"status,omitempty"`
+	CreatedAt int64             `json:"created_at"`
+	Content   []ContentResponse `json:"content,omitempty"`
+}
+
+type ContentResponse struct { // every payload pointer is omitempty; presumably only the one matching Type is set — confirm
+	Type       string                `json:"type"`
+	Text       *TextResponse         `json:"text,omitempty"`
+	InputText  *string               `json:"input_text,omitempty"`
+	OutputText *OutputTextResponse   `json:"output_text,omitempty"`
+	Image      *ImageContentResponse `json:"image,omitempty"`
+	File       *FileContentResponse  `json:"file,omitempty"`
+}
+
+type TextResponse struct { // payload of ContentResponse.Text
+	Value string `json:"value"`
+}
+
+type OutputTextResponse struct { // payload of ContentResponse.OutputText; annotations has no omitempty, so a nil slice encodes as null
+	Text        string               `json:"text"`
+	Annotations []AnnotationResponse `json:"annotations"`
+}
+
+type ImageContentResponse struct { // payload of ContentResponse.Image; all fields are omitted when empty
+	URL    string `json:"url,omitempty"`
+	FileID string `json:"file_id,omitempty"`
+	Detail string `json:"detail,omitempty"`
+}
+
+type FileContentResponse struct { // payload of ContentResponse.File; file_id is always emitted, the rest only when non-zero
+	FileID   string `json:"file_id"`
+	Name     string `json:"name,omitempty"`
+	MimeType string `json:"mime_type,omitempty"`
+	Size     int64  `json:"size,omitempty"`
+}
+
+type AnnotationResponse struct { // element of OutputTextResponse.Annotations; type, start_index and end_index are always emitted
+	Type       string `json:"type"`
+	Text       string `json:"text,omitempty"`
+	FileID     string `json:"file_id,omitempty"`
+	URL        string `json:"url,omitempty"`
+	StartIndex int    `json:"start_index"`
+	EndIndex   int    `json:"end_index"`
+	Index      int    `json:"index,omitempty"`
+}
+
 // NewConversationAPI creates a new conversation API instance
 func NewConversationAPI(
 	conversationService *conversation.ConversationService,
@@ -38,16 +131,15 @@ func (api *ConversationAPI) RegisterRouter(router *gin.RouterGroup) {
 		api.authService.RegisteredUserMiddleware(),
 	)
 
-	// OpenAI-compatible endpoints with Swagger documentation
-	conversationsRouter.POST("", api.createConversation)
-	conversationsRouter.GET("", api.listConversations)
+	conversationsRouter.POST("", api.CreateConversationHandler)
+	conversationsRouter.GET("", api.ListConversationsHandler)
 
 	conversationMiddleWare := api.conversationService.GetConversationMiddleWare()
-	conversationsRouter.GET(fmt.Sprintf("/:%s", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.getConversation)
-	conversationsRouter.PATCH(fmt.Sprintf("/:%s", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.updateConversation)
-	conversationsRouter.DELETE(fmt.Sprintf("/:%s", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.deleteConversation)
-	conversationsRouter.POST(fmt.Sprintf("/:%s/items", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.createItems)
-	conversationsRouter.GET(fmt.Sprintf("/:%s/items", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.listItems)
+	conversationsRouter.GET(fmt.Sprintf("/:%s", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.GetConversationHandler)
+	conversationsRouter.PATCH(fmt.Sprintf("/:%s", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.UpdateConversationHandler)
+	conversationsRouter.DELETE(fmt.Sprintf("/:%s", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.DeleteConversationHandler)
+	conversationsRouter.POST(fmt.Sprintf("/:%s/items", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.CreateItemsHandler)
+	conversationsRouter.GET(fmt.Sprintf("/:%s/items", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.ListItemsHandler)
 
 	conversationItemMiddleWare := api.conversationService.GetConversationItemMiddleWare()
 	conversationsRouter.GET(
@@ -58,7 +150,7 @@ func (api *ConversationAPI) RegisterRouter(router *gin.RouterGroup) {
 		),
 		conversationMiddleWare,
 		conversationItemMiddleWare,
-		api.getItem,
+		api.GetItemHandler,
 	)
 	conversationsRouter.DELETE(
 		fmt.Sprintf(
@@ -68,34 +160,34 @@ func (api *ConversationAPI) RegisterRouter(router *gin.RouterGroup) {
 		),
 		conversationMiddleWare,
 		conversationItemMiddleWare,
-		api.deleteItem,
+		api.DeleteItemHandler,
 	)
 }
 
-// ListConversations
 // @Summary List Conversations
-// @Description Retrieves a paginated list of conversations for the authenticated user.
+// @Description Retrieves a paginated list of conversations for the authenticated user with OpenAI-compatible response format.
 // @Tags Conversations
 // @Security BearerAuth
 // @Param limit query int false "The maximum number of items to return" default(20)
 // @Param after query string false "A cursor for use in pagination. The ID of the last object from the previous page"
 // @Param order query string false "Order of items (asc/desc)"
-// @Success 200 {object} openai.ListResponse[ConversationResponse] "Successfully retrieved the list of conversations"
+// @Success 200 {object} openai.ListResponse[ExtendedConversationResponse] "Successfully retrieved the list of conversations"
 // @Failure 400 {object} responses.ErrorResponse "Bad Request - Invalid pagination parameters"
 // @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
 // @Failure 500 {object} responses.ErrorResponse "Internal Server Error"
 // @Router /v1/conversations [get]
-func (api *ConversationAPI) listConversations(reqCtx *gin.Context) {
+func (api *ConversationAPI) ListConversationsHandler(reqCtx *gin.Context) {
 	ctx := reqCtx.Request.Context()
 	user, _ := auth.GetUserFromContext(reqCtx)
 	userID := user.ID
+
 	pagination, err := query.GetCursorPaginationFromQuery(reqCtx, func(lastID string) (*uint, error) {
-		convs, err := api.conversationService.FindConversationsByFilter(ctx, conversation.ConversationFilter{
+		convs, convErr := api.conversationService.FindConversationsByFilter(ctx, conversation.ConversationFilter{
 			UserID:   &userID,
 			PublicID: &lastID,
 		}, nil)
-		if err != nil {
-			return nil, err
+		if convErr != nil {
+			return nil, convErr
 		}
 		if len(convs) != 1 {
 			return nil, fmt.Errorf("invalid conversation")
@@ -104,8 +196,8 @@ func (api *ConversationAPI) listConversations(reqCtx *gin.Context) {
 	})
 	if err != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "5f89e23d-d4a0-45ce-ba43-ae2a9be0ca64",
-			ErrorInstance: err,
+			Code:  "5f89e23d-d4a0-45ce-ba43-ae2a9be0ca64",
+			Error: "Invalid pagination parameters",
 		})
 		return
 	}
@@ -113,19 +205,19 @@ func (api *ConversationAPI) listConversations(reqCtx *gin.Context) {
 	filter := conversation.ConversationFilter{
 		UserID: &userID,
 	}
-	conversations, err := api.conversationService.FindConversationsByFilter(ctx, filter, pagination)
-	if err != nil {
+	conversations, convErr := api.conversationService.FindConversationsByFilter(ctx, filter, pagination)
+	if convErr != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "ac74fc61-fd96-4d5b-a630-e7a8e1e46575",
-			ErrorInstance: err,
+			Code:          "019952d5-6876-7323-96fa-89784b7d082e",
+			ErrorInstance: convErr.GetError(),
 		})
 		return
 	}
-	count, err := api.conversationService.CountConversationsByFilter(ctx, filter)
-	if err != nil {
+	count, countErr := api.conversationService.CountConversationsByFilter(ctx, filter)
+	if countErr != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "ae349271-d67e-4f76-a220-6945d802cbe2",
-			ErrorInstance: err,
+			Code:          "019952d5-1afc-7229-bb57-6928f54d5171",
+			ErrorInstance: countErr.GetError(),
 		})
 		return
 	}
@@ -135,15 +227,15 @@ func (api *ConversationAPI) listConversations(reqCtx *gin.Context) {
 	if len(conversations) > 0 {
 		firstId = &conversations[0].PublicID
 		lastId = &conversations[len(conversations)-1].PublicID
-		moreRecords, err := api.conversationService.FindConversationsByFilter(ctx, filter, &query.Pagination{
+		moreRecords, moreErr := api.conversationService.FindConversationsByFilter(ctx, filter, &query.Pagination{
 			Order: pagination.Order,
 			Limit: ptr.ToInt(1),
 			After: &conversations[len(conversations)-1].ID,
 		})
-		if err != nil {
+		if moreErr != nil {
 			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:          "0b6b667c-aa25-4863-8494-a4ae2e5d12c4",
-				ErrorInstance: err,
+				Code:          "019952d5-983a-73a8-8439-290ae4b4ee51",
+				ErrorInstance: moreErr.GetError(),
 			})
 			return
 		}
@@ -152,38 +244,33 @@ func (api *ConversationAPI) listConversations(reqCtx *gin.Context) {
 		}
 	}
 
-	reqCtx.JSON(http.StatusOK, openai.ListResponse[*ConversationResponse]{
-		Object:  openai.ObjectTypeListList,
+	result := functional.Map(conversations, domainToExtendedConversationResponse)
+
+	response := openai.ListResponse[*ExtendedConversationResponse]{
+		Object:  "list",
 		FirstID: firstId,
 		LastID:  lastId,
 		Total:   count,
 		HasMore: hasMore,
-		Data:    functional.Map(conversations, domainToConversationResponse),
-	})
-}
+		Data:    result,
+	}
 
-// ConversationResponse represents the response structure
-type ConversationResponse struct {
-	ID        string            `json:"id"`
-	Object    string            `json:"object"`
-	CreatedAt int64             `json:"created_at"`
-	Metadata  map[string]string `json:"metadata"`
+	reqCtx.JSON(http.StatusOK, response)
 }
 
-// createConversation handles conversation creation
 // @Summary Create a conversation
-// @Description Creates a new conversation for the authenticated user
+// @Description Creates a new conversation for the authenticated user with optional items
 // @Tags Conversations
 // @Security BearerAuth
 // @Accept json
 // @Produce json
 // @Param request body CreateConversationRequest true "Create conversation request"
-// @Success 200 {object} ConversationResponse "Created conversation"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request"
+// @Success 200 {object} ExtendedConversationResponse "Created conversation"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request - Bad payload, too many items, or invalid item format"
 // @Failure 401 {object} responses.ErrorResponse "Unauthorized"
 // @Failure 500 {object} responses.ErrorResponse "Internal server error"
 // @Router /v1/conversations [post]
-func (api *ConversationAPI) createConversation(reqCtx *gin.Context) {
+func (api *ConversationAPI) CreateConversationHandler(reqCtx *gin.Context) {
 	ctx := reqCtx.Request.Context()
 	user, _ := auth.GetUserFromContext(reqCtx)
 	userId := user.ID
@@ -199,7 +286,8 @@ func (api *ConversationAPI) createConversation(reqCtx *gin.Context) {
 
 	if len(request.Items) > 20 {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "0e5b8426-b1d2-4114-ac81-d3982dc497cf",
+			Code:  "0e5b8426-b1d2-4114-ac81-d3982dc497cf",
+			Error: "Too many items",
 		})
 		return
 	}
@@ -210,17 +298,19 @@ func (api *ConversationAPI) createConversation(reqCtx *gin.Context) {
 		item, ok := NewItemFromConversationItemRequest(itemReq)
 		if !ok {
 			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code: "1fe8d03b-9e1e-4e52-b5b5-77a25954fc43",
+				Code:  "1fe8d03b-9e1e-4e52-b5b5-77a25954fc43",
+				Error: "Invalid item format",
 			})
 			return
 		}
 		itemsToCreate[i] = item
 	}
 
-	ok, errorCode := api.conversationService.ValidateItems(ctx, itemsToCreate)
-	if !ok {
+	err := api.conversationService.ValidateItems(ctx, itemsToCreate)
+	if err != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: *errorCode,
+			Code:          "019952d0-1dc9-746e-82ff-dd42b1e7930f",
+			ErrorInstance: err.GetError(),
 		})
 		return
 	}
@@ -228,9 +318,9 @@ func (api *ConversationAPI) createConversation(reqCtx *gin.Context) {
 	// Create conversation
 	conv, err := api.conversationService.CreateConversation(ctx, userId, &request.Title, true, request.Metadata)
 	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "8fc529d7-f384-40f2-ac15-cd1f1e109316",
-			ErrorInstance: err,
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code:          "019952d0-3e32-76ba-a97f-711223df2c84",
+			ErrorInstance: err.GetError(),
 		})
 		return
 	}
@@ -239,70 +329,55 @@ func (api *ConversationAPI) createConversation(reqCtx *gin.Context) {
 	if len(request.Items) > 0 {
 		_, err := api.conversationService.AddMultipleItems(ctx, conv, userId, itemsToCreate)
 		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:          "8fc529d7-f384-40f2-ac15-cd1f1e109316",
-				ErrorInstance: err,
-			})
-			return
-		}
-
-		// Reload conversation with items
-		conv, err = api.conversationService.GetConversationByPublicIDAndUserID(ctx, conv.PublicID, userId)
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:          "8fc529d7-f384-40f2-ac15-cd1f1e109316",
-				ErrorInstance: err,
+			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+				Code:          "019952d0-6d70-7419-81ae-828d8009ee56",
+				ErrorInstance: err.GetError(),
 			})
 			return
 		}
 	}
 
-	reqCtx.JSON(http.StatusOK, domainToConversationResponse(conv))
+	response := domainToExtendedConversationResponse(conv)
+	reqCtx.JSON(http.StatusOK, response)
 }
 
-// getConversation handles conversation retrieval
 // @Summary Get a conversation
-// @Description Retrieves a conversation by its ID
+// @Description Retrieves a conversation by its ID with full metadata and title
 // @Tags Conversations
 // @Security BearerAuth
 // @Produce json
 // @Param conversation_id path string true "Conversation ID"
-// @Success 200 {object} ConversationResponse "Conversation details"
+// @Success 200 {object} ExtendedConversationResponse "Conversation details"
 // @Failure 401 {object} responses.ErrorResponse "Unauthorized"
 // @Failure 403 {object} responses.ErrorResponse "Access denied"
 // @Failure 404 {object} responses.ErrorResponse "Conversation not found"
 // @Failure 500 {object} responses.ErrorResponse "Internal server error"
 // @Router /v1/conversations/{conversation_id} [get]
-func (api *ConversationAPI) getConversation(reqCtx *gin.Context) {
+func (api *ConversationAPI) GetConversationHandler(reqCtx *gin.Context) {
 	conv, ok := conversation.GetConversationFromContext(reqCtx)
 	if !ok {
 		return
 	}
-	reqCtx.JSON(http.StatusOK, domainToConversationResponse(conv))
-}
-
-type UpdateConversationRequest struct {
-	Title    *string            `json:"title"`
-	Metadata *map[string]string `json:"metadata"`
+	response := domainToExtendedConversationResponse(conv)
+	reqCtx.JSON(http.StatusOK, response)
 }
 
-// updateConversation handles conversation updates
 // @Summary Update a conversation
-// @Description Updates conversation metadata
+// @Description Updates conversation title and/or metadata
 // @Tags Conversations
 // @Security BearerAuth
 // @Accept json
 // @Produce json
 // @Param conversation_id path string true "Conversation ID"
 // @Param request body UpdateConversationRequest true "Update conversation request"
-// @Success 200 {object} ConversationResponse "Updated conversation"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request"
+// @Success 200 {object} ExtendedConversationResponse "Updated conversation"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload or update failed"
 // @Failure 401 {object} responses.ErrorResponse "Unauthorized"
 // @Failure 403 {object} responses.ErrorResponse "Access denied"
 // @Failure 404 {object} responses.ErrorResponse "Conversation not found"
 // @Failure 500 {object} responses.ErrorResponse "Internal server error"
 // @Router /v1/conversations/{conversation_id} [patch]
-func (api *ConversationAPI) updateConversation(reqCtx *gin.Context) {
+func (api *ConversationAPI) UpdateConversationHandler(reqCtx *gin.Context) {
 	ctx := reqCtx.Request.Context()
 	conv, ok := conversation.GetConversationFromContext(reqCtx)
 	if !ok {
@@ -312,8 +387,8 @@ func (api *ConversationAPI) updateConversation(reqCtx *gin.Context) {
 	var request UpdateConversationRequest
 	if err := reqCtx.ShouldBindJSON(&request); err != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "4183e285-08ef-4a79-8a68-d53cddd0c0e2",
-			ErrorInstance: err,
+			Code:  "4183e285-08ef-4a79-8a68-d53cddd0c0e2",
+			Error: "Invalid request payload",
 		})
 		return
 	}
@@ -328,25 +403,18 @@ func (api *ConversationAPI) updateConversation(reqCtx *gin.Context) {
 	conv, err := api.conversationService.UpdateConversation(ctx, conv)
 	if err != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "3901c185-94fa-4bbc-97ef-6031939ba8c2",
+			Code:          "019952d0-a754-73bc-adbc-781ac31e12d7",
 			ErrorInstance: err,
 		})
 		return
 	}
 
-	reqCtx.JSON(http.StatusOK, domainToConversationResponse(conv))
+	response := domainToExtendedConversationResponse(conv)
+	reqCtx.JSON(http.StatusOK, response)
 }
 
-// DeletedConversationResponse represents the deleted conversation response
-type DeletedConversationResponse struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	Deleted bool   `json:"deleted"`
-}
-
-// deleteConversation handles conversation deletion
 // @Summary Delete a conversation
-// @Description Deletes a conversation and all its items
+// @Description Deletes a conversation and all its items permanently
 // @Tags Conversations
 // @Security BearerAuth
 // @Produce json
@@ -357,96 +425,32 @@ type DeletedConversationResponse struct {
 // @Failure 404 {object} responses.ErrorResponse "Conversation not found"
 // @Failure 500 {object} responses.ErrorResponse "Internal server error"
 // @Router /v1/conversations/{conversation_id} [delete]
-func (api *ConversationAPI) deleteConversation(reqCtx *gin.Context) {
+func (api *ConversationAPI) DeleteConversationHandler(reqCtx *gin.Context) {
 	ctx := reqCtx.Request.Context()
 	conv, ok := conversation.GetConversationFromContext(reqCtx)
 	if !ok {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "a4fb6e9b-00c8-423c-9836-a83080e34d28",
+			Code:  "a4fb6e9b-00c8-423c-9836-a83080e34d28",
+			Error: "Conversation not found",
 		})
 		return
 	}
-	err := api.conversationService.DeleteConversation(ctx, conv)
-	if err != nil {
+
+	success, err := api.conversationService.DeleteConversation(ctx, conv)
+	if !success {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "2d5345ba-a6db-441b-b52e-74cf358bdfcd",
-			ErrorInstance: err,
+			Code:          "019952c3-9836-75ea-9785-a8d035a7c136",
+			ErrorInstance: err.GetError(),
 		})
 		return
 	}
-	reqCtx.JSON(http.StatusOK, domainToDeletedConversationResponse(conv))
-}
-
-// ConversationItemResponse represents an item in the response
-type ConversationItemResponse struct {
-	ID        string            `json:"id"`
-	Object    string            `json:"object"`
-	Type      string            `json:"type"`
-	Role      *string           `json:"role,omitempty"`
-	Status    *string           `json:"status,omitempty"`
-	CreatedAt int64             `json:"created_at"`
-	Content   []ContentResponse `json:"content,omitempty"`
-}
-
-// ContentResponse represents content in the response
-type ContentResponse struct {
-	Type       string                `json:"type"`
-	Text       *TextResponse         `json:"text,omitempty"`
-	InputText  *string               `json:"input_text,omitempty"`
-	OutputText *OutputTextResponse   `json:"output_text,omitempty"`
-	Image      *ImageContentResponse `json:"image,omitempty"`
-	File       *FileContentResponse  `json:"file,omitempty"`
-}
-
-// TextResponse represents text content in the response
-type TextResponse struct {
-	Value string `json:"value"`
-}
-
-// OutputTextResponse represents AI output text with annotations
-type OutputTextResponse struct {
-	Text        string               `json:"text"`
-	Annotations []AnnotationResponse `json:"annotations"`
-}
-
-// ImageContentResponse represents image content
-type ImageContentResponse struct {
-	URL    string `json:"url,omitempty"`
-	FileID string `json:"file_id,omitempty"`
-	Detail string `json:"detail,omitempty"`
-}
-
-// FileContentResponse represents file content
-type FileContentResponse struct {
-	FileID   string `json:"file_id"`
-	Name     string `json:"name,omitempty"`
-	MimeType string `json:"mime_type,omitempty"`
-	Size     int64  `json:"size,omitempty"`
-}
+	response := domainToDeletedConversationResponse(conv)
 
-// AnnotationResponse represents annotation in the response
-type AnnotationResponse struct {
-	Type       string `json:"type"`
-	Text       string `json:"text,omitempty"`
-	FileID     string `json:"file_id,omitempty"`
-	URL        string `json:"url,omitempty"`
-	StartIndex int    `json:"start_index"`
-	EndIndex   int    `json:"end_index"`
-	Index      int    `json:"index,omitempty"`
-}
-
-// ConversationItemListResponse represents the response for item lists
-type ConversationItemListResponse struct {
-	Object  string                      `json:"object"`
-	Data    []*ConversationItemResponse `json:"data"`
-	HasMore bool                        `json:"has_more"`
-	FirstID *string                     `json:"first_id,omitempty"`
-	LastID  *string                     `json:"last_id,omitempty"`
+	reqCtx.JSON(http.StatusOK, response)
 }
 
-// createItems handles item creation
 // @Summary Create items in a conversation
-// @Description Adds multiple items to a conversation
+// @Description Adds multiple items to a conversation with OpenAI-compatible format
 // @Tags Conversations
 // @Security BearerAuth
 // @Accept json
@@ -454,55 +457,56 @@ type ConversationItemListResponse struct {
 // @Param conversation_id path string true "Conversation ID"
 // @Param request body CreateItemsRequest true "Create items request"
 // @Success 200 {object} openai.ListResponse[ConversationItemResponse] "Created items"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload or invalid item format"
 // @Failure 401 {object} responses.ErrorResponse "Unauthorized"
 // @Failure 403 {object} responses.ErrorResponse "Access denied"
 // @Failure 404 {object} responses.ErrorResponse "Conversation not found"
 // @Failure 500 {object} responses.ErrorResponse "Internal server error"
 // @Router /v1/conversations/{conversation_id}/items [post]
-func (api *ConversationAPI) createItems(reqCtx *gin.Context) {
+func (api *ConversationAPI) CreateItemsHandler(reqCtx *gin.Context) {
 	ctx := reqCtx.Request.Context()
 	conv, _ := conversation.GetConversationFromContext(reqCtx)
 
 	var request CreateItemsRequest
 	if err := reqCtx.ShouldBindJSON(&request); err != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "a4fb6e9b-00c8-423c-9836-a83080e34d28",
-			ErrorInstance: err,
+			Code:  "a4fb6e9b-00c8-423c-9836-a83080e34d28",
+			Error: "Invalid request payload",
 		})
 		return
 	}
+
 	itemsToCreate := make([]*conversation.Item, len(request.Items))
 	for i, itemReq := range request.Items {
 		item, ok := NewItemFromConversationItemRequest(itemReq)
 		if !ok {
 			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code: "a4fb6e9b-00c8-423c-9836-a83080e34d28",
+				Code:  "a4fb6e9b-00c8-423c-9836-a83080e34d28",
+				Error: "Invalid item format",
 			})
 			return
 		}
 		itemsToCreate[i] = item
 	}
 
-	ok, errorCode := api.conversationService.ValidateItems(ctx, itemsToCreate)
-	if !ok {
-		if errorCode == nil {
-			errorCode = ptr.ToString("41b80303-0e55-4a24-a079-d2d9340d713b")
-		}
+	err := api.conversationService.ValidateItems(ctx, itemsToCreate)
+	if err != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: *errorCode,
+			Code:          "019952d1-265e-738b-bada-7918c32a61d2",
+			ErrorInstance: err.GetError(),
 		})
 		return
 	}
 
 	createdItems, err := api.conversationService.AddMultipleItems(ctx, conv, conv.UserID, itemsToCreate)
 	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          *errorCode,
-			ErrorInstance: err,
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code:          "019952d1-68dc-73f3-84e3-0f53d8fce318",
+			ErrorInstance: err.GetError(),
 		})
 		return
 	}
+
 	var firstId *string
 	var lastId *string
 	if len(createdItems) > 0 {
@@ -510,33 +514,35 @@ func (api *ConversationAPI) createItems(reqCtx *gin.Context) {
 		lastId = &createdItems[len(createdItems)-1].PublicID
 	}
 
-	reqCtx.JSON(http.StatusOK, openai.ListResponse[*ConversationItemResponse]{
-		Object:  openai.ObjectTypeListList,
+	response := &openai.ListResponse[*ConversationItemResponse]{
+		Object:  "list",
 		Data:    functional.Map(createdItems, domainToConversationItemResponse),
 		FirstID: firstId,
 		LastID:  lastId,
 		HasMore: false,
 		Total:   int64(len(createdItems)),
-	})
+	}
+
+	reqCtx.JSON(http.StatusOK, response)
 }
 
-// listItems handles item listing with optional pagination
 // @Summary List items in a conversation
-// @Description Lists all items in a conversation
+// @Description Lists all items in a conversation with OpenAI-compatible pagination
 // @Tags Conversations
 // @Security BearerAuth
 // @Produce json
 // @Param conversation_id path string true "Conversation ID"
 // @Param limit query int false "Number of items to return (1-100)"
-// @Param cursor query string false "Cursor for pagination"
+// @Param after query string false "Cursor for pagination - ID of the last item from previous page"
 // @Param order query string false "Order of items (asc/desc)"
-// @Success 200 {object} ConversationItemListResponse "List of items"
+// @Success 200 {object} openai.ListResponse[ConversationItemResponse] "List of items"
+// @Failure 400 {object} responses.ErrorResponse "Bad Request - Invalid pagination parameters"
 // @Failure 401 {object} responses.ErrorResponse "Unauthorized"
 // @Failure 403 {object} responses.ErrorResponse "Access denied"
 // @Failure 404 {object} responses.ErrorResponse "Conversation not found"
 // @Failure 500 {object} responses.ErrorResponse "Internal server error"
 // @Router /v1/conversations/{conversation_id}/items [get]
-func (api *ConversationAPI) listItems(reqCtx *gin.Context) {
+func (api *ConversationAPI) ListItemsHandler(reqCtx *gin.Context) {
 	ctx := reqCtx.Request.Context()
 	conv, _ := conversation.GetConversationFromContext(reqCtx)
 
@@ -546,7 +552,7 @@ func (api *ConversationAPI) listItems(reqCtx *gin.Context) {
 			ConversationID: &conv.ID,
 		}, nil)
 		if err != nil {
-			return nil, err
+			return nil, fmt.Errorf("%s: %s", err.GetCode(), err.Error())
 		}
 		if len(items) != 1 {
 			return nil, fmt.Errorf("invalid conversation")
@@ -555,8 +561,8 @@ func (api *ConversationAPI) listItems(reqCtx *gin.Context) {
 	})
 	if err != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "e9144b73-6fc1-4b16-b9c7-460d8a4ecf6b",
-			ErrorInstance: err,
+			Code:  "e9144b73-6fc1-4b16-b9c7-460d8a4ecf6b",
+			Error: "Invalid pagination parameters",
 		})
 		return
 	}
@@ -564,11 +570,11 @@ func (api *ConversationAPI) listItems(reqCtx *gin.Context) {
 	filter := conversation.ItemFilter{
 		ConversationID: &conv.ID,
 	}
-	itemEntities, err := api.conversationService.FindItemsByFilter(ctx, filter, pagination)
-	if err != nil {
+	itemEntities, filterErr := api.conversationService.FindItemsByFilter(ctx, filter, pagination)
+	if filterErr != nil {
 		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "49530db0-0c1c-414c-a769-a7a4811dd650",
-			ErrorInstance: err,
+			Code:          "019952d1-a6d2-76ff-9c10-3e9264056f90",
+			ErrorInstance: filterErr.GetError(),
 		})
 		return
 	}
@@ -579,15 +585,15 @@ func (api *ConversationAPI) listItems(reqCtx *gin.Context) {
 	if len(itemEntities) > 0 {
 		firstId = &itemEntities[0].PublicID
 		lastId = &itemEntities[len(itemEntities)-1].PublicID
-		moreRecords, err := api.conversationService.FindItemsByFilter(ctx, filter, &query.Pagination{
+		moreRecords, moreErr := api.conversationService.FindItemsByFilter(ctx, filter, &query.Pagination{
 			Order: pagination.Order,
 			Limit: ptr.ToInt(1),
 			After: &itemEntities[len(itemEntities)-1].ID,
 		})
-		if err != nil {
+		if moreErr != nil {
 			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:          "f3cefed4-6f86-4e26-9e74-e858601627ca",
-				ErrorInstance: err,
+				Code:          "019952d1-e914-7466-b527-49e498129426",
+				ErrorInstance: moreErr.GetError(),
 			})
 			return
 		}
@@ -596,19 +602,20 @@ func (api *ConversationAPI) listItems(reqCtx *gin.Context) {
 		}
 	}
 
-	reqCtx.JSON(http.StatusOK, openai.ListResponse[*ConversationItemResponse]{
-		Object:  openai.ObjectTypeListList,
+	response := &openai.ListResponse[*ConversationItemResponse]{
+		Object:  "list",
 		Data:    functional.Map(itemEntities, domainToConversationItemResponse),
 		FirstID: firstId,
 		LastID:  lastId,
 		HasMore: hasMore,
 		Total:   int64(len(itemEntities)),
-	})
+	}
+
+	reqCtx.JSON(http.StatusOK, response)
 }
 
-// getItem handles single item retrieval
 // @Summary Get an item from a conversation
-// @Description Retrieves a specific item from a conversation
+// @Description Retrieves a specific item from a conversation with full content details
 // @Tags Conversations
 // @Security BearerAuth
 // @Produce json
@@ -617,32 +624,34 @@ func (api *ConversationAPI) listItems(reqCtx *gin.Context) {
 // @Success 200 {object} ConversationItemResponse "Item details"
 // @Failure 401 {object} responses.ErrorResponse "Unauthorized"
 // @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Conversation not found"
+// @Failure 404 {object} responses.ErrorResponse "Conversation or item not found"
 // @Failure 500 {object} responses.ErrorResponse "Internal server error"
 // @Router /v1/conversations/{conversation_id}/items/{item_id} [get]
-func (api *ConversationAPI) getItem(reqCtx *gin.Context) {
+func (api *ConversationAPI) GetItemHandler(reqCtx *gin.Context) {
 	item, ok := conversation.GetConversationItemFromContext(reqCtx)
 	if !ok {
 		return
 	}
-	reqCtx.JSON(http.StatusOK, domainToConversationItemResponse(item))
+
+	response := domainToConversationItemResponse(item)
+	reqCtx.JSON(http.StatusOK, response)
 }
 
-// deleteItem handles item deletion
 // @Summary Delete an item from a conversation
-// @Description Deletes a specific item from a conversation
+// @Description Deletes a specific item from a conversation and returns the deleted item details
 // @Tags Conversations
 // @Security BearerAuth
 // @Produce json
 // @Param conversation_id path string true "Conversation ID"
 // @Param item_id path string true "Item ID"
-// @Success 200 {object} ConversationResponse "Updated conversation"
+// @Success 200 {object} ConversationItemResponse "Deleted item details"
+// @Failure 400 {object} responses.ErrorResponse "Bad Request - Deletion failed"
 // @Failure 401 {object} responses.ErrorResponse "Unauthorized"
 // @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Conversation not found"
+// @Failure 404 {object} responses.ErrorResponse "Conversation or item not found"
 // @Failure 500 {object} responses.ErrorResponse "Internal server error"
 // @Router /v1/conversations/{conversation_id}/items/{item_id} [delete]
-func (api *ConversationAPI) deleteItem(reqCtx *gin.Context) {
+func (api *ConversationAPI) DeleteItemHandler(reqCtx *gin.Context) {
 	ctx := reqCtx.Request.Context()
 	conv, ok := conversation.GetConversationFromContext(reqCtx)
 	if !ok {
@@ -662,23 +671,61 @@ func (api *ConversationAPI) deleteItem(reqCtx *gin.Context) {
 	// Use efficient deletion with item public ID instead of loading all items
 	itemDeleted, err := api.conversationService.DeleteItemWithConversation(ctx, conv, item)
 	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "9c9cdf48-715b-44b9-9be1-6bb19e2401f8",
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code:          "019952d2-0f0f-730c-9abc-3fecc1db55c2",
+			ErrorInstance: err,
 		})
 		return
 	}
-	reqCtx.JSON(http.StatusOK, domainToConversationItemResponse(itemDeleted))
+
+	response := domainToConversationItemResponse(itemDeleted)
+	reqCtx.JSON(http.StatusOK, response)
+}
+
+func NewItemFromConversationItemRequest(itemReq ConversationItemRequest) (*conversation.Item, bool) {
+	ok := conversation.ValidateItemType(string(itemReq.Type))
+	if !ok {
+		return nil, false
+	}
+	itemType := conversation.ItemType(itemReq.Type)
+
+	var role *conversation.ItemRole
+	if itemReq.Role != "" {
+		ok := conversation.ValidateItemRole(string(itemReq.Role))
+		if !ok {
+			return nil, false
+		}
+		r := conversation.ItemRole(itemReq.Role)
+		role = &r
+	}
+
+	content := make([]conversation.Content, len(itemReq.Content))
+	for j, c := range itemReq.Content {
+		content[j] = conversation.Content{
+			Type: c.Type,
+			Text: &conversation.Text{
+				Value: c.Text,
+			},
+		}
+	}
+
+	return &conversation.Item{
+		Type:    itemType,
+		Role:    role,
+		Content: content,
+	}, true
 }
 
-func domainToConversationResponse(entity *conversation.Conversation) *ConversationResponse {
+func domainToExtendedConversationResponse(entity *conversation.Conversation) *ExtendedConversationResponse {
 	metadata := entity.Metadata
 	if metadata == nil {
 		metadata = make(map[string]string)
 	}
-	return &ConversationResponse{
+	return &ExtendedConversationResponse{
 		ID:        entity.PublicID,
 		Object:    "conversation",
-		CreatedAt: entity.CreatedAt,
+		Title:     ptr.FromString(entity.Title),
+		CreatedAt: entity.CreatedAt.Unix(),
 		Metadata:  metadata,
 	}
 }
@@ -696,8 +743,8 @@ func domainToConversationItemResponse(entity *conversation.Item) *ConversationIt
 		ID:        entity.PublicID,
 		Object:    "conversation.item",
 		Type:      string(entity.Type),
-		Status:    entity.Status,
-		CreatedAt: entity.CreatedAt,
+		Status:    conversation.ItemStatusToStringPtr(entity.Status),
+		CreatedAt: entity.CreatedAt.Unix(),
 		Content:   domainToContentResponse(entity.Content),
 	}
 
@@ -782,66 +829,3 @@ func domainToAnnotationResponse(annotations []conversation.Annotation) []Annotat
 	}
 	return result
 }
-
-// AuthenticatedUser represents an authenticated user context
-type AuthenticatedUser struct {
-	ID uint
-}
-
-// CreateConversationRequest represents the input for creating a conversation
-type CreateConversationRequest struct {
-	Title    string                    `json:"title"`
-	Metadata map[string]string         `json:"metadata,omitempty"`
-	Items    []ConversationItemRequest `json:"items,omitempty"`
-}
-
-// ConversationItemRequest represents an item in the conversation request
-type ConversationItemRequest struct {
-	Type    string                       `json:"type" binding:"required"`
-	Role    conversation.ItemRole        `json:"role,omitempty"`
-	Content []ConversationContentRequest `json:"content" binding:"required"`
-}
-
-// ConversationContentRequest represents content in the request
-type ConversationContentRequest struct {
-	Type string `json:"type" binding:"required"`
-	Text string `json:"text,omitempty"`
-}
-
-func NewItemFromConversationItemRequest(itemReq ConversationItemRequest) (*conversation.Item, bool) {
-	ok := conversation.ValidateItemType(string(itemReq.Type))
-	if !ok {
-		return nil, false
-	}
-	itemType := conversation.ItemType(itemReq.Type)
-
-	var role *conversation.ItemRole
-	if itemReq.Role != "" {
-		ok := conversation.ValidateItemRole(string(itemReq.Role))
-		if !ok {
-			return nil, false
-		}
-		r := conversation.ItemRole(itemReq.Role)
-		role = &r
-	}
-
-	content := make([]conversation.Content, len(itemReq.Content))
-	for j, c := range itemReq.Content {
-		content[j] = conversation.Content{
-			Type: c.Type,
-			Text: &conversation.Text{
-				Value: c.Text,
-			},
-		}
-	}
-
-	return &conversation.Item{
-		Type:    itemType,
-		Role:    role,
-		Content: content,
-	}, true
-}
-
-type CreateItemsRequest struct {
-	Items []ConversationItemRequest `json:"items" binding:"required"`
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/responses/response_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/responses/response_route.go
new file mode 100644
index 00000000..e8ff1e31
--- /dev/null
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/responses/response_route.go
@@ -0,0 +1,466 @@
+package responses
+
+import (
+	"fmt"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"menlo.ai/jan-api-gateway/app/domain/auth"
+	"menlo.ai/jan-api-gateway/app/domain/response"
+
+	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
+	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
+)
+
+// Use types from the response packages instead of defining internal types
+
+// ResponseRoute serves the /responses HTTP endpoints on top of the auth and response domain services
+type ResponseRoute struct {
+	responseModelService  *response.ResponseModelService  // creation logic and the per-response route handlers
+	authService           *auth.AuthService               // supplies the auth middleware
+	responseService       *response.ResponseService       // response middleware, lookups, updates and API conversion
+	streamModelService    *response.StreamModelService    // writes streaming replies
+	nonStreamModelService *response.NonStreamModelService // writes non-streaming replies
+}
+
+// NewResponseRoute builds a ResponseRoute; streamHandler and nonStreamHandler are stored as the streaming and non-streaming services
+func NewResponseRoute(responseModelService *response.ResponseModelService, authService *auth.AuthService, responseService *response.ResponseService, streamHandler *response.StreamModelService, nonStreamHandler *response.NonStreamModelService) *ResponseRoute {
+	return &ResponseRoute{
+		responseModelService:  responseModelService,
+		authService:           authService,
+		responseService:       responseService,
+		streamModelService:    streamHandler,
+		nonStreamModelService: nonStreamHandler,
+	}
+}
+
+// RegisterRouter mounts every response endpoint under the "/responses" group of router.
+func (responseRoute *ResponseRoute) RegisterRouter(router gin.IRouter) {
+	// Group returns the *gin.RouterGroup that registerRoutes attaches the handlers to.
+	responseRoute.registerRoutes(router.Group("/responses"))
+}
+
+// registerRoutes attaches the create endpoint and the per-response endpoints to router,
+// all behind the app-user and registered-user auth middleware.
+func (responseRoute *ResponseRoute) registerRoutes(router *gin.RouterGroup) {
+	appUserAuth := responseRoute.authService.AppUserAuthMiddleware()
+	registeredUser := responseRoute.authService.RegisteredUserMiddleware()
+	group := router.Group("", appUserAuth, registeredUser)
+	group.POST("", responseRoute.CreateResponse)
+
+	// Per-response routes share the "/:<public id key>" prefix and run the response middleware first.
+	idPath := fmt.Sprintf("/:%s", string(response.ResponseContextKeyPublicID))
+	withResponse := responseRoute.responseService.GetResponseMiddleWare()
+	handlers := responseRoute.responseModelService
+	group.GET(idPath, withResponse, handlers.GetResponseHandler)
+	group.DELETE(idPath, withResponse, handlers.DeleteResponseHandler)
+	group.POST(idPath+"/cancel", withResponse, handlers.CancelResponseHandler)
+	group.GET(idPath+"/input_items", withResponse, handlers.ListInputItemsHandler)
+}
+
+// CreateResponse creates a new response from LLM
+// @Summary Create a response
+// @Description Creates a new LLM response for the given input. Supports multiple input types including text, images, files, web search, and more.
+// @Description
+// @Description **Supported Input Types:**
+// @Description - `text`: Plain text input
+// @Description - `image`: Image input (URL or base64)
+// @Description - `file`: File input by file ID
+// @Description - `web_search`: Web search input
+// @Description - `file_search`: File search input
+// @Description - `streaming`: Streaming input
+// @Description - `function_calls`: Function calls input
+// @Description - `reasoning`: Reasoning input
+// @Description
+// @Description **Example Request:**
+// @Description ```json
+// @Description {
+// @Description   "model": "gpt-4",
+// @Description   "input": {
+// @Description     "type": "text",
+// @Description     "text": "Hello, how are you?"
+// @Description   },
+// @Description   "max_tokens": 100,
+// @Description   "temperature": 0.7,
+// @Description   "stream": false,
+// @Description   "background": false
+// @Description }
+// @Description ```
+// @Description
+// @Description **Response Format:**
+// @Description The response uses embedded structure where all fields are at the top level:
+// @Description - `jan_status`: Jan API status code (optional)
+// @Description - `id`: Response identifier
+// @Description - `object`: Object type ("response")
+// @Description - `created`: Unix timestamp
+// @Description - `model`: Model used
+// @Description - `status`: Response status
+// @Description - `input`: Input data
+// @Description - `output`: Generated output
+// @Description
+// @Description **Example Response:**
+// @Description ```json
+// @Description {
+// @Description   "jan_status": "000000",
+// @Description   "id": "resp_1234567890",
+// @Description   "object": "response",
+// @Description   "created": 1234567890,
+// @Description   "model": "gpt-4",
+// @Description   "status": "completed",
+// @Description   "input": {
+// @Description     "type": "text",
+// @Description     "text": "Hello, how are you?"
+// @Description   },
+// @Description   "output": {
+// @Description     "type": "text",
+// @Description     "text": {
+// @Description       "value": "I'm doing well, thank you!"
+// @Description     }
+// @Description   }
+// @Description }
+// @Description ```
+// @Description
+// @Description **Response Status:**
+// @Description - `completed`: Response generation finished successfully
+// @Description - `processing`: Response is being generated
+// @Description - `failed`: Response generation failed
+// @Description - `cancelled`: Response was cancelled
+// @Tags Jan, Jan-Responses
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param request body requesttypes.CreateResponseRequest true "Request payload containing model, input, and generation parameters"
+// @Success 200 {object} responses.Response "Created response"
+// @Success 202 {object} responses.Response "Response accepted for background processing"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
+// @Failure 422 {object} responses.ErrorResponse "Validation error"
+// @Failure 429 {object} responses.ErrorResponse "Rate limit exceeded"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/responses [post]
+func (responseRoute *ResponseRoute) CreateResponse(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+	user, _ := auth.GetUserFromContext(reqCtx) // NOTE(review): ok is discarded; presumably the auth middleware guarantees a user — confirm
+	userID := user.ID
+
+	var request requesttypes.CreateResponseRequest
+	if err := reqCtx.ShouldBindJSON(&request); err != nil {
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code: "g7h8i9j0-k1l2-3456-ghij-789012345678",
+		})
+		return
+	}
+
+	// Reject requests that lack either of the two mandatory fields: model and input
+	if request.Model == "" {
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code: "h8i9j0k1-l2m3-4567-hijk-890123456789",
+		})
+		return
+	}
+
+	if request.Input == nil {
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code: "i9j0k1l2-m3n4-5678-ijkl-901234567890",
+		})
+		return
+	}
+
+	// Copy the listed fields into a new request value of the same type, which is passed on by pointer
+	domainRequest := &requesttypes.CreateResponseRequest{
+		Model:              request.Model,
+		Input:              request.Input,
+		Stream:             request.Stream,
+		Temperature:        request.Temperature,
+		MaxTokens:          request.MaxTokens,
+		PreviousResponseID: request.PreviousResponseID,
+		SystemPrompt:       request.SystemPrompt,
+		TopP:               request.TopP,
+		TopK:               request.TopK,
+		RepetitionPenalty:  request.RepetitionPenalty,
+		Seed:               request.Seed,
+		Stop:               request.Stop,
+		PresencePenalty:    request.PresencePenalty,
+		FrequencyPenalty:   request.FrequencyPenalty,
+		LogitBias:          request.LogitBias,
+		ResponseFormat:     request.ResponseFormat,
+		Tools:              request.Tools,
+		ToolChoice:         request.ToolChoice,
+		Metadata:           request.Metadata,
+		Background:         request.Background,
+		Timeout:            request.Timeout,
+		User:               request.User,
+		Conversation:       request.Conversation,
+		Store:              request.Store,
+	}
+
+	// Run the domain service; any error it reports is sent back as a 400 carrying its code and message
+	result, err := responseRoute.responseModelService.CreateResponse(ctx, userID, domainRequest)
+	if err != nil {
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code:  err.GetCode(),
+			Error: err.Error(),
+		})
+		return
+	}
+
+	// Hand the prepared result to the streaming or non-streaming writer
+	responseRoute.handleResponseCreation(reqCtx, result, domainRequest)
+}
+
+// handleResponseCreation writes the reply for a prepared creation result. Streaming results get
+// the event-stream headers and go to the stream service; all others go to the non-stream service.
+func (responseRoute *ResponseRoute) handleResponseCreation(reqCtx *gin.Context, result *response.ResponseCreationResult, request *requesttypes.CreateResponseRequest) {
+	if !result.IsStreaming {
+		responseRoute.nonStreamModelService.CreateNonStreamResponseHandler(reqCtx, request, result.APIKey, result.Conversation, result.Response, result.ChatCompletionRequest)
+		return
+	}
+
+	// These headers are set before delegating, so they precede anything the stream service writes.
+	reqCtx.Header("Content-Type", "text/event-stream")
+	reqCtx.Header("Cache-Control", "no-cache")
+	reqCtx.Header("Connection", "keep-alive")
+	reqCtx.Header("Access-Control-Allow-Origin", "*")
+	reqCtx.Header("Access-Control-Allow-Headers", "Cache-Control")
+
+	streamService := responseRoute.streamModelService
+	streamService.CreateStreamResponse(reqCtx, request, result.APIKey, result.Conversation, result.Response, result.ChatCompletionRequest)
+}
+
+// GetResponse retrieves a response by ID
+// @Summary Get a response
+// @Description Retrieves an LLM response by its ID. Returns the complete response object with embedded structure where all fields are at the top level.
+// @Description
+// @Description **Response Format:**
+// @Description The response uses embedded structure where all fields are at the top level:
+// @Description - `jan_status`: Jan API status code (optional)
+// @Description - `id`: Response identifier
+// @Description - `object`: Object type ("response")
+// @Description - `created`: Unix timestamp
+// @Description - `model`: Model used
+// @Description - `status`: Response status
+// @Description - `input`: Input data
+// @Description - `output`: Generated output
+// @Tags Jan, Jan-Responses
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param response_id path string true "Unique identifier of the response"
+// @Success 200 {object} responses.Response "Response details"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
+// @Failure 403 {object} responses.ErrorResponse "Access denied"
+// @Failure 404 {object} responses.ErrorResponse "Response not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/responses/{response_id} [get]
+func (responseRoute *ResponseRoute) GetResponse(reqCtx *gin.Context) {
+	resp, ok := response.GetResponseFromContext(reqCtx)
+	if !ok {
+		return // NOTE(review): nothing is written here, unlike the sibling handlers that abort with 400 — confirm intent
+	}
+	// Convert the response found in the request context to its API form and send it as the 200 body
+	apiResponse := responseRoute.responseService.ConvertDomainResponseToAPIResponse(resp)
+	reqCtx.JSON(http.StatusOK, apiResponse)
+}
+
+// DeleteResponse deletes a response by ID
+// @Summary Delete a response
+// @Description Deletes an LLM response by its ID. Returns the deleted response object with embedded structure where all fields are at the top level.
+// @Description
+// @Description **Response Format:**
+// @Description The response uses embedded structure where all fields are at the top level:
+// @Description - `jan_status`: Jan API status code (optional)
+// @Description - `id`: Response identifier
+// @Description - `object`: Object type ("response")
+// @Description - `created`: Unix timestamp
+// @Description - `model`: Model used
+// @Description - `status`: Response status (will be "cancelled")
+// @Description - `input`: Input data
+// @Description - `cancelled_at`: Cancellation timestamp
+// @Tags Jan, Jan-Responses
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param response_id path string true "Unique identifier of the response"
+// @Success 200 {object} responses.Response "Deleted response"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
+// @Failure 403 {object} responses.ErrorResponse "Access denied"
+// @Failure 404 {object} responses.ErrorResponse "Response not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/responses/{response_id} [delete]
+func (responseRoute *ResponseRoute) DeleteResponse(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+	resp, ok := response.GetResponseFromContext(reqCtx)
+	if !ok {
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code: "k1l2m3n4-o5p6-7890-klmn-123456789012",
+		})
+		return
+	}
+
+	success, err := responseRoute.responseService.DeleteResponse(ctx, resp.ID)
+	if !success { // NOTE(review): err is dereferenced below; assumes it is non-nil whenever success is false — confirm
+		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
+			Code:  err.GetCode(),
+			Error: err.Error(),
+		})
+		return
+	}
+	// Reply with the response exactly as it was read from the context before the delete call
+	apiResponse := responseRoute.responseService.ConvertDomainResponseToAPIResponse(resp)
+	reqCtx.JSON(http.StatusOK, apiResponse)
+}
+
+// CancelResponse cancels a running response
+// @Summary Cancel a response
+// @Description Cancels a running LLM response that was created with background=true. Only responses that are currently processing can be cancelled.
+// @Description
+// @Description **Response Format:**
+// @Description The response uses embedded structure where all fields are at the top level:
+// @Description - `jan_status`: Jan API status code (optional)
+// @Description - `id`: Response identifier
+// @Description - `object`: Object type ("response")
+// @Description - `created`: Unix timestamp
+// @Description - `model`: Model used
+// @Description - `status`: Response status (will be "cancelled")
+// @Description - `input`: Input data
+// @Description - `cancelled_at`: Cancellation timestamp
+// @Tags Jan, Jan-Responses
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param response_id path string true "Unique identifier of the response to cancel"
+// @Success 200 {object} responses.Response "Response cancelled successfully"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request or response cannot be cancelled"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
+// @Failure 403 {object} responses.ErrorResponse "Access denied"
+// @Failure 404 {object} responses.ErrorResponse "Response not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/responses/{response_id}/cancel [post]
+func (responseRoute *ResponseRoute) CancelResponse(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+	resp, ok := response.GetResponseFromContext(reqCtx)
+	if !ok {
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code: "m3n4o5p6-q7r8-9012-mnop-345678901234",
+		})
+		return
+	}
+
+	// TODO: cancel the stream when it is running in a goroutine, and update the response status in a goroutine.
+	// For now this handler only sets the stored status to cancelled.
+	success, err := responseRoute.responseService.UpdateResponseStatus(ctx, resp.ID, response.ResponseStatusCancelled)
+	if !success { // NOTE(review): err is dereferenced below; assumes it is non-nil whenever success is false — confirm
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
+			Code:  err.GetCode(),
+			Error: err.Error(),
+		})
+		return
+	}
+
+	// Reload the response by public ID so the reply reflects the updated status
+	updatedResp, err := responseRoute.responseService.GetResponseByPublicID(ctx, resp.PublicID)
+	if err != nil {
+		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
+			Code:  err.GetCode(),
+			Error: err.Error(),
+		})
+		return
+	}
+	// Convert the reloaded response to its API form and send it as the 200 body
+	apiResponse := responseRoute.responseService.ConvertDomainResponseToAPIResponse(updatedResp)
+	reqCtx.JSON(http.StatusOK, apiResponse)
+}
+
+// ListInputItems lists input items for a response
+// @Summary List input items
+// @Description Retrieves a paginated list of input items for a response. Supports cursor-based pagination for efficient retrieval of large datasets.
+// @Description
+// @Description **Response Format:**
+// @Description The response uses embedded structure where all fields are at the top level:
+// @Description - `jan_status`: Jan API status code (optional)
+// @Description - `first_id`: First item ID for pagination (optional)
+// @Description - `last_id`: Last item ID for pagination (optional)
+// @Description - `has_more`: Whether more items are available (optional)
+// @Description - `id`: Input item identifier
+// @Description - `object`: Object type ("input_item")
+// @Description - `created`: Unix timestamp
+// @Description - `type`: Input type
+// @Description - `text`: Text content (for text type)
+// @Description - `image`: Image content (for image type)
+// @Description - `file`: File content (for file type)
+// @Description
+// @Description **Example Response:**
+// @Description ```json
+// @Description {
+// @Description   "jan_status": "000000",
+// @Description   "first_id": "input_123",
+// @Description   "last_id": "input_456",
+// @Description   "has_more": false,
+// @Description   "id": "input_1234567890",
+// @Description   "object": "input_item",
+// @Description   "created": 1234567890,
+// @Description   "type": "text",
+// @Description   "text": "Hello, world!"
+// @Description }
+// @Description ```
+// @Tags Jan, Jan-Responses
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param response_id path string true "Unique identifier of the response"
+// @Param limit query int false "Maximum number of items to return (default: 20, max: 100)"
+// @Param after query string false "Cursor for pagination - return items after this ID"
+// @Param before query string false "Cursor for pagination - return items before this ID"
+// @Success 200 {object} responses.ListInputItemsResponse "List of input items"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request or pagination parameters"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
+// @Failure 403 {object} responses.ErrorResponse "Access denied"
+// @Failure 404 {object} responses.ErrorResponse "Response not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/responses/{response_id}/input_items [get]
+func (responseRoute *ResponseRoute) ListInputItems(reqCtx *gin.Context) {
+	// Replies with every input item of the response found in the request context.
+	ctx := reqCtx.Request.Context()
+	resp, found := response.GetResponseFromContext(reqCtx)
+	if !found {
+		missing := responses.ErrorResponse{Code: "p6q7r8s9-t0u1-2345-pqrs-678901234567"}
+		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, missing)
+		return
+	}
+
+	// Fetch the items linked to this response; nil is passed as the service's third argument.
+	items, err := responseRoute.responseService.GetItemsForResponse(ctx, resp.ID, nil)
+	if err != nil {
+		failure := responses.ErrorResponse{
+			Code:  err.GetCode(),
+			Error: err.Error(),
+		}
+		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, failure)
+		return
+	}
+
+	// Convert each conversation item to its API form through the response service.
+	inputItems := make([]responses.InputItem, 0, len(items))
+	for idx := range items {
+		inputItems = append(inputItems, responseRoute.responseService.ConvertConversationItemToInputItem(items[idx]))
+	}
+
+	// All items are returned in one page, so HasMore is always false for now.
+	page := responses.ListInputItemsResponse{
+		Object:  "list",
+		Data:    inputItems,
+		HasMore: false,
+	}
+
+	// The cursors point at the first and last item's public ID and stay nil for an empty list.
+	if count := len(items); count > 0 {
+		page.FirstID = &items[0].PublicID
+		page.LastID = &items[count-1].PublicID
+	}
+
+	reqCtx.JSON(http.StatusOK, page)
+}
+
+// All transformation functions removed - now using service methods
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/v1_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/v1_route.go
index 03f76b7f..b3deb583 100644
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/v1_route.go
+++ b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/v1_route.go
@@ -9,6 +9,7 @@ import (
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/conversations"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization"
+	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/responses"
 	"menlo.ai/jan-api-gateway/config"
 )
 
@@ -19,6 +20,7 @@ type V1Route struct {
 	modelAPI          *ModelAPI
 	mcpAPI            *mcp.MCPAPI
 	authRoute         *auth.AuthRoute
+	responsesRoute    *responses.ResponseRoute
 }
 
 func NewV1Route(
@@ -28,6 +30,7 @@ func NewV1Route(
 	modelAPI *ModelAPI,
 	mcpAPI *mcp.MCPAPI,
 	authRoute *auth.AuthRoute,
+	responsesRoute *responses.ResponseRoute,
 ) *V1Route {
 	return &V1Route{
 		organizationRoute,
@@ -36,6 +39,7 @@ func NewV1Route(
 		modelAPI,
 		mcpAPI,
 		authRoute,
+		responsesRoute,
 	}
 }
 
@@ -48,6 +52,7 @@ func (v1Route *V1Route) RegisterRouter(router gin.IRouter) {
 	v1Route.mcpAPI.RegisterRouter(v1Router)
 	v1Route.organizationRoute.RegisterRouter(v1Router)
 	v1Route.authRoute.RegisterRouter(v1Router)
+	v1Route.responsesRoute.RegisterRouter(v1Router)
 }
 
 // GetVersion godoc
diff --git a/apps/jan-api-gateway/application/app/utils/httpclients/jan_inference/client.go b/apps/jan-api-gateway/application/app/utils/httpclients/jan_inference/client.go
index 8156c1fc..e0b2d262 100644
--- a/apps/jan-api-gateway/application/app/utils/httpclients/jan_inference/client.go
+++ b/apps/jan-api-gateway/application/app/utils/httpclients/jan_inference/client.go
@@ -58,6 +58,33 @@ func (client *JanInferenceClient) CreateChatCompletionStream(ctx context.Context
 	return nil
 }
 
+// CreateChatCompletionStreamChunks posts request to /v1/chat/completions and returns a channel of
+// raw response lines. The channel is closed when the body ends, ctx is cancelled, or the request
+// fails (after one "error: ..." message). apiKey is not used here; the returned error is always nil.
+func (client *JanInferenceClient) CreateChatCompletionStreamChunks(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (<-chan string, error) {
+	chunkChan := make(chan string, 100)
+	go func() {
+		defer close(chunkChan)
+		// Bind the upstream call to ctx so cancelling the caller also aborts the HTTP stream.
+		req := JanInferenceRestyClient.R().SetContext(ctx).SetBody(request)
+		resp, err := req.SetDoNotParseResponse(true).Post("/v1/chat/completions")
+		if err != nil {
+			chunkChan <- fmt.Sprintf("error: %v", err) // the buffer is still empty, so this cannot block
+			return
+		}
+		defer resp.RawResponse.Body.Close()
+		scanner := bufio.NewScanner(resp.RawResponse.Body)
+		for scanner.Scan() {
+			select {
+			case chunkChan <- scanner.Text():
+			case <-ctx.Done(): // the consumer gave up: stop instead of blocking on a full buffer
+				return
+			}
+		}
+	}()
+	return chunkChan, nil
+}
+
 // TODO: add timeout
 func (client *JanInferenceClient) CreateChatCompletion(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*openai.ChatCompletionResponse, error) {
 	var chatCompletionResponse openai.ChatCompletionResponse
@@ -91,4 +118,4 @@ type Model struct {
 type ModelsResponse struct {
 	Object string  `json:"object"`
 	Data   []Model `json:"data"`
-}
+}
\ No newline at end of file
diff --git a/apps/jan-api-gateway/application/app/utils/idgen/generator.go b/apps/jan-api-gateway/application/app/utils/idgen/generator.go
index 607bb9e9..3f06e703 100644
--- a/apps/jan-api-gateway/application/app/utils/idgen/generator.go
+++ b/apps/jan-api-gateway/application/app/utils/idgen/generator.go
@@ -2,7 +2,6 @@ package idgen
 
 import (
 	"crypto/rand"
-	"encoding/base64"
 	"fmt"
 	"strings"
 )
@@ -10,25 +9,21 @@ import (
 // GenerateSecureID generates a cryptographically secure ID with the given prefix and length
 // This is a pure utility function that only handles the crypto and formatting logic
 func GenerateSecureID(prefix string, length int) (string, error) {
-	// The byte length required is about 3/4 of the desired string length.
-	// We add 2 to be safe and avoid rounding issues or insufficient bytes.
-	byteLength := (length * 3 / 4) + 2
-	bytes := make([]byte, byteLength)
+	// Draw two random bytes per output character so the modulo reduction below is close to uniform
+	bytes := make([]byte, length*2) // bytes[2*i] and bytes[2*i+1] feed output character i
 	_, err := rand.Read(bytes)
 	if err != nil {
 		return "", fmt.Errorf("failed to generate random bytes: %w", err)
 	}
 
-	// Encode to base64 URL-safe format
-	encoded := base64.URLEncoding.EncodeToString(bytes)
-	encoded = strings.TrimRight(encoded, "=") // Remove padding
-
-	// Truncate to desired length
-	if len(encoded) > length {
-		encoded = encoded[:length]
+	// Map each 16-bit sample onto 0-9a-z; a single byte % 36 would favour '0'-'3' (8 of 256 values vs 7)
+	const charset = "0123456789abcdefghijklmnopqrstuvwxyz"
+	encoded := make([]byte, length)
+	for i := 0; i < length; i++ {
+		encoded[i] = charset[(int(bytes[2*i])<<8|int(bytes[2*i+1]))%len(charset)]
 	}
 
-	return fmt.Sprintf("%s_%s", prefix, encoded), nil
+	return fmt.Sprintf("%s_%s", prefix, string(encoded)), nil
 }
 
 // ValidateIDFormat validates that an ID has the expected format (prefix_alphanumeric)
@@ -46,12 +41,9 @@ func ValidateIDFormat(id, expectedPrefix string) bool {
 		return false
 	}
 
-	// Validate characters (base64 URL-safe: A-Z, a-z, 0-9, -, _)
+	// Validate characters (numbers and lowercase letters only: 0-9, a-z)
 	for _, char := range suffix {
-		if !((char >= 'a' && char <= 'z') ||
-			(char >= 'A' && char <= 'Z') ||
-			(char >= '0' && char <= '9') ||
-			char == '-' || char == '_') {
+		if !((char >= 'a' && char <= 'z') || (char >= '0' && char <= '9')) {
 			return false
 		}
 	}
diff --git a/apps/jan-api-gateway/application/app/utils/ptr/pointer.go b/apps/jan-api-gateway/application/app/utils/ptr/pointer.go
index ba14e080..4fbc5fdb 100644
--- a/apps/jan-api-gateway/application/app/utils/ptr/pointer.go
+++ b/apps/jan-api-gateway/application/app/utils/ptr/pointer.go
@@ -25,3 +25,11 @@ func ToBool(b bool) *bool {
 func ToTime(b time.Time) *time.Time {
 	return &b
 }
+
+// FromString returns the string that s points to, or "" when s is nil
+func FromString(s *string) string {
+	if s != nil {
+		return *s
+	}
+	return ""
+}
diff --git a/apps/jan-api-gateway/application/cmd/server/wire.go b/apps/jan-api-gateway/application/cmd/server/wire.go
index 11f0d498..e8fed7e5 100644
--- a/apps/jan-api-gateway/application/cmd/server/wire.go
+++ b/apps/jan-api-gateway/application/cmd/server/wire.go
@@ -3,8 +3,11 @@
 package main
 
 import (
+	"context"
+
 	"github.com/google/wire"
 	"menlo.ai/jan-api-gateway/app/domain"
+	"menlo.ai/jan-api-gateway/app/infrastructure"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository"
 	"menlo.ai/jan-api-gateway/app/interfaces/http"
@@ -15,10 +18,16 @@ func CreateApplication() (*Application, error) {
 	wire.Build(
 		database.NewDB,
 		repository.RepositoryProvider,
+		infrastructure.InfrastructureProvider,
 		domain.ServiceProvider,
 		routes.RouteProvider,
 		http.NewHttpServer,
 		wire.Struct(new(Application), "*"),
+		provideContext,
 	)
 	return nil, nil
 }
+
+func provideContext() context.Context { // wire provider listed in wire.Build above
+	return context.Background() // empty root context: never cancelled, no deadline, no values
+}
diff --git a/apps/jan-api-gateway/application/cmd/server/wire_gen.go b/apps/jan-api-gateway/application/cmd/server/wire_gen.go
index 677c7b59..0db702a2 100644
--- a/apps/jan-api-gateway/application/cmd/server/wire_gen.go
+++ b/apps/jan-api-gateway/application/cmd/server/wire_gen.go
@@ -7,12 +7,15 @@
 package main
 
 import (
+	"context"
+
 	"menlo.ai/jan-api-gateway/app/domain/apikey"
 	"menlo.ai/jan-api-gateway/app/domain/auth"
 	"menlo.ai/jan-api-gateway/app/domain/conversation"
 	"menlo.ai/jan-api-gateway/app/domain/mcp/serpermcp"
 	"menlo.ai/jan-api-gateway/app/domain/organization"
 	"menlo.ai/jan-api-gateway/app/domain/project"
+	"menlo.ai/jan-api-gateway/app/domain/response"
 	"menlo.ai/jan-api-gateway/app/domain/user"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/apikeyrepo"
@@ -20,23 +23,27 @@ import (
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/itemrepo"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/organizationrepo"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/projectrepo"
+	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/responserepo"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
 	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/userrepo"
+	"menlo.ai/jan-api-gateway/app/infrastructure/inference"
 	"menlo.ai/jan-api-gateway/app/interfaces/http"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1"
+	v1 "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1"
 	auth2 "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/auth"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/auth/google"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/chat"
+	chat2 "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/chat"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/conversations"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp/mcp_impl"
+	mcpimpl "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp/mcp_impl"
 	organization2 "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization"
 	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects/api_keys"
-)
+	apikeys "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects/api_keys"
+	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/responses"
+	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
 
-import (
 	_ "github.com/grafana/pyroscope-go/godeltaprof/http/pprof"
+
 	_ "net/http/pprof"
 )
 
@@ -61,11 +68,16 @@ func CreateApplication() (*Application, error) {
 	projectsRoute := projects.NewProjectsRoute(projectService, apiKeyService, projectApiKeyRoute)
 	authService := auth.NewAuthService(userService, apiKeyService, organizationService)
 	organizationRoute := organization2.NewOrganizationRoute(adminApiKeyAPI, projectsRoute, authService)
-	completionAPI := chat.NewCompletionAPI(apiKeyService)
-	chatRoute := chat.NewChatRoute(completionAPI)
+	context := provideContext()
+	janInferenceClient := janinference.NewJanInferenceClient(context)
+	inferenceProvider := inference.NewJanInferenceProvider(janInferenceClient)
 	conversationRepository := conversationrepo.NewConversationGormRepository(transactionDatabase)
 	itemRepository := itemrepo.NewItemGormRepository(transactionDatabase)
 	conversationService := conversation.NewService(conversationRepository, itemRepository)
+	completionNonStreamHandler := chat.NewCompletionNonStreamHandler(inferenceProvider, conversationService)
+	completionStreamHandler := chat.NewCompletionStreamHandler(inferenceProvider, conversationService)
+	completionAPI := chat2.NewCompletionAPI(completionNonStreamHandler, completionStreamHandler, conversationService, authService)
+	chatRoute := chat2.NewChatRoute(completionAPI, authService)
 	conversationAPI := conversations.NewConversationAPI(conversationService, authService)
 	modelAPI := v1.NewModelAPI()
 	serperService := serpermcp.NewSerperService()
@@ -73,10 +85,22 @@ func CreateApplication() (*Application, error) {
 	mcpapi := mcp.NewMCPAPI(serperMCP, authService)
 	googleAuthAPI := google.NewGoogleAuthAPI(userService, authService)
 	authRoute := auth2.NewAuthRoute(googleAuthAPI, userService, authService)
-	v1Route := v1.NewV1Route(organizationRoute, chatRoute, conversationAPI, modelAPI, mcpapi, authRoute)
+	responseRepository := responserepo.NewResponseGormRepository(transactionDatabase)
+	responseService := response.NewResponseService(responseRepository, itemRepository, conversationService)
+	responseModelService := response.NewResponseModelService(userService, authService, apiKeyService, conversationService, responseService)
+	streamModelService := response.NewStreamModelService(responseModelService)
+	nonStreamModelService := response.NewNonStreamModelService(responseModelService)
+	responseRoute := responses.NewResponseRoute(responseModelService, authService, responseService, streamModelService, nonStreamModelService)
+	v1Route := v1.NewV1Route(organizationRoute, chatRoute, conversationAPI, modelAPI, mcpapi, authRoute, responseRoute)
 	httpServer := http.NewHttpServer(v1Route)
 	application := &Application{
 		HttpServer: httpServer,
 	}
 	return application, nil
 }
+
+// wire.go:
+
+func provideContext() context.Context {
+	return context.Background()
+}
diff --git a/apps/jan-api-gateway/application/docs/docs.go b/apps/jan-api-gateway/application/docs/docs.go
index 659938b9..0ac17beb 100644
--- a/apps/jan-api-gateway/application/docs/docs.go
+++ b/apps/jan-api-gateway/application/docs/docs.go
@@ -35,7 +35,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest"
+                            "$ref": "#/definitions/google.GoogleCallbackRequest"
                         }
                     }
                 ],
@@ -43,25 +43,25 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully authenticated and returned tokens",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.AccessTokenResponse"
+                            "$ref": "#/definitions/google.AccessTokenResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request (e.g., invalid state, missing code, or invalid claims)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., a user claim is not found or is invalid in the context)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -81,7 +81,7 @@ const docTemplate = `{
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -101,19 +101,19 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully refreshed the access token",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse"
+                            "$ref": "#/definitions/auth.AccessTokenResponse"
                         }
                     },
                     "400": {
                         "description": "Bad Request (e.g., invalid refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., expired or missing refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -139,13 +139,13 @@ const docTemplate = `{
                     "400": {
                         "description": "Bad Request (e.g., invalid refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., expired or missing refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -170,13 +170,13 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully retrieved user profile",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.GetMeResponse"
+                            "$ref": "#/definitions/auth.GetMeResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., missing or invalid JWT)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -199,19 +199,19 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully refreshed the access token",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse"
+                            "$ref": "#/definitions/auth.AccessTokenResponse"
                         }
                     },
                     "400": {
                         "description": "Bad Request (e.g., invalid refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., expired or missing refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -242,7 +242,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/openai.ChatCompletionRequest"
+                            "$ref": "#/definitions/chat.PostChatCompletionRequest"
                         }
                     }
                 ],
@@ -250,25 +250,25 @@ const docTemplate = `{
                     "200": {
                         "description": "Successful response",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_chat.ChatCompletionResponseSwagger"
+                            "$ref": "#/definitions/chat.ChatCompletionResponseSwagger"
                         }
                     },
                     "400": {
                         "description": "Invalid request payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -311,25 +311,25 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully retrieved the list of conversations",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationResponse"
+                            "$ref": "#/definitions/conversations.ListResponse-conversations_ConversationResponse"
                         }
                     },
                     "400": {
                         "description": "Bad Request - Invalid pagination parameters",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -358,7 +358,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.CreateConversationRequest"
+                            "$ref": "#/definitions/conversations.CreateConversationRequest"
                         }
                     }
                 ],
@@ -366,25 +366,25 @@ const docTemplate = `{
                     "200": {
                         "description": "Created conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                            "$ref": "#/definitions/conversations.ConversationResponse"
                         }
                     },
                     "400": {
                         "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -418,31 +418,31 @@ const docTemplate = `{
                     "200": {
                         "description": "Conversation details",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                            "$ref": "#/definitions/conversations.ConversationResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -474,31 +474,31 @@ const docTemplate = `{
                     "200": {
                         "description": "Deleted conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.DeletedConversationResponse"
+                            "$ref": "#/definitions/conversations.DeletedConversationResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -534,7 +534,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.UpdateConversationRequest"
+                            "$ref": "#/definitions/conversations.UpdateConversationRequest"
                         }
                     }
                 ],
@@ -542,37 +542,37 @@ const docTemplate = `{
                     "200": {
                         "description": "Updated conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                            "$ref": "#/definitions/conversations.ConversationResponse"
                         }
                     },
                     "400": {
                         "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -624,31 +624,31 @@ const docTemplate = `{
                     "200": {
                         "description": "List of items",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemListResponse"
+                            "$ref": "#/definitions/conversations.ConversationItemListResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -684,7 +684,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.CreateItemsRequest"
+                            "$ref": "#/definitions/conversations.CreateItemsRequest"
                         }
                     }
                 ],
@@ -692,37 +692,37 @@ const docTemplate = `{
                     "200": {
                         "description": "Created items",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse"
+                            "$ref": "#/definitions/conversations.ListResponse-conversations_ConversationItemResponse"
                         }
                     },
                     "400": {
                         "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -763,31 +763,31 @@ const docTemplate = `{
                     "200": {
                         "description": "Item details",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
+                            "$ref": "#/definitions/conversations.ConversationItemResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -826,31 +826,31 @@ const docTemplate = `{
                     "200": {
                         "description": "Updated conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                            "$ref": "#/definitions/conversations.ConversationResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -915,7 +915,7 @@ const docTemplate = `{
                     "200": {
                         "description": "Successful response",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1.ModelsResponse"
+                            "$ref": "#/definitions/v1.ModelsResponse"
                         }
                     }
                 }
@@ -952,19 +952,19 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully retrieved the list of admin API keys",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse"
+                            "$ref": "#/definitions/organization.AdminApiKeyListResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -993,7 +993,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest"
+                            "$ref": "#/definitions/organization.CreateOrganizationAdminAPIKeyRequest"
                         }
                     }
                 ],
@@ -1001,19 +1001,19 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully created admin API key",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
+                            "$ref": "#/definitions/organization.OrganizationAdminAPIKeyResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request - invalid payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1044,19 +1044,19 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully retrieved the admin API key",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
+                            "$ref": "#/definitions/organization.OrganizationAdminAPIKeyResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - API key with the given ID does not exist or does not belong to the organization",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1085,19 +1085,19 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully deleted the admin API key",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse"
+                            "$ref": "#/definitions/organization.AdminAPIKeyDeletedResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - API key with the given ID does not exist or does not belong to the organization",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1140,19 +1140,19 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully retrieved the list of projects",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectListResponse"
+                            "$ref": "#/definitions/projects.ProjectListResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1181,7 +1181,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest"
+                            "$ref": "#/definitions/projects.CreateProjectRequest"
                         }
                     }
                 ],
@@ -1189,25 +1189,25 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully created project",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/projects.ProjectResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request - invalid payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1238,19 +1238,19 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully retrieved the project",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/projects.ProjectResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - project with the given ID does not exist or does not belong to the organization",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1286,7 +1286,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest"
+                            "$ref": "#/definitions/projects.UpdateProjectRequest"
                         }
                     }
                 ],
@@ -1294,25 +1294,25 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully updated the project",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/projects.ProjectResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request - invalid payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - project with the given ID does not exist",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1343,19 +1343,19 @@ const docTemplate = `{
                     "200": {
                         "description": "Successfully archived the project",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/projects.ProjectResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - project with the given ID does not exist",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1392,31 +1392,31 @@ const docTemplate = `{
                     "200": {
                         "description": "API key created successfully",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse"
+                            "$ref": "#/definitions/apikeys.ApiKeyCreateResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request, e.g., invalid payload or missing IDs",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized, e.g., invalid or missing token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found, e.g., project or organization not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1452,7 +1452,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest"
+                            "$ref": "#/definitions/apikeys.CreateApiKeyRequest"
                         }
                     }
                 ],
@@ -1460,36 +1460,295 @@ const docTemplate = `{
                     "200": {
                         "description": "API key created successfully",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse"
+                            "$ref": "#/definitions/apikeys.ApiKeyCreateResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request, e.g., invalid payload or missing IDs",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized, e.g., invalid or missing token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found, e.g., project or organization not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
+        "/v1/responses": {
+            "post": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Creates a new LLM response for the given input. Supports multiple input types including text, images, files, web search, and more.\n\n**Supported Input Types:**\n- ` + "`" + `text` + "`" + `: Plain text input\n- ` + "`" + `image` + "`" + `: Image input (URL or base64)\n- ` + "`" + `file` + "`" + `: File input by file ID\n- ` + "`" + `web_search` + "`" + `: Web search input\n- ` + "`" + `file_search` + "`" + `: File search input\n- ` + "`" + `streaming` + "`" + `: Streaming input\n- ` + "`" + `function_calls` + "`" + `: Function calls input\n- ` + "`" + `reasoning` + "`" + `: Reasoning input\n\n**Example Request:**\n` + "`" + `` + "`" + `` + "`" + `json\n{\n\"model\": \"gpt-4\",\n\"input\": {\n\"type\": \"text\",\n\"text\": \"Hello, how are you?\"\n},\n\"max_tokens\": 100,\n\"temperature\": 0.7,\n\"stream\": false,\n\"background\": false\n}\n` + "`" + `` + "`" + `` + "`" + `\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `id` + "`" + `: Response identifier\n- ` + "`" + `object` + "`" + `: Object type (\"response\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `model` + "`" + `: Model used\n- ` + "`" + `status` + "`" + `: Response status\n- ` + "`" + `input` + "`" + `: Input data\n- ` + "`" + `output` + "`" + `: Generated output\n\n**Example Response:**\n` + "`" + `` + "`" + `` + "`" + `json\n{\n\"jan_status\": \"000000\",\n\"id\": \"resp_1234567890\",\n\"object\": \"response\",\n\"created\": 1234567890,\n\"model\": \"gpt-4\",\n\"status\": \"completed\",\n\"input\": {\n\"type\": \"text\",\n\"text\": \"Hello, how are you?\"\n},\n\"output\": {\n\"type\": \"text\",\n\"text\": {\n\"value\": \"I'm doing well, thank you!\"\n}\n}\n}\n` + "`" + `` + "`" + `` + "`" + `\n\n**Response Status:**\n- ` + "`" + `completed` + "`" + `: Response generation finished successfully\n- ` + "`" + `processing` 
+ "`" + `: Response is being generated\n- ` + "`" + `failed` + "`" + `: Response generation failed\n- ` + "`" + `cancelled` + "`" + `: Response was cancelled",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "Create a response",
+                "parameters": [
+                    {
+                        "description": "Request payload containing model, input, and generation parameters",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/responses.CreateResponseRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Successful response with embedded fields"
+                    },
+                    "202": {
+                        "description": "Response accepted for background processing with embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request payload"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "422": {
+                        "description": "Validation error"
+                    },
+                    "429": {
+                        "description": "Rate limit exceeded"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}": {
+            "get": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Retrieves an LLM response by its ID. Returns the complete response object with embedded structure where all fields are at the top level.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `id` + "`" + `: Response identifier\n- ` + "`" + `object` + "`" + `: Object type (\"response\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `model` + "`" + `: Model used\n- ` + "`" + `status` + "`" + `: Response status\n- ` + "`" + `input` + "`" + `: Input data\n- ` + "`" + `output` + "`" + `: Generated output",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "Get a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Unique identifier of the response",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Successful response with embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "404": {
+                        "description": "Response not found"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            },
+            "delete": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Deletes an LLM response by its ID. Returns the deleted response object with embedded structure where all fields are at the top level.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `id` + "`" + `: Response identifier\n- ` + "`" + `object` + "`" + `: Object type (\"response\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `model` + "`" + `: Model used\n- ` + "`" + `status` + "`" + `: Response status (will be \"cancelled\")\n- ` + "`" + `input` + "`" + `: Input data\n- ` + "`" + `cancelled_at` + "`" + `: Cancellation timestamp",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "Delete a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Unique identifier of the response",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Successful response with embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "404": {
+                        "description": "Response not found"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}/cancel": {
+            "post": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Cancels a running LLM response that was created with background=true. Only responses that are currently processing can be cancelled.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `id` + "`" + `: Response identifier\n- ` + "`" + `object` + "`" + `: Object type (\"response\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `model` + "`" + `: Model used\n- ` + "`" + `status` + "`" + `: Response status (will be \"cancelled\")\n- ` + "`" + `input` + "`" + `: Input data\n- ` + "`" + `cancelled_at` + "`" + `: Cancellation timestamp",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "Cancel a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Unique identifier of the response to cancel",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response cancelled successfully with embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request or response cannot be cancelled"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "404": {
+                        "description": "Response not found"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}/input_items": {
+            "get": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Retrieves a paginated list of input items for a response. Supports cursor-based pagination for efficient retrieval of large datasets.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `first_id` + "`" + `: First item ID for pagination (optional)\n- ` + "`" + `last_id` + "`" + `: Last item ID for pagination (optional)\n- ` + "`" + `has_more` + "`" + `: Whether more items are available (optional)\n- ` + "`" + `id` + "`" + `: Input item identifier\n- ` + "`" + `object` + "`" + `: Object type (\"input_item\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `type` + "`" + `: Input type\n- ` + "`" + `text` + "`" + `: Text content (for text type)\n- ` + "`" + `image` + "`" + `: Image content (for image type)\n- ` + "`" + `file` + "`" + `: File content (for file type)\n\n**Example Response:**\n` + "`" + `` + "`" + `` + "`" + `json\n{\n\"jan_status\": \"000000\",\n\"first_id\": \"input_123\",\n\"last_id\": \"input_456\",\n\"has_more\": false,\n\"id\": \"input_1234567890\",\n\"object\": \"input_item\",\n\"created\": 1234567890,\n\"type\": \"text\",\n\"text\": \"Hello, world!\"\n}\n` + "`" + `` + "`" + `` + "`" + `",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "List input items",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Unique identifier of the response",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "integer",
+                        "description": "Maximum number of items to return (default: 20, max: 100)",
+                        "name": "limit",
+                        "in": "query"
+                    },
+                    {
+                        "type": "string",
+                        "description": "Cursor for pagination - return items after this ID",
+                        "name": "after",
+                        "in": "query"
+                    },
+                    {
+                        "type": "string",
+                        "description": "Cursor for pagination - return items before this ID",
+                        "name": "before",
+                        "in": "query"
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Successful response with paginated input items and embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request or pagination parameters"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "404": {
+                        "description": "Response not found"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            }
+        },
         "/v1/version": {
             "get": {
                 "description": "Returns the current build version of the API server.",
@@ -1515,38 +1774,61 @@ const docTemplate = `{
         }
     },
     "definitions": {
-        "app_interfaces_http_routes_v1.Model": {
+        "apikeys.ApiKeyCreateResponse": {
             "type": "object",
             "properties": {
-                "created": {
-                    "type": "integer"
+                "result": {
+                    "$ref": "#/definitions/apikeys.ApiKeyResponse"
+                },
+                "status": {
+                    "type": "string"
+                }
+            }
+        },
+        "apikeys.ApiKeyResponse": {
+            "type": "object",
+            "properties": {
+                "apikeyType": {
+                    "type": "string"
+                },
+                "description": {
+                    "type": "string"
+                },
+                "enabled": {
+                    "type": "boolean"
+                },
+                "expiresAt": {
+                    "type": "string"
                 },
                 "id": {
                     "type": "string"
                 },
-                "object": {
+                "key": {
                     "type": "string"
                 },
-                "owned_by": {
+                "last_usedAt": {
+                    "type": "string"
+                },
+                "permissions": {
+                    "type": "string"
+                },
+                "plaintextHint": {
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1.ModelsResponse": {
+        "apikeys.CreateApiKeyRequest": {
             "type": "object",
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1.Model"
-                    }
+                "description": {
+                    "type": "string"
                 },
-                "object": {
+                "expiresAt": {
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_auth.AccessTokenResponse": {
+        "auth.AccessTokenResponse": {
             "type": "object",
             "properties": {
                 "access_token": {
@@ -1560,7 +1842,7 @@ const docTemplate = `{
                 }
             }
         },
-        "app_interfaces_http_routes_v1_auth.GetMeResponse": {
+        "auth.GetMeResponse": {
             "type": "object",
             "properties": {
                 "email": {
@@ -1577,41 +1859,27 @@ const docTemplate = `{
                 }
             }
         },
-        "app_interfaces_http_routes_v1_auth_google.AccessTokenResponse": {
+        "chat.ChatCompletionChoice": {
             "type": "object",
             "properties": {
-                "access_token": {
+                "finish_reason": {
                     "type": "string"
                 },
-                "expires_in": {
+                "index": {
                     "type": "integer"
                 },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest": {
-            "type": "object",
-            "required": [
-                "code"
-            ],
-            "properties": {
-                "code": {
-                    "type": "string"
-                },
-                "state": {
-                    "type": "string"
+                "message": {
+                    "$ref": "#/definitions/chat.Message"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_chat.ChatCompletionResponseSwagger": {
+        "chat.ChatCompletionResponseSwagger": {
             "type": "object",
             "properties": {
                 "choices": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.ChatCompletionChoice"
+                        "$ref": "#/definitions/chat.ChatCompletionChoice"
                     }
                 },
                 "created": {
@@ -1627,513 +1895,56 @@ const docTemplate = `{
                     "type": "string"
                 },
                 "usage": {
-                    "$ref": "#/definitions/openai.Usage"
+                    "$ref": "#/definitions/chat.Usage"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.AnnotationResponse": {
+        "chat.Message": {
             "type": "object",
             "properties": {
-                "end_index": {
-                    "type": "integer"
-                },
-                "file_id": {
+                "content": {
                     "type": "string"
                 },
-                "index": {
-                    "type": "integer"
-                },
-                "start_index": {
-                    "type": "integer"
-                },
-                "text": {
-                    "type": "string"
-                },
-                "type": {
-                    "type": "string"
-                },
-                "url": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ContentResponse": {
-            "type": "object",
-            "properties": {
-                "file": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.FileContentResponse"
-                },
-                "image": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ImageContentResponse"
-                },
-                "input_text": {
-                    "type": "string"
-                },
-                "output_text": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.OutputTextResponse"
-                },
-                "text": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.TextResponse"
-                },
-                "type": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationContentRequest": {
-            "type": "object",
-            "required": [
-                "type"
-            ],
-            "properties": {
-                "text": {
-                    "type": "string"
-                },
-                "type": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemListResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
-                    }
-                },
-                "first_id": {
-                    "type": "string"
-                },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemRequest": {
-            "type": "object",
-            "required": [
-                "content",
-                "type"
-            ],
-            "properties": {
-                "content": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationContentRequest"
-                    }
-                },
                 "role": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole"
-                },
-                "type": {
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemResponse": {
+        "chat.PostChatCompletionRequest": {
             "type": "object",
             "properties": {
-                "content": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ContentResponse"
-                    }
-                },
-                "created_at": {
-                    "type": "integer"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                },
-                "role": {
-                    "type": "string"
-                },
-                "status": {
-                    "type": "string"
-                },
-                "type": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationResponse": {
-            "type": "object",
-            "properties": {
-                "created_at": {
-                    "type": "integer"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.CreateConversationRequest": {
-            "type": "object",
-            "properties": {
-                "items": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest"
-                    }
-                },
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
-                },
-                "title": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.CreateItemsRequest": {
-            "type": "object",
-            "required": [
-                "items"
-            ],
-            "properties": {
-                "items": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest"
-                    }
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.DeletedConversationResponse": {
-            "type": "object",
-            "properties": {
-                "deleted": {
-                    "type": "boolean"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.FileContentResponse": {
-            "type": "object",
-            "properties": {
-                "file_id": {
-                    "type": "string"
-                },
-                "mime_type": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                },
-                "size": {
+                "max_tokens": {
                     "type": "integer"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ImageContentResponse": {
-            "type": "object",
-            "properties": {
-                "detail": {
-                    "type": "string"
-                },
-                "file_id": {
-                    "type": "string"
-                },
-                "url": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.OutputTextResponse": {
-            "type": "object",
-            "properties": {
-                "annotations": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.AnnotationResponse"
-                    }
                 },
-                "text": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.TextResponse": {
-            "type": "object",
-            "properties": {
-                "value": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.UpdateConversationRequest": {
-            "type": "object",
-            "properties": {
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
-                },
-                "title": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse": {
-            "type": "object",
-            "properties": {
-                "deleted": {
-                    "type": "boolean"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
-                    }
-                },
-                "first_id": {
-                    "type": "string"
-                },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "list"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest": {
-            "type": "object",
-            "required": [
-                "name"
-            ],
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "My Admin API Key"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse": {
-            "type": "object",
-            "properties": {
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "key_1234567890"
-                },
-                "last_used_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "name": {
-                    "type": "string",
-                    "example": "My Admin API Key"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "api_key"
-                },
-                "owner": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.Owner"
-                },
-                "redacted_value": {
-                    "type": "string",
-                    "example": "sk-...abcd"
-                },
-                "value": {
-                    "type": "string",
-                    "example": "sk-abcdef1234567890"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.Owner": {
-            "type": "object",
-            "properties": {
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "user_1234567890"
-                },
-                "name": {
-                    "type": "string",
-                    "example": "John Doe"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "user"
-                },
-                "role": {
-                    "type": "string",
-                    "example": "admin"
-                },
-                "type": {
-                    "type": "string",
-                    "example": "user"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest": {
-            "type": "object",
-            "required": [
-                "name"
-            ],
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "New AI Project"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.ProjectListResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
+                "messages": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                        "$ref": "#/definitions/chat.Message"
                     }
                 },
-                "first_id": {
-                    "type": "string"
-                },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "list"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.ProjectResponse": {
-            "type": "object",
-            "properties": {
-                "archived_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "proj_1234567890"
-                },
-                "name": {
-                    "type": "string",
-                    "example": "My First Project"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "project"
-                },
-                "status": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest": {
-            "type": "object",
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "Updated AI Project"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse": {
-            "type": "object",
-            "properties": {
-                "apikeyType": {
-                    "type": "string"
-                },
-                "description": {
-                    "type": "string"
-                },
-                "enabled": {
-                    "type": "boolean"
-                },
-                "expiresAt": {
-                    "type": "string"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "key": {
-                    "type": "string"
-                },
-                "last_usedAt": {
-                    "type": "string"
-                },
-                "permissions": {
+                "model": {
                     "type": "string"
                 },
-                "plaintextHint": {
-                    "type": "string"
+                "temperature": {
+                    "type": "number"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest": {
+        "chat.Usage": {
             "type": "object",
             "properties": {
-                "description": {
-                    "type": "string"
+                "completion_tokens": {
+                    "type": "integer"
                 },
-                "expiresAt": {
-                    "type": "string"
+                "prompt_tokens": {
+                    "type": "integer"
+                },
+                "total_tokens": {
+                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole": {
+        "conversation.ItemRole": {
             "type": "string",
             "enum": [
                 "system",
@@ -2146,61 +1957,76 @@ const docTemplate = `{
                 "ItemRoleAssistant"
             ]
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse": {
+        "conversations.AnnotationResponse": {
             "type": "object",
             "properties": {
-                "code": {
+                "end_index": {
+                    "type": "integer"
+                },
+                "file_id": {
                     "type": "string"
                 },
-                "error": {
+                "index": {
+                    "type": "integer"
+                },
+                "start_index": {
+                    "type": "integer"
+                },
+                "text": {
+                    "type": "string"
+                },
+                "type": {
+                    "type": "string"
+                },
+                "url": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse": {
+        "conversations.ContentResponse": {
             "type": "object",
             "properties": {
-                "result": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse"
+                "file": {
+                    "$ref": "#/definitions/conversations.FileContentResponse"
                 },
-                "status": {
+                "image": {
+                    "$ref": "#/definitions/conversations.ImageContentResponse"
+                },
+                "input_text": {
+                    "type": "string"
+                },
+                "output_text": {
+                    "$ref": "#/definitions/conversations.OutputTextResponse"
+                },
+                "text": {
+                    "$ref": "#/definitions/conversations.TextResponse"
+                },
+                "type": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse": {
+        "conversations.ConversationContentRequest": {
             "type": "object",
+            "required": [
+                "type"
+            ],
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
-                    }
-                },
-                "first_id": {
+                "text": {
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
+                "type": {
                     "type": "string"
-                },
-                "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
-                },
-                "total": {
-                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationResponse": {
+        "conversations.ConversationItemListResponse": {
             "type": "object",
             "properties": {
                 "data": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                        "$ref": "#/definitions/conversations.ConversationItemResponse"
                     }
                 },
                 "first_id": {
@@ -2213,632 +2039,534 @@ const docTemplate = `{
                     "type": "string"
                 },
                 "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
-                },
-                "total": {
-                    "type": "integer"
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList": {
-            "type": "string",
-            "enum": [
-                "list"
-            ],
-            "x-enum-varnames": [
-                "ObjectTypeListList"
-            ]
-        },
-        "openai.ChatCompletionChoice": {
+        "conversations.ConversationItemRequest": {
             "type": "object",
+            "required": [
+                "content",
+                "type"
+            ],
             "properties": {
-                "content_filter_results": {
-                    "$ref": "#/definitions/openai.ContentFilterResults"
-                },
-                "finish_reason": {
-                    "description": "FinishReason\nstop: API returned complete message,\nor a message terminated by one of the stop sequences provided via the stop parameter\nlength: Incomplete model output due to max_tokens parameter or token limit\nfunction_call: The model decided to call a function\ncontent_filter: Omitted content due to a flag from our content filters\nnull: API response still in progress or incomplete",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.FinishReason"
-                        }
-                    ]
-                },
-                "index": {
-                    "type": "integer"
+                "content": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversations.ConversationContentRequest"
+                    }
                 },
-                "logprobs": {
-                    "$ref": "#/definitions/openai.LogProbs"
+                "role": {
+                    "$ref": "#/definitions/conversation.ItemRole"
                 },
-                "message": {
-                    "$ref": "#/definitions/openai.ChatCompletionMessage"
+                "type": {
+                    "type": "string"
                 }
             }
         },
-        "openai.ChatCompletionMessage": {
+        "conversations.ConversationItemResponse": {
             "type": "object",
             "properties": {
                 "content": {
-                    "type": "string"
-                },
-                "function_call": {
-                    "$ref": "#/definitions/openai.FunctionCall"
-                },
-                "multiContent": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.ChatMessagePart"
+                        "$ref": "#/definitions/conversations.ContentResponse"
                     }
                 },
-                "name": {
-                    "description": "This property isn't in the official documentation, but it's in\nthe documentation for the official library for python:\n- https://github.com/openai/openai-python/blob/main/chatml.md\n- https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb",
-                    "type": "string"
+                "created_at": {
+                    "type": "integer"
                 },
-                "reasoning_content": {
-                    "description": "This property is used for the \"reasoning\" feature supported by deepseek-reasoner\nwhich is not in the official documentation.\nthe doc from deepseek:\n- https://api-docs.deepseek.com/api/create-chat-completion#responses",
+                "id": {
                     "type": "string"
                 },
-                "refusal": {
+                "object": {
                     "type": "string"
                 },
                 "role": {
                     "type": "string"
                 },
-                "tool_call_id": {
-                    "description": "For Role=tool prompts this should be set to the ID given in the assistant's prior request to call a tool.",
+                "status": {
                     "type": "string"
                 },
-                "tool_calls": {
-                    "description": "For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.ToolCall"
-                    }
+                "type": {
+                    "type": "string"
                 }
             }
         },
-        "openai.ChatCompletionRequest": {
+        "conversations.ConversationResponse": {
             "type": "object",
             "properties": {
-                "chat_template_kwargs": {
-                    "description": "ChatTemplateKwargs provides a way to add non-standard parameters to the request body.\nAdditional kwargs to pass to the template renderer. Will be accessible by the chat template.\nSuch as think mode for qwen3. \"chat_template_kwargs\": {\"enable_thinking\": false}\nhttps://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "frequency_penalty": {
-                    "type": "number"
-                },
-                "function_call": {
-                    "description": "Deprecated: use ToolChoice instead."
-                },
-                "functions": {
-                    "description": "Deprecated: use Tools instead.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.FunctionDefinition"
-                    }
+                "created_at": {
+                    "type": "integer"
                 },
-                "guided_choice": {
-                    "description": "GuidedChoice is a vLLM-specific extension that restricts the model's output\nto one of the predefined string choices provided in this field. This feature\nis used to constrain the model's responses to a controlled set of options,\nensuring predictable and consistent outputs in scenarios where specific\nchoices are required.",
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
+                "id": {
+                    "type": "string"
                 },
-                "logit_bias": {
-                    "description": "LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.\nincorrect: ` + "`" + `\"logit_bias\":{\"You\": 6}` + "`" + `, correct: ` + "`" + `\"logit_bias\":{\"1639\": 6}` + "`" + `\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias",
+                "metadata": {
                     "type": "object",
                     "additionalProperties": {
-                        "type": "integer"
+                        "type": "string"
                     }
                 },
-                "logprobs": {
-                    "description": "LogProbs indicates whether to return log probabilities of the output tokens or not.\nIf true, returns the log probabilities of each output token returned in the content of message.\nThis option is currently not available on the gpt-4-vision-preview model.",
-                    "type": "boolean"
-                },
-                "max_completion_tokens": {
-                    "description": "MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,\nincluding visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning",
-                    "type": "integer"
-                },
-                "max_tokens": {
-                    "description": "MaxTokens The maximum number of tokens that can be generated in the chat completion.\nThis value can be used to control costs for text generated via API.\nDeprecated: use MaxCompletionTokens. Not compatible with o1-series models.\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens",
-                    "type": "integer"
-                },
-                "messages": {
+                "object": {
+                    "type": "string"
+                }
+            }
+        },
+        "conversations.CreateConversationRequest": {
+            "type": "object",
+            "properties": {
+                "items": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.ChatCompletionMessage"
+                        "$ref": "#/definitions/conversations.ConversationItemRequest"
                     }
                 },
                 "metadata": {
-                    "description": "Metadata to store with the completion.",
                     "type": "object",
                     "additionalProperties": {
                         "type": "string"
                     }
                 },
-                "model": {
-                    "type": "string"
-                },
-                "n": {
-                    "type": "integer"
-                },
-                "parallel_tool_calls": {
-                    "description": "Disable the default behavior of parallel tool calls by setting it: false."
-                },
-                "prediction": {
-                    "description": "Configuration for a predicted output.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.Prediction"
-                        }
-                    ]
-                },
-                "presence_penalty": {
-                    "type": "number"
-                },
-                "reasoning_effort": {
-                    "description": "Controls effort on reasoning for reasoning models. It can be set to \"low\", \"medium\", or \"high\".",
-                    "type": "string"
-                },
-                "response_format": {
-                    "$ref": "#/definitions/openai.ChatCompletionResponseFormat"
-                },
-                "safety_identifier": {
-                    "description": "A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.\nThe IDs should be a string that uniquely identifies each user.\nWe recommend hashing their username or email address, in order to avoid sending us any identifying information.\nhttps://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier",
+                "title": {
                     "type": "string"
-                },
-                "seed": {
-                    "type": "integer"
-                },
-                "service_tier": {
-                    "description": "Specifies the latency tier to use for processing the request.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.ServiceTier"
-                        }
-                    ]
-                },
-                "stop": {
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
-                },
-                "store": {
-                    "description": "Store can be set to true to store the output of this completion request for use in distillations and evals.\nhttps://platform.openai.com/docs/api-reference/chat/create#chat-create-store",
-                    "type": "boolean"
-                },
-                "stream": {
-                    "type": "boolean"
-                },
-                "stream_options": {
-                    "description": "Options for streaming response. Only set this when you set stream: true.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.StreamOptions"
-                        }
-                    ]
-                },
-                "temperature": {
-                    "type": "number"
-                },
-                "tool_choice": {
-                    "description": "This can be either a string or an ToolChoice object."
-                },
-                "tools": {
+                }
+            }
+        },
+        "conversations.CreateItemsRequest": {
+            "type": "object",
+            "required": [
+                "items"
+            ],
+            "properties": {
+                "items": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.Tool"
+                        "$ref": "#/definitions/conversations.ConversationItemRequest"
                     }
-                },
-                "top_logprobs": {
-                    "description": "TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each\ntoken position, each with an associated log probability.\nlogprobs must be set to true if this parameter is used.",
-                    "type": "integer"
-                },
-                "top_p": {
-                    "type": "number"
-                },
-                "user": {
-                    "type": "string"
                 }
             }
         },
-        "openai.ChatCompletionResponseFormat": {
+        "conversations.DeletedConversationResponse": {
             "type": "object",
             "properties": {
-                "json_schema": {
-                    "$ref": "#/definitions/openai.ChatCompletionResponseFormatJSONSchema"
+                "deleted": {
+                    "type": "boolean"
                 },
-                "type": {
-                    "$ref": "#/definitions/openai.ChatCompletionResponseFormatType"
+                "id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "openai.ChatCompletionResponseFormatJSONSchema": {
+        "conversations.FileContentResponse": {
             "type": "object",
             "properties": {
-                "description": {
+                "file_id": {
+                    "type": "string"
+                },
+                "mime_type": {
                     "type": "string"
                 },
                 "name": {
                     "type": "string"
                 },
-                "schema": {},
-                "strict": {
-                    "type": "boolean"
+                "size": {
+                    "type": "integer"
                 }
             }
         },
-        "openai.ChatCompletionResponseFormatType": {
-            "type": "string",
-            "enum": [
-                "json_object",
-                "json_schema",
-                "text"
-            ],
-            "x-enum-varnames": [
-                "ChatCompletionResponseFormatTypeJSONObject",
-                "ChatCompletionResponseFormatTypeJSONSchema",
-                "ChatCompletionResponseFormatTypeText"
-            ]
-        },
-        "openai.ChatMessageImageURL": {
+        "conversations.ImageContentResponse": {
             "type": "object",
             "properties": {
                 "detail": {
-                    "$ref": "#/definitions/openai.ImageURLDetail"
+                    "type": "string"
+                },
+                "file_id": {
+                    "type": "string"
                 },
                 "url": {
                     "type": "string"
                 }
             }
         },
-        "openai.ChatMessagePart": {
+        "conversations.ListResponse-conversations_ConversationItemResponse": {
             "type": "object",
             "properties": {
-                "image_url": {
-                    "$ref": "#/definitions/openai.ChatMessageImageURL"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversations.ConversationItemResponse"
+                    }
                 },
-                "text": {
+                "first_id": {
                     "type": "string"
                 },
-                "type": {
-                    "$ref": "#/definitions/openai.ChatMessagePartType"
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "total": {
+                    "type": "integer"
                 }
             }
         },
-        "openai.ChatMessagePartType": {
-            "type": "string",
-            "enum": [
-                "text",
-                "image_url"
-            ],
-            "x-enum-varnames": [
-                "ChatMessagePartTypeText",
-                "ChatMessagePartTypeImageURL"
-            ]
-        },
-        "openai.CompletionTokensDetails": {
+        "conversations.ListResponse-conversations_ConversationResponse": {
             "type": "object",
             "properties": {
-                "accepted_prediction_tokens": {
-                    "type": "integer"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversations.ConversationResponse"
+                    }
+                },
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean"
                 },
-                "audio_tokens": {
-                    "type": "integer"
+                "last_id": {
+                    "type": "string"
                 },
-                "reasoning_tokens": {
-                    "type": "integer"
+                "object": {
+                    "type": "string"
                 },
-                "rejected_prediction_tokens": {
+                "total": {
                     "type": "integer"
                 }
             }
         },
-        "openai.ContentFilterResults": {
+        "conversations.OutputTextResponse": {
             "type": "object",
             "properties": {
-                "hate": {
-                    "$ref": "#/definitions/openai.Hate"
-                },
-                "jailbreak": {
-                    "$ref": "#/definitions/openai.JailBreak"
-                },
-                "profanity": {
-                    "$ref": "#/definitions/openai.Profanity"
-                },
-                "self_harm": {
-                    "$ref": "#/definitions/openai.SelfHarm"
-                },
-                "sexual": {
-                    "$ref": "#/definitions/openai.Sexual"
+                "annotations": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversations.AnnotationResponse"
+                    }
                 },
-                "violence": {
-                    "$ref": "#/definitions/openai.Violence"
+                "text": {
+                    "type": "string"
                 }
             }
         },
-        "openai.FinishReason": {
-            "type": "string",
-            "enum": [
-                "stop",
-                "length",
-                "function_call",
-                "tool_calls",
-                "content_filter",
-                "null"
-            ],
-            "x-enum-varnames": [
-                "FinishReasonStop",
-                "FinishReasonLength",
-                "FinishReasonFunctionCall",
-                "FinishReasonToolCalls",
-                "FinishReasonContentFilter",
-                "FinishReasonNull"
-            ]
-        },
-        "openai.FunctionCall": {
+        "conversations.TextResponse": {
             "type": "object",
             "properties": {
-                "arguments": {
-                    "description": "call function with arguments in JSON format",
+                "value": {
                     "type": "string"
+                }
+            }
+        },
+        "conversations.UpdateConversationRequest": {
+            "type": "object",
+            "properties": {
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
                 },
-                "name": {
+                "title": {
                     "type": "string"
                 }
             }
         },
-        "openai.FunctionDefinition": {
+        "google.AccessTokenResponse": {
             "type": "object",
             "properties": {
-                "description": {
-                    "type": "string"
-                },
-                "name": {
+                "access_token": {
                     "type": "string"
                 },
-                "parameters": {
-                    "description": "Parameters is an object describing the function.\nYou can pass json.RawMessage to describe the schema,\nor you can pass in a struct which serializes to the proper JSON schema.\nThe jsonschema package is provided for convenience, but you should\nconsider another specialized library if you require more complex schemas."
+                "expires_in": {
+                    "type": "integer"
                 },
-                "strict": {
-                    "type": "boolean"
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "openai.Hate": {
+        "google.GoogleCallbackRequest": {
             "type": "object",
+            "required": [
+                "code"
+            ],
             "properties": {
-                "filtered": {
-                    "type": "boolean"
+                "code": {
+                    "type": "string"
                 },
-                "severity": {
+                "state": {
                     "type": "string"
                 }
             }
         },
-        "openai.ImageURLDetail": {
-            "type": "string",
-            "enum": [
-                "high",
-                "low",
-                "auto"
-            ],
-            "x-enum-varnames": [
-                "ImageURLDetailHigh",
-                "ImageURLDetailLow",
-                "ImageURLDetailAuto"
-            ]
-        },
-        "openai.JailBreak": {
+        "organization.AdminAPIKeyDeletedResponse": {
             "type": "object",
             "properties": {
-                "detected": {
+                "deleted": {
                     "type": "boolean"
                 },
-                "filtered": {
-                    "type": "boolean"
+                "id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "openai.LogProb": {
+        "organization.AdminApiKeyListResponse": {
             "type": "object",
             "properties": {
-                "bytes": {
-                    "description": "Omitting the field if it is null",
+                "data": {
                     "type": "array",
                     "items": {
-                        "type": "integer"
+                        "$ref": "#/definitions/organization.OrganizationAdminAPIKeyResponse"
                     }
                 },
-                "logprob": {
-                    "type": "number"
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean"
                 },
-                "token": {
+                "last_id": {
                     "type": "string"
                 },
-                "top_logprobs": {
-                    "description": "TopLogProbs is a list of the most likely tokens and their log probability, at this token position.\nIn rare cases, there may be fewer than the number of requested top_logprobs returned.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.TopLogProbs"
-                    }
+                "object": {
+                    "type": "string",
+                    "example": "list"
                 }
             }
         },
-        "openai.LogProbs": {
+        "organization.CreateOrganizationAdminAPIKeyRequest": {
             "type": "object",
+            "required": [
+                "name"
+            ],
             "properties": {
-                "content": {
-                    "description": "Content is a list of message content tokens with log probability information.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.LogProb"
-                    }
+                "name": {
+                    "type": "string",
+                    "example": "My Admin API Key"
                 }
             }
         },
-        "openai.Prediction": {
+        "organization.OrganizationAdminAPIKeyResponse": {
             "type": "object",
             "properties": {
-                "content": {
-                    "type": "string"
+                "created_at": {
+                    "type": "integer",
+                    "example": 1698765432
                 },
-                "type": {
-                    "type": "string"
+                "id": {
+                    "type": "string",
+                    "example": "key_1234567890"
+                },
+                "last_used_at": {
+                    "type": "integer",
+                    "example": 1698765432
+                },
+                "name": {
+                    "type": "string",
+                    "example": "My Admin API Key"
+                },
+                "object": {
+                    "type": "string",
+                    "example": "api_key"
+                },
+                "owner": {
+                    "$ref": "#/definitions/organization.Owner"
+                },
+                "redacted_value": {
+                    "type": "string",
+                    "example": "sk-...abcd"
+                },
+                "value": {
+                    "type": "string",
+                    "example": "sk-abcdef1234567890"
                 }
             }
         },
-        "openai.Profanity": {
+        "organization.Owner": {
             "type": "object",
             "properties": {
-                "detected": {
-                    "type": "boolean"
+                "created_at": {
+                    "type": "integer",
+                    "example": 1698765432
                 },
-                "filtered": {
-                    "type": "boolean"
+                "id": {
+                    "type": "string",
+                    "example": "user_1234567890"
+                },
+                "name": {
+                    "type": "string",
+                    "example": "John Doe"
+                },
+                "object": {
+                    "type": "string",
+                    "example": "user"
+                },
+                "role": {
+                    "type": "string",
+                    "example": "admin"
+                },
+                "type": {
+                    "type": "string",
+                    "example": "user"
                 }
             }
         },
-        "openai.PromptTokensDetails": {
+        "projects.CreateProjectRequest": {
             "type": "object",
+            "required": [
+                "name"
+            ],
             "properties": {
-                "audio_tokens": {
-                    "type": "integer"
-                },
-                "cached_tokens": {
-                    "type": "integer"
+                "name": {
+                    "type": "string",
+                    "example": "New AI Project"
                 }
             }
         },
-        "openai.SelfHarm": {
+        "projects.ProjectListResponse": {
             "type": "object",
             "properties": {
-                "filtered": {
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/projects.ProjectResponse"
+                    }
+                },
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
                     "type": "boolean"
                 },
-                "severity": {
+                "last_id": {
                     "type": "string"
+                },
+                "object": {
+                    "type": "string",
+                    "example": "list"
                 }
             }
         },
-        "openai.ServiceTier": {
-            "type": "string",
-            "enum": [
-                "auto",
-                "default",
-                "flex",
-                "priority"
-            ],
-            "x-enum-varnames": [
-                "ServiceTierAuto",
-                "ServiceTierDefault",
-                "ServiceTierFlex",
-                "ServiceTierPriority"
-            ]
-        },
-        "openai.Sexual": {
+        "projects.ProjectResponse": {
             "type": "object",
             "properties": {
-                "filtered": {
-                    "type": "boolean"
+                "archived_at": {
+                    "type": "integer",
+                    "example": 1698765432
+                },
+                "created_at": {
+                    "type": "integer",
+                    "example": 1698765432
+                },
+                "id": {
+                    "type": "string",
+                    "example": "proj_1234567890"
+                },
+                "name": {
+                    "type": "string",
+                    "example": "My First Project"
                 },
-                "severity": {
+                "object": {
+                    "type": "string",
+                    "example": "project"
+                },
+                "status": {
                     "type": "string"
                 }
             }
         },
-        "openai.StreamOptions": {
+        "projects.UpdateProjectRequest": {
             "type": "object",
             "properties": {
-                "include_usage": {
-                    "description": "If set, an additional chunk will be streamed before the data: [DONE] message.\nThe usage field on this chunk shows the token usage statistics for the entire request,\nand the choices field will always be an empty array.\nAll other chunks will also include a usage field, but with a null value.",
-                    "type": "boolean"
+                "name": {
+                    "type": "string",
+                    "example": "Updated AI Project"
                 }
             }
         },
-        "openai.Tool": {
+        "responses.CreateResponseRequest": {
             "type": "object",
+            "required": [
+                "input",
+                "model"
+            ],
             "properties": {
-                "function": {
-                    "$ref": "#/definitions/openai.FunctionDefinition"
+                "generation": {
+                    "type": "object",
+                    "additionalProperties": true
                 },
-                "type": {
-                    "$ref": "#/definitions/openai.ToolType"
-                }
-            }
-        },
-        "openai.ToolCall": {
-            "type": "object",
-            "properties": {
-                "function": {
-                    "$ref": "#/definitions/openai.FunctionCall"
+                "input": {
+                    "type": "object",
+                    "additionalProperties": true
                 },
-                "id": {
-                    "type": "string"
+                "max_tokens": {
+                    "type": "integer",
+                    "example": 1000
                 },
-                "index": {
-                    "description": "Index is not nil only in chat completion chunk object",
-                    "type": "integer"
+                "model": {
+                    "type": "string",
+                    "example": "gpt-4"
                 },
-                "type": {
-                    "$ref": "#/definitions/openai.ToolType"
+                "stream": {
+                    "type": "boolean",
+                    "example": false
+                },
+                "temperature": {
+                    "type": "number",
+                    "example": 0.7
                 }
             }
         },
-        "openai.ToolType": {
-            "type": "string",
-            "enum": [
-                "function"
-            ],
-            "x-enum-varnames": [
-                "ToolTypeFunction"
-            ]
-        },
-        "openai.TopLogProbs": {
+        "responses.ErrorResponse": {
             "type": "object",
             "properties": {
-                "bytes": {
-                    "type": "array",
-                    "items": {
-                        "type": "integer"
-                    }
-                },
-                "logprob": {
-                    "type": "number"
+                "code": {
+                    "type": "string"
                 },
-                "token": {
+                "error": {
                     "type": "string"
                 }
             }
         },
-        "openai.Usage": {
+        "v1.Model": {
             "type": "object",
             "properties": {
-                "completion_tokens": {
+                "created": {
                     "type": "integer"
                 },
-                "completion_tokens_details": {
-                    "$ref": "#/definitions/openai.CompletionTokensDetails"
-                },
-                "prompt_tokens": {
-                    "type": "integer"
+                "id": {
+                    "type": "string"
                 },
-                "prompt_tokens_details": {
-                    "$ref": "#/definitions/openai.PromptTokensDetails"
+                "object": {
+                    "type": "string"
                 },
-                "total_tokens": {
-                    "type": "integer"
+                "owned_by": {
+                    "type": "string"
                 }
             }
         },
-        "openai.Violence": {
+        "v1.ModelsResponse": {
             "type": "object",
             "properties": {
-                "filtered": {
-                    "type": "boolean"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/v1.Model"
+                    }
                 },
-                "severity": {
+                "object": {
                     "type": "string"
                 }
             }
diff --git a/apps/jan-api-gateway/application/docs/swagger.json b/apps/jan-api-gateway/application/docs/swagger.json
index e0b0dd85..c1ccbb90 100644
--- a/apps/jan-api-gateway/application/docs/swagger.json
+++ b/apps/jan-api-gateway/application/docs/swagger.json
@@ -28,7 +28,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest"
+                            "$ref": "#/definitions/google.GoogleCallbackRequest"
                         }
                     }
                 ],
@@ -36,25 +36,25 @@
                     "200": {
                         "description": "Successfully authenticated and returned tokens",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.AccessTokenResponse"
+                            "$ref": "#/definitions/google.AccessTokenResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request (e.g., invalid state, missing code, or invalid claims)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., a user claim is not found or is invalid in the context)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -74,7 +74,7 @@
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -94,19 +94,19 @@
                     "200": {
                         "description": "Successfully refreshed the access token",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse"
+                            "$ref": "#/definitions/auth.AccessTokenResponse"
                         }
                     },
                     "400": {
                         "description": "Bad Request (e.g., invalid refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., expired or missing refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -132,13 +132,13 @@
                     "400": {
                         "description": "Bad Request (e.g., invalid refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., expired or missing refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -163,13 +163,13 @@
                     "200": {
                         "description": "Successfully retrieved user profile",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.GetMeResponse"
+                            "$ref": "#/definitions/auth.GetMeResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., missing or invalid JWT)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -192,19 +192,19 @@
                     "200": {
                         "description": "Successfully refreshed the access token",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse"
+                            "$ref": "#/definitions/auth.AccessTokenResponse"
                         }
                     },
                     "400": {
                         "description": "Bad Request (e.g., invalid refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized (e.g., expired or missing refresh token)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -235,7 +235,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/openai.ChatCompletionRequest"
+                            "$ref": "#/definitions/chat.PostChatCompletionRequest"
                         }
                     }
                 ],
@@ -243,25 +243,25 @@
                     "200": {
                         "description": "Successful response",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_chat.ChatCompletionResponseSwagger"
+                            "$ref": "#/definitions/chat.ChatCompletionResponseSwagger"
                         }
                     },
                     "400": {
                         "description": "Invalid request payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -304,25 +304,25 @@
                     "200": {
                         "description": "Successfully retrieved the list of conversations",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationResponse"
+                            "$ref": "#/definitions/conversations.ListResponse-conversations_ConversationResponse"
                         }
                     },
                     "400": {
                         "description": "Bad Request - Invalid pagination parameters",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -351,7 +351,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.CreateConversationRequest"
+                            "$ref": "#/definitions/conversations.CreateConversationRequest"
                         }
                     }
                 ],
@@ -359,25 +359,25 @@
                     "200": {
                         "description": "Created conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                            "$ref": "#/definitions/conversations.ConversationResponse"
                         }
                     },
                     "400": {
                         "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -411,31 +411,31 @@
                     "200": {
                         "description": "Conversation details",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                            "$ref": "#/definitions/conversations.ConversationResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -467,31 +467,31 @@
                     "200": {
                         "description": "Deleted conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.DeletedConversationResponse"
+                            "$ref": "#/definitions/conversations.DeletedConversationResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -527,7 +527,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.UpdateConversationRequest"
+                            "$ref": "#/definitions/conversations.UpdateConversationRequest"
                         }
                     }
                 ],
@@ -535,37 +535,37 @@
                     "200": {
                         "description": "Updated conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                            "$ref": "#/definitions/conversations.ConversationResponse"
                         }
                     },
                     "400": {
                         "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -617,31 +617,31 @@
                     "200": {
                         "description": "List of items",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemListResponse"
+                            "$ref": "#/definitions/conversations.ConversationItemListResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -677,7 +677,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.CreateItemsRequest"
+                            "$ref": "#/definitions/conversations.CreateItemsRequest"
                         }
                     }
                 ],
@@ -685,37 +685,37 @@
                     "200": {
                         "description": "Created items",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse"
+                            "$ref": "#/definitions/conversations.ListResponse-conversations_ConversationItemResponse"
                         }
                     },
                     "400": {
                         "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -756,31 +756,31 @@
                     "200": {
                         "description": "Item details",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
+                            "$ref": "#/definitions/conversations.ConversationItemResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -819,31 +819,31 @@
                     "200": {
                         "description": "Updated conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                            "$ref": "#/definitions/conversations.ConversationResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "403": {
                         "description": "Access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Conversation not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -908,7 +908,7 @@
                     "200": {
                         "description": "Successful response",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1.ModelsResponse"
+                            "$ref": "#/definitions/v1.ModelsResponse"
                         }
                     }
                 }
@@ -945,19 +945,19 @@
                     "200": {
                         "description": "Successfully retrieved the list of admin API keys",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse"
+                            "$ref": "#/definitions/organization.AdminApiKeyListResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -986,7 +986,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest"
+                            "$ref": "#/definitions/organization.CreateOrganizationAdminAPIKeyRequest"
                         }
                     }
                 ],
@@ -994,19 +994,19 @@
                     "200": {
                         "description": "Successfully created admin API key",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
+                            "$ref": "#/definitions/organization.OrganizationAdminAPIKeyResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request - invalid payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1037,19 +1037,19 @@
                     "200": {
                         "description": "Successfully retrieved the admin API key",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
+                            "$ref": "#/definitions/organization.OrganizationAdminAPIKeyResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - API key with the given ID does not exist or does not belong to the organization",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1078,19 +1078,19 @@
                     "200": {
                         "description": "Successfully deleted the admin API key",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse"
+                            "$ref": "#/definitions/organization.AdminAPIKeyDeletedResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - API key with the given ID does not exist or does not belong to the organization",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1133,19 +1133,19 @@
                     "200": {
                         "description": "Successfully retrieved the list of projects",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectListResponse"
+                            "$ref": "#/definitions/projects.ProjectListResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1174,7 +1174,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest"
+                            "$ref": "#/definitions/projects.CreateProjectRequest"
                         }
                     }
                 ],
@@ -1182,25 +1182,25 @@
                     "200": {
                         "description": "Successfully created project",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/projects.ProjectResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request - invalid payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1231,19 +1231,19 @@
                     "200": {
                         "description": "Successfully retrieved the project",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/projects.ProjectResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - project with the given ID does not exist or does not belong to the organization",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1279,7 +1279,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest"
+                            "$ref": "#/definitions/projects.UpdateProjectRequest"
                         }
                     }
                 ],
@@ -1287,25 +1287,25 @@
                     "200": {
                         "description": "Successfully updated the project",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/projects.ProjectResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request - invalid payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - project with the given ID does not exist",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1336,19 +1336,19 @@
                     "200": {
                         "description": "Successfully archived the project",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/projects.ProjectResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized - invalid or missing API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found - project with the given ID does not exist",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1385,31 +1385,31 @@
                     "200": {
                         "description": "API key created successfully",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse"
+                            "$ref": "#/definitions/apikeys.ApiKeyCreateResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request, e.g., invalid payload or missing IDs",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized, e.g., invalid or missing token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found, e.g., project or organization not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1445,7 +1445,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest"
+                            "$ref": "#/definitions/apikeys.CreateApiKeyRequest"
                         }
                     }
                 ],
@@ -1453,36 +1453,295 @@
                     "200": {
                         "description": "API key created successfully",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse"
+                            "$ref": "#/definitions/apikeys.ApiKeyCreateResponse"
                         }
                     },
                     "400": {
                         "description": "Bad request, e.g., invalid payload or missing IDs",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized, e.g., invalid or missing token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
                         "description": "Not Found, e.g., project or organization not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
+        "/v1/responses": {
+            "post": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Creates a new LLM response for the given input. Supports multiple input types including text, images, files, web search, and more.\n\n**Supported Input Types:**\n- `text`: Plain text input\n- `image`: Image input (URL or base64)\n- `file`: File input by file ID\n- `web_search`: Web search input\n- `file_search`: File search input\n- `streaming`: Streaming input\n- `function_calls`: Function calls input\n- `reasoning`: Reasoning input\n\n**Example Request:**\n```json\n{\n\"model\": \"gpt-4\",\n\"input\": {\n\"type\": \"text\",\n\"text\": \"Hello, how are you?\"\n},\n\"max_tokens\": 100,\n\"temperature\": 0.7,\n\"stream\": false,\n\"background\": false\n}\n```\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `id`: Response identifier\n- `object`: Object type (\"response\")\n- `created`: Unix timestamp\n- `model`: Model used\n- `status`: Response status\n- `input`: Input data\n- `output`: Generated output\n\n**Example Response:**\n```json\n{\n\"jan_status\": \"000000\",\n\"id\": \"resp_1234567890\",\n\"object\": \"response\",\n\"created\": 1234567890,\n\"model\": \"gpt-4\",\n\"status\": \"completed\",\n\"input\": {\n\"type\": \"text\",\n\"text\": \"Hello, how are you?\"\n},\n\"output\": {\n\"type\": \"text\",\n\"text\": {\n\"value\": \"I'm doing well, thank you!\"\n}\n}\n}\n```\n\n**Response Status:**\n- `completed`: Response generation finished successfully\n- `processing`: Response is being generated\n- `failed`: Response generation failed\n- `cancelled`: Response was cancelled",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "Create a response",
+                "parameters": [
+                    {
+                        "description": "Request payload containing model, input, and generation parameters",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/responses.CreateResponseRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Successful response with embedded fields"
+                    },
+                    "202": {
+                        "description": "Response accepted for background processing with embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request payload"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "422": {
+                        "description": "Validation error"
+                    },
+                    "429": {
+                        "description": "Rate limit exceeded"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}": {
+            "get": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Retrieves an LLM response by its ID. Returns the complete response object with embedded structure where all fields are at the top level.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `id`: Response identifier\n- `object`: Object type (\"response\")\n- `created`: Unix timestamp\n- `model`: Model used\n- `status`: Response status\n- `input`: Input data\n- `output`: Generated output",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "Get a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Unique identifier of the response",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Successful response with embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "404": {
+                        "description": "Response not found"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            },
+            "delete": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Deletes an LLM response by its ID. Returns the deleted response object with embedded structure where all fields are at the top level.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `id`: Response identifier\n- `object`: Object type (\"response\")\n- `created`: Unix timestamp\n- `model`: Model used\n- `status`: Response status (will be \"cancelled\")\n- `input`: Input data\n- `cancelled_at`: Cancellation timestamp",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "Delete a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Unique identifier of the response",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Successful response with embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "404": {
+                        "description": "Response not found"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}/cancel": {
+            "post": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Cancels a running LLM response that was created with background=true. Only responses that are currently processing can be cancelled.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `id`: Response identifier\n- `object`: Object type (\"response\")\n- `created`: Unix timestamp\n- `model`: Model used\n- `status`: Response status (will be \"cancelled\")\n- `input`: Input data\n- `cancelled_at`: Cancellation timestamp",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "Cancel a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Unique identifier of the response to cancel",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response cancelled successfully with embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request or response cannot be cancelled"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "404": {
+                        "description": "Response not found"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}/input_items": {
+            "get": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Retrieves a paginated list of input items for a response. Supports cursor-based pagination for efficient retrieval of large datasets.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `first_id`: First item ID for pagination (optional)\n- `last_id`: Last item ID for pagination (optional)\n- `has_more`: Whether more items are available (optional)\n- `id`: Input item identifier\n- `object`: Object type (\"input_item\")\n- `created`: Unix timestamp\n- `type`: Input type\n- `text`: Text content (for text type)\n- `image`: Image content (for image type)\n- `file`: File content (for file type)\n\n**Example Response:**\n```json\n{\n\"jan_status\": \"000000\",\n\"first_id\": \"input_123\",\n\"last_id\": \"input_456\",\n\"has_more\": false,\n\"id\": \"input_1234567890\",\n\"object\": \"input_item\",\n\"created\": 1234567890,\n\"type\": \"text\",\n\"text\": \"Hello, world!\"\n}\n```",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Jan",
+                    "Jan-Responses"
+                ],
+                "summary": "List input items",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Unique identifier of the response",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "integer",
+                        "description": "Maximum number of items to return (default: 20, max: 100)",
+                        "name": "limit",
+                        "in": "query"
+                    },
+                    {
+                        "type": "string",
+                        "description": "Cursor for pagination - return items after this ID",
+                        "name": "after",
+                        "in": "query"
+                    },
+                    {
+                        "type": "string",
+                        "description": "Cursor for pagination - return items before this ID",
+                        "name": "before",
+                        "in": "query"
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Successful response with paginated input items and embedded fields"
+                    },
+                    "400": {
+                        "description": "Invalid request or pagination parameters"
+                    },
+                    "401": {
+                        "description": "Unauthorized"
+                    },
+                    "404": {
+                        "description": "Response not found"
+                    },
+                    "500": {
+                        "description": "Internal server error"
+                    }
+                }
+            }
+        },
         "/v1/version": {
             "get": {
                 "description": "Returns the current build version of the API server.",
@@ -1508,38 +1767,61 @@
         }
     },
     "definitions": {
-        "app_interfaces_http_routes_v1.Model": {
+        "apikeys.ApiKeyCreateResponse": {
             "type": "object",
             "properties": {
-                "created": {
-                    "type": "integer"
+                "result": {
+                    "$ref": "#/definitions/apikeys.ApiKeyResponse"
+                },
+                "status": {
+                    "type": "string"
+                }
+            }
+        },
+        "apikeys.ApiKeyResponse": {
+            "type": "object",
+            "properties": {
+                "apikeyType": {
+                    "type": "string"
+                },
+                "description": {
+                    "type": "string"
+                },
+                "enabled": {
+                    "type": "boolean"
+                },
+                "expiresAt": {
+                    "type": "string"
                 },
                 "id": {
                     "type": "string"
                 },
-                "object": {
+                "key": {
                     "type": "string"
                 },
-                "owned_by": {
+                "last_usedAt": {
+                    "type": "string"
+                },
+                "permissions": {
+                    "type": "string"
+                },
+                "plaintextHint": {
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1.ModelsResponse": {
+        "apikeys.CreateApiKeyRequest": {
             "type": "object",
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1.Model"
-                    }
+                "description": {
+                    "type": "string"
                 },
-                "object": {
+                "expiresAt": {
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_auth.AccessTokenResponse": {
+        "auth.AccessTokenResponse": {
             "type": "object",
             "properties": {
                 "access_token": {
@@ -1553,7 +1835,7 @@
                 }
             }
         },
-        "app_interfaces_http_routes_v1_auth.GetMeResponse": {
+        "auth.GetMeResponse": {
             "type": "object",
             "properties": {
                 "email": {
@@ -1570,41 +1852,27 @@
                 }
             }
         },
-        "app_interfaces_http_routes_v1_auth_google.AccessTokenResponse": {
+        "chat.ChatCompletionChoice": {
             "type": "object",
             "properties": {
-                "access_token": {
+                "finish_reason": {
                     "type": "string"
                 },
-                "expires_in": {
+                "index": {
                     "type": "integer"
                 },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest": {
-            "type": "object",
-            "required": [
-                "code"
-            ],
-            "properties": {
-                "code": {
-                    "type": "string"
-                },
-                "state": {
-                    "type": "string"
+                "message": {
+                    "$ref": "#/definitions/chat.Message"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_chat.ChatCompletionResponseSwagger": {
+        "chat.ChatCompletionResponseSwagger": {
             "type": "object",
             "properties": {
                 "choices": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.ChatCompletionChoice"
+                        "$ref": "#/definitions/chat.ChatCompletionChoice"
                     }
                 },
                 "created": {
@@ -1620,513 +1888,56 @@
                     "type": "string"
                 },
                 "usage": {
-                    "$ref": "#/definitions/openai.Usage"
+                    "$ref": "#/definitions/chat.Usage"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.AnnotationResponse": {
+        "chat.Message": {
             "type": "object",
             "properties": {
-                "end_index": {
-                    "type": "integer"
-                },
-                "file_id": {
+                "content": {
                     "type": "string"
                 },
-                "index": {
-                    "type": "integer"
-                },
-                "start_index": {
-                    "type": "integer"
-                },
-                "text": {
-                    "type": "string"
-                },
-                "type": {
-                    "type": "string"
-                },
-                "url": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ContentResponse": {
-            "type": "object",
-            "properties": {
-                "file": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.FileContentResponse"
-                },
-                "image": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ImageContentResponse"
-                },
-                "input_text": {
-                    "type": "string"
-                },
-                "output_text": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.OutputTextResponse"
-                },
-                "text": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.TextResponse"
-                },
-                "type": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationContentRequest": {
-            "type": "object",
-            "required": [
-                "type"
-            ],
-            "properties": {
-                "text": {
-                    "type": "string"
-                },
-                "type": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemListResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
-                    }
-                },
-                "first_id": {
-                    "type": "string"
-                },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemRequest": {
-            "type": "object",
-            "required": [
-                "content",
-                "type"
-            ],
-            "properties": {
-                "content": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationContentRequest"
-                    }
-                },
                 "role": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole"
-                },
-                "type": {
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemResponse": {
+        "chat.PostChatCompletionRequest": {
             "type": "object",
             "properties": {
-                "content": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ContentResponse"
-                    }
-                },
-                "created_at": {
-                    "type": "integer"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                },
-                "role": {
-                    "type": "string"
-                },
-                "status": {
-                    "type": "string"
-                },
-                "type": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationResponse": {
-            "type": "object",
-            "properties": {
-                "created_at": {
-                    "type": "integer"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.CreateConversationRequest": {
-            "type": "object",
-            "properties": {
-                "items": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest"
-                    }
-                },
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
-                },
-                "title": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.CreateItemsRequest": {
-            "type": "object",
-            "required": [
-                "items"
-            ],
-            "properties": {
-                "items": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest"
-                    }
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.DeletedConversationResponse": {
-            "type": "object",
-            "properties": {
-                "deleted": {
-                    "type": "boolean"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.FileContentResponse": {
-            "type": "object",
-            "properties": {
-                "file_id": {
-                    "type": "string"
-                },
-                "mime_type": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                },
-                "size": {
+                "max_tokens": {
                     "type": "integer"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ImageContentResponse": {
-            "type": "object",
-            "properties": {
-                "detail": {
-                    "type": "string"
-                },
-                "file_id": {
-                    "type": "string"
-                },
-                "url": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.OutputTextResponse": {
-            "type": "object",
-            "properties": {
-                "annotations": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.AnnotationResponse"
-                    }
                 },
-                "text": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.TextResponse": {
-            "type": "object",
-            "properties": {
-                "value": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.UpdateConversationRequest": {
-            "type": "object",
-            "properties": {
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
-                },
-                "title": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse": {
-            "type": "object",
-            "properties": {
-                "deleted": {
-                    "type": "boolean"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
-                    }
-                },
-                "first_id": {
-                    "type": "string"
-                },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "list"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest": {
-            "type": "object",
-            "required": [
-                "name"
-            ],
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "My Admin API Key"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse": {
-            "type": "object",
-            "properties": {
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "key_1234567890"
-                },
-                "last_used_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "name": {
-                    "type": "string",
-                    "example": "My Admin API Key"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "api_key"
-                },
-                "owner": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.Owner"
-                },
-                "redacted_value": {
-                    "type": "string",
-                    "example": "sk-...abcd"
-                },
-                "value": {
-                    "type": "string",
-                    "example": "sk-abcdef1234567890"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.Owner": {
-            "type": "object",
-            "properties": {
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "user_1234567890"
-                },
-                "name": {
-                    "type": "string",
-                    "example": "John Doe"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "user"
-                },
-                "role": {
-                    "type": "string",
-                    "example": "admin"
-                },
-                "type": {
-                    "type": "string",
-                    "example": "user"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest": {
-            "type": "object",
-            "required": [
-                "name"
-            ],
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "New AI Project"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.ProjectListResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
+                "messages": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                        "$ref": "#/definitions/chat.Message"
                     }
                 },
-                "first_id": {
-                    "type": "string"
-                },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "list"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.ProjectResponse": {
-            "type": "object",
-            "properties": {
-                "archived_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "proj_1234567890"
-                },
-                "name": {
-                    "type": "string",
-                    "example": "My First Project"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "project"
-                },
-                "status": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest": {
-            "type": "object",
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "Updated AI Project"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse": {
-            "type": "object",
-            "properties": {
-                "apikeyType": {
-                    "type": "string"
-                },
-                "description": {
-                    "type": "string"
-                },
-                "enabled": {
-                    "type": "boolean"
-                },
-                "expiresAt": {
-                    "type": "string"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "key": {
-                    "type": "string"
-                },
-                "last_usedAt": {
-                    "type": "string"
-                },
-                "permissions": {
+                "model": {
                     "type": "string"
                 },
-                "plaintextHint": {
-                    "type": "string"
+                "temperature": {
+                    "type": "number"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest": {
+        "chat.Usage": {
             "type": "object",
             "properties": {
-                "description": {
-                    "type": "string"
+                "completion_tokens": {
+                    "type": "integer"
                 },
-                "expiresAt": {
-                    "type": "string"
+                "prompt_tokens": {
+                    "type": "integer"
+                },
+                "total_tokens": {
+                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole": {
+        "conversation.ItemRole": {
             "type": "string",
             "enum": [
                 "system",
@@ -2139,61 +1950,76 @@
                 "ItemRoleAssistant"
             ]
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse": {
+        "conversations.AnnotationResponse": {
             "type": "object",
             "properties": {
-                "code": {
+                "end_index": {
+                    "type": "integer"
+                },
+                "file_id": {
                     "type": "string"
                 },
-                "error": {
+                "index": {
+                    "type": "integer"
+                },
+                "start_index": {
+                    "type": "integer"
+                },
+                "text": {
+                    "type": "string"
+                },
+                "type": {
+                    "type": "string"
+                },
+                "url": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse": {
+        "conversations.ContentResponse": {
             "type": "object",
             "properties": {
-                "result": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse"
+                "file": {
+                    "$ref": "#/definitions/conversations.FileContentResponse"
                 },
-                "status": {
+                "image": {
+                    "$ref": "#/definitions/conversations.ImageContentResponse"
+                },
+                "input_text": {
+                    "type": "string"
+                },
+                "output_text": {
+                    "$ref": "#/definitions/conversations.OutputTextResponse"
+                },
+                "text": {
+                    "$ref": "#/definitions/conversations.TextResponse"
+                },
+                "type": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse": {
+        "conversations.ConversationContentRequest": {
             "type": "object",
+            "required": [
+                "type"
+            ],
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
-                    }
-                },
-                "first_id": {
+                "text": {
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
+                "type": {
                     "type": "string"
-                },
-                "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
-                },
-                "total": {
-                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationResponse": {
+        "conversations.ConversationItemListResponse": {
             "type": "object",
             "properties": {
                 "data": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse"
+                        "$ref": "#/definitions/conversations.ConversationItemResponse"
                     }
                 },
                 "first_id": {
@@ -2206,632 +2032,534 @@
                     "type": "string"
                 },
                 "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
-                },
-                "total": {
-                    "type": "integer"
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList": {
-            "type": "string",
-            "enum": [
-                "list"
-            ],
-            "x-enum-varnames": [
-                "ObjectTypeListList"
-            ]
-        },
-        "openai.ChatCompletionChoice": {
+        "conversations.ConversationItemRequest": {
             "type": "object",
+            "required": [
+                "content",
+                "type"
+            ],
             "properties": {
-                "content_filter_results": {
-                    "$ref": "#/definitions/openai.ContentFilterResults"
-                },
-                "finish_reason": {
-                    "description": "FinishReason\nstop: API returned complete message,\nor a message terminated by one of the stop sequences provided via the stop parameter\nlength: Incomplete model output due to max_tokens parameter or token limit\nfunction_call: The model decided to call a function\ncontent_filter: Omitted content due to a flag from our content filters\nnull: API response still in progress or incomplete",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.FinishReason"
-                        }
-                    ]
-                },
-                "index": {
-                    "type": "integer"
+                "content": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversations.ConversationContentRequest"
+                    }
                 },
-                "logprobs": {
-                    "$ref": "#/definitions/openai.LogProbs"
+                "role": {
+                    "$ref": "#/definitions/conversation.ItemRole"
                 },
-                "message": {
-                    "$ref": "#/definitions/openai.ChatCompletionMessage"
+                "type": {
+                    "type": "string"
                 }
             }
         },
-        "openai.ChatCompletionMessage": {
+        "conversations.ConversationItemResponse": {
             "type": "object",
             "properties": {
                 "content": {
-                    "type": "string"
-                },
-                "function_call": {
-                    "$ref": "#/definitions/openai.FunctionCall"
-                },
-                "multiContent": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.ChatMessagePart"
+                        "$ref": "#/definitions/conversations.ContentResponse"
                     }
                 },
-                "name": {
-                    "description": "This property isn't in the official documentation, but it's in\nthe documentation for the official library for python:\n- https://github.com/openai/openai-python/blob/main/chatml.md\n- https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb",
-                    "type": "string"
+                "created_at": {
+                    "type": "integer"
                 },
-                "reasoning_content": {
-                    "description": "This property is used for the \"reasoning\" feature supported by deepseek-reasoner\nwhich is not in the official documentation.\nthe doc from deepseek:\n- https://api-docs.deepseek.com/api/create-chat-completion#responses",
+                "id": {
                     "type": "string"
                 },
-                "refusal": {
+                "object": {
                     "type": "string"
                 },
                 "role": {
                     "type": "string"
                 },
-                "tool_call_id": {
-                    "description": "For Role=tool prompts this should be set to the ID given in the assistant's prior request to call a tool.",
+                "status": {
                     "type": "string"
                 },
-                "tool_calls": {
-                    "description": "For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.ToolCall"
-                    }
+                "type": {
+                    "type": "string"
                 }
             }
         },
-        "openai.ChatCompletionRequest": {
+        "conversations.ConversationResponse": {
             "type": "object",
             "properties": {
-                "chat_template_kwargs": {
-                    "description": "ChatTemplateKwargs provides a way to add non-standard parameters to the request body.\nAdditional kwargs to pass to the template renderer. Will be accessible by the chat template.\nSuch as think mode for qwen3. \"chat_template_kwargs\": {\"enable_thinking\": false}\nhttps://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "frequency_penalty": {
-                    "type": "number"
-                },
-                "function_call": {
-                    "description": "Deprecated: use ToolChoice instead."
-                },
-                "functions": {
-                    "description": "Deprecated: use Tools instead.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.FunctionDefinition"
-                    }
+                "created_at": {
+                    "type": "integer"
                 },
-                "guided_choice": {
-                    "description": "GuidedChoice is a vLLM-specific extension that restricts the model's output\nto one of the predefined string choices provided in this field. This feature\nis used to constrain the model's responses to a controlled set of options,\nensuring predictable and consistent outputs in scenarios where specific\nchoices are required.",
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
+                "id": {
+                    "type": "string"
                 },
-                "logit_bias": {
-                    "description": "LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.\nincorrect: `\"logit_bias\":{\"You\": 6}`, correct: `\"logit_bias\":{\"1639\": 6}`\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias",
+                "metadata": {
                     "type": "object",
                     "additionalProperties": {
-                        "type": "integer"
+                        "type": "string"
                     }
                 },
-                "logprobs": {
-                    "description": "LogProbs indicates whether to return log probabilities of the output tokens or not.\nIf true, returns the log probabilities of each output token returned in the content of message.\nThis option is currently not available on the gpt-4-vision-preview model.",
-                    "type": "boolean"
-                },
-                "max_completion_tokens": {
-                    "description": "MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,\nincluding visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning",
-                    "type": "integer"
-                },
-                "max_tokens": {
-                    "description": "MaxTokens The maximum number of tokens that can be generated in the chat completion.\nThis value can be used to control costs for text generated via API.\nDeprecated: use MaxCompletionTokens. Not compatible with o1-series models.\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens",
-                    "type": "integer"
-                },
-                "messages": {
+                "object": {
+                    "type": "string"
+                }
+            }
+        },
+        "conversations.CreateConversationRequest": {
+            "type": "object",
+            "properties": {
+                "items": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.ChatCompletionMessage"
+                        "$ref": "#/definitions/conversations.ConversationItemRequest"
                     }
                 },
                 "metadata": {
-                    "description": "Metadata to store with the completion.",
                     "type": "object",
                     "additionalProperties": {
                         "type": "string"
                     }
                 },
-                "model": {
-                    "type": "string"
-                },
-                "n": {
-                    "type": "integer"
-                },
-                "parallel_tool_calls": {
-                    "description": "Disable the default behavior of parallel tool calls by setting it: false."
-                },
-                "prediction": {
-                    "description": "Configuration for a predicted output.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.Prediction"
-                        }
-                    ]
-                },
-                "presence_penalty": {
-                    "type": "number"
-                },
-                "reasoning_effort": {
-                    "description": "Controls effort on reasoning for reasoning models. It can be set to \"low\", \"medium\", or \"high\".",
-                    "type": "string"
-                },
-                "response_format": {
-                    "$ref": "#/definitions/openai.ChatCompletionResponseFormat"
-                },
-                "safety_identifier": {
-                    "description": "A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.\nThe IDs should be a string that uniquely identifies each user.\nWe recommend hashing their username or email address, in order to avoid sending us any identifying information.\nhttps://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier",
+                "title": {
                     "type": "string"
-                },
-                "seed": {
-                    "type": "integer"
-                },
-                "service_tier": {
-                    "description": "Specifies the latency tier to use for processing the request.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.ServiceTier"
-                        }
-                    ]
-                },
-                "stop": {
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
-                },
-                "store": {
-                    "description": "Store can be set to true to store the output of this completion request for use in distillations and evals.\nhttps://platform.openai.com/docs/api-reference/chat/create#chat-create-store",
-                    "type": "boolean"
-                },
-                "stream": {
-                    "type": "boolean"
-                },
-                "stream_options": {
-                    "description": "Options for streaming response. Only set this when you set stream: true.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.StreamOptions"
-                        }
-                    ]
-                },
-                "temperature": {
-                    "type": "number"
-                },
-                "tool_choice": {
-                    "description": "This can be either a string or an ToolChoice object."
-                },
-                "tools": {
+                }
+            }
+        },
+        "conversations.CreateItemsRequest": {
+            "type": "object",
+            "required": [
+                "items"
+            ],
+            "properties": {
+                "items": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.Tool"
+                        "$ref": "#/definitions/conversations.ConversationItemRequest"
                     }
-                },
-                "top_logprobs": {
-                    "description": "TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each\ntoken position, each with an associated log probability.\nlogprobs must be set to true if this parameter is used.",
-                    "type": "integer"
-                },
-                "top_p": {
-                    "type": "number"
-                },
-                "user": {
-                    "type": "string"
                 }
             }
         },
-        "openai.ChatCompletionResponseFormat": {
+        "conversations.DeletedConversationResponse": {
             "type": "object",
             "properties": {
-                "json_schema": {
-                    "$ref": "#/definitions/openai.ChatCompletionResponseFormatJSONSchema"
+                "deleted": {
+                    "type": "boolean"
                 },
-                "type": {
-                    "$ref": "#/definitions/openai.ChatCompletionResponseFormatType"
+                "id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "openai.ChatCompletionResponseFormatJSONSchema": {
+        "conversations.FileContentResponse": {
             "type": "object",
             "properties": {
-                "description": {
+                "file_id": {
+                    "type": "string"
+                },
+                "mime_type": {
                     "type": "string"
                 },
                 "name": {
                     "type": "string"
                 },
-                "schema": {},
-                "strict": {
-                    "type": "boolean"
+                "size": {
+                    "type": "integer"
                 }
             }
         },
-        "openai.ChatCompletionResponseFormatType": {
-            "type": "string",
-            "enum": [
-                "json_object",
-                "json_schema",
-                "text"
-            ],
-            "x-enum-varnames": [
-                "ChatCompletionResponseFormatTypeJSONObject",
-                "ChatCompletionResponseFormatTypeJSONSchema",
-                "ChatCompletionResponseFormatTypeText"
-            ]
-        },
-        "openai.ChatMessageImageURL": {
+        "conversations.ImageContentResponse": {
             "type": "object",
             "properties": {
                 "detail": {
-                    "$ref": "#/definitions/openai.ImageURLDetail"
+                    "type": "string"
+                },
+                "file_id": {
+                    "type": "string"
                 },
                 "url": {
                     "type": "string"
                 }
             }
         },
-        "openai.ChatMessagePart": {
+        "conversations.ListResponse-conversations_ConversationItemResponse": {
             "type": "object",
             "properties": {
-                "image_url": {
-                    "$ref": "#/definitions/openai.ChatMessageImageURL"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversations.ConversationItemResponse"
+                    }
                 },
-                "text": {
+                "first_id": {
                     "type": "string"
                 },
-                "type": {
-                    "$ref": "#/definitions/openai.ChatMessagePartType"
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "total": {
+                    "type": "integer"
                 }
             }
         },
-        "openai.ChatMessagePartType": {
-            "type": "string",
-            "enum": [
-                "text",
-                "image_url"
-            ],
-            "x-enum-varnames": [
-                "ChatMessagePartTypeText",
-                "ChatMessagePartTypeImageURL"
-            ]
-        },
-        "openai.CompletionTokensDetails": {
+        "conversations.ListResponse-conversations_ConversationResponse": {
             "type": "object",
             "properties": {
-                "accepted_prediction_tokens": {
-                    "type": "integer"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversations.ConversationResponse"
+                    }
+                },
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean"
                 },
-                "audio_tokens": {
-                    "type": "integer"
+                "last_id": {
+                    "type": "string"
                 },
-                "reasoning_tokens": {
-                    "type": "integer"
+                "object": {
+                    "type": "string"
                 },
-                "rejected_prediction_tokens": {
+                "total": {
                     "type": "integer"
                 }
             }
         },
-        "openai.ContentFilterResults": {
+        "conversations.OutputTextResponse": {
             "type": "object",
             "properties": {
-                "hate": {
-                    "$ref": "#/definitions/openai.Hate"
-                },
-                "jailbreak": {
-                    "$ref": "#/definitions/openai.JailBreak"
-                },
-                "profanity": {
-                    "$ref": "#/definitions/openai.Profanity"
-                },
-                "self_harm": {
-                    "$ref": "#/definitions/openai.SelfHarm"
-                },
-                "sexual": {
-                    "$ref": "#/definitions/openai.Sexual"
+                "annotations": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversations.AnnotationResponse"
+                    }
                 },
-                "violence": {
-                    "$ref": "#/definitions/openai.Violence"
+                "text": {
+                    "type": "string"
                 }
             }
         },
-        "openai.FinishReason": {
-            "type": "string",
-            "enum": [
-                "stop",
-                "length",
-                "function_call",
-                "tool_calls",
-                "content_filter",
-                "null"
-            ],
-            "x-enum-varnames": [
-                "FinishReasonStop",
-                "FinishReasonLength",
-                "FinishReasonFunctionCall",
-                "FinishReasonToolCalls",
-                "FinishReasonContentFilter",
-                "FinishReasonNull"
-            ]
-        },
-        "openai.FunctionCall": {
+        "conversations.TextResponse": {
             "type": "object",
             "properties": {
-                "arguments": {
-                    "description": "call function with arguments in JSON format",
+                "value": {
                     "type": "string"
+                }
+            }
+        },
+        "conversations.UpdateConversationRequest": {
+            "type": "object",
+            "properties": {
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
                 },
-                "name": {
+                "title": {
                     "type": "string"
                 }
             }
         },
-        "openai.FunctionDefinition": {
+        "google.AccessTokenResponse": {
             "type": "object",
             "properties": {
-                "description": {
-                    "type": "string"
-                },
-                "name": {
+                "access_token": {
                     "type": "string"
                 },
-                "parameters": {
-                    "description": "Parameters is an object describing the function.\nYou can pass json.RawMessage to describe the schema,\nor you can pass in a struct which serializes to the proper JSON schema.\nThe jsonschema package is provided for convenience, but you should\nconsider another specialized library if you require more complex schemas."
+                "expires_in": {
+                    "type": "integer"
                 },
-                "strict": {
-                    "type": "boolean"
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "openai.Hate": {
+        "google.GoogleCallbackRequest": {
             "type": "object",
+            "required": [
+                "code"
+            ],
             "properties": {
-                "filtered": {
-                    "type": "boolean"
+                "code": {
+                    "type": "string"
                 },
-                "severity": {
+                "state": {
                     "type": "string"
                 }
             }
         },
-        "openai.ImageURLDetail": {
-            "type": "string",
-            "enum": [
-                "high",
-                "low",
-                "auto"
-            ],
-            "x-enum-varnames": [
-                "ImageURLDetailHigh",
-                "ImageURLDetailLow",
-                "ImageURLDetailAuto"
-            ]
-        },
-        "openai.JailBreak": {
+        "organization.AdminAPIKeyDeletedResponse": {
             "type": "object",
             "properties": {
-                "detected": {
+                "deleted": {
                     "type": "boolean"
                 },
-                "filtered": {
-                    "type": "boolean"
+                "id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "openai.LogProb": {
+        "organization.AdminApiKeyListResponse": {
             "type": "object",
             "properties": {
-                "bytes": {
-                    "description": "Omitting the field if it is null",
+                "data": {
                     "type": "array",
                     "items": {
-                        "type": "integer"
+                        "$ref": "#/definitions/organization.OrganizationAdminAPIKeyResponse"
                     }
                 },
-                "logprob": {
-                    "type": "number"
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean"
                 },
-                "token": {
+                "last_id": {
                     "type": "string"
                 },
-                "top_logprobs": {
-                    "description": "TopLogProbs is a list of the most likely tokens and their log probability, at this token position.\nIn rare cases, there may be fewer than the number of requested top_logprobs returned.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.TopLogProbs"
-                    }
+                "object": {
+                    "type": "string",
+                    "example": "list"
                 }
             }
         },
-        "openai.LogProbs": {
+        "organization.CreateOrganizationAdminAPIKeyRequest": {
             "type": "object",
+            "required": [
+                "name"
+            ],
             "properties": {
-                "content": {
-                    "description": "Content is a list of message content tokens with log probability information.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.LogProb"
-                    }
+                "name": {
+                    "type": "string",
+                    "example": "My Admin API Key"
                 }
             }
         },
-        "openai.Prediction": {
+        "organization.OrganizationAdminAPIKeyResponse": {
             "type": "object",
             "properties": {
-                "content": {
-                    "type": "string"
+                "created_at": {
+                    "type": "integer",
+                    "example": 1698765432
                 },
-                "type": {
-                    "type": "string"
+                "id": {
+                    "type": "string",
+                    "example": "key_1234567890"
+                },
+                "last_used_at": {
+                    "type": "integer",
+                    "example": 1698765432
+                },
+                "name": {
+                    "type": "string",
+                    "example": "My Admin API Key"
+                },
+                "object": {
+                    "type": "string",
+                    "example": "api_key"
+                },
+                "owner": {
+                    "$ref": "#/definitions/organization.Owner"
+                },
+                "redacted_value": {
+                    "type": "string",
+                    "example": "sk-...abcd"
+                },
+                "value": {
+                    "type": "string",
+                    "example": "sk-abcdef1234567890"
                 }
             }
         },
-        "openai.Profanity": {
+        "organization.Owner": {
             "type": "object",
             "properties": {
-                "detected": {
-                    "type": "boolean"
+                "created_at": {
+                    "type": "integer",
+                    "example": 1698765432
                 },
-                "filtered": {
-                    "type": "boolean"
+                "id": {
+                    "type": "string",
+                    "example": "user_1234567890"
+                },
+                "name": {
+                    "type": "string",
+                    "example": "John Doe"
+                },
+                "object": {
+                    "type": "string",
+                    "example": "user"
+                },
+                "role": {
+                    "type": "string",
+                    "example": "admin"
+                },
+                "type": {
+                    "type": "string",
+                    "example": "user"
                 }
             }
         },
-        "openai.PromptTokensDetails": {
+        "projects.CreateProjectRequest": {
             "type": "object",
+            "required": [
+                "name"
+            ],
             "properties": {
-                "audio_tokens": {
-                    "type": "integer"
-                },
-                "cached_tokens": {
-                    "type": "integer"
+                "name": {
+                    "type": "string",
+                    "example": "New AI Project"
                 }
             }
         },
-        "openai.SelfHarm": {
+        "projects.ProjectListResponse": {
             "type": "object",
             "properties": {
-                "filtered": {
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/projects.ProjectResponse"
+                    }
+                },
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
                     "type": "boolean"
                 },
-                "severity": {
+                "last_id": {
                     "type": "string"
+                },
+                "object": {
+                    "type": "string",
+                    "example": "list"
                 }
             }
         },
-        "openai.ServiceTier": {
-            "type": "string",
-            "enum": [
-                "auto",
-                "default",
-                "flex",
-                "priority"
-            ],
-            "x-enum-varnames": [
-                "ServiceTierAuto",
-                "ServiceTierDefault",
-                "ServiceTierFlex",
-                "ServiceTierPriority"
-            ]
-        },
-        "openai.Sexual": {
+        "projects.ProjectResponse": {
             "type": "object",
             "properties": {
-                "filtered": {
-                    "type": "boolean"
+                "archived_at": {
+                    "type": "integer",
+                    "example": 1698765432
+                },
+                "created_at": {
+                    "type": "integer",
+                    "example": 1698765432
+                },
+                "id": {
+                    "type": "string",
+                    "example": "proj_1234567890"
+                },
+                "name": {
+                    "type": "string",
+                    "example": "My First Project"
                 },
-                "severity": {
+                "object": {
+                    "type": "string",
+                    "example": "project"
+                },
+                "status": {
                     "type": "string"
                 }
             }
         },
-        "openai.StreamOptions": {
+        "projects.UpdateProjectRequest": {
             "type": "object",
             "properties": {
-                "include_usage": {
-                    "description": "If set, an additional chunk will be streamed before the data: [DONE] message.\nThe usage field on this chunk shows the token usage statistics for the entire request,\nand the choices field will always be an empty array.\nAll other chunks will also include a usage field, but with a null value.",
-                    "type": "boolean"
+                "name": {
+                    "type": "string",
+                    "example": "Updated AI Project"
                 }
             }
         },
-        "openai.Tool": {
+        "responses.CreateResponseRequest": {
             "type": "object",
+            "required": [
+                "input",
+                "model"
+            ],
             "properties": {
-                "function": {
-                    "$ref": "#/definitions/openai.FunctionDefinition"
+                "generation": {
+                    "type": "object",
+                    "additionalProperties": true
                 },
-                "type": {
-                    "$ref": "#/definitions/openai.ToolType"
-                }
-            }
-        },
-        "openai.ToolCall": {
-            "type": "object",
-            "properties": {
-                "function": {
-                    "$ref": "#/definitions/openai.FunctionCall"
+                "input": {
+                    "type": "object",
+                    "additionalProperties": true
                 },
-                "id": {
-                    "type": "string"
+                "max_tokens": {
+                    "type": "integer",
+                    "example": 1000
                 },
-                "index": {
-                    "description": "Index is not nil only in chat completion chunk object",
-                    "type": "integer"
+                "model": {
+                    "type": "string",
+                    "example": "gpt-4"
                 },
-                "type": {
-                    "$ref": "#/definitions/openai.ToolType"
+                "stream": {
+                    "type": "boolean",
+                    "example": false
+                },
+                "temperature": {
+                    "type": "number",
+                    "example": 0.7
                 }
             }
         },
-        "openai.ToolType": {
-            "type": "string",
-            "enum": [
-                "function"
-            ],
-            "x-enum-varnames": [
-                "ToolTypeFunction"
-            ]
-        },
-        "openai.TopLogProbs": {
+        "responses.ErrorResponse": {
             "type": "object",
             "properties": {
-                "bytes": {
-                    "type": "array",
-                    "items": {
-                        "type": "integer"
-                    }
-                },
-                "logprob": {
-                    "type": "number"
+                "code": {
+                    "type": "string"
                 },
-                "token": {
+                "error": {
                     "type": "string"
                 }
             }
         },
-        "openai.Usage": {
+        "v1.Model": {
             "type": "object",
             "properties": {
-                "completion_tokens": {
+                "created": {
                     "type": "integer"
                 },
-                "completion_tokens_details": {
-                    "$ref": "#/definitions/openai.CompletionTokensDetails"
-                },
-                "prompt_tokens": {
-                    "type": "integer"
+                "id": {
+                    "type": "string"
                 },
-                "prompt_tokens_details": {
-                    "$ref": "#/definitions/openai.PromptTokensDetails"
+                "object": {
+                    "type": "string"
                 },
-                "total_tokens": {
-                    "type": "integer"
+                "owned_by": {
+                    "type": "string"
                 }
             }
         },
-        "openai.Violence": {
+        "v1.ModelsResponse": {
             "type": "object",
             "properties": {
-                "filtered": {
-                    "type": "boolean"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/v1.Model"
+                    }
                 },
-                "severity": {
+                "object": {
                     "type": "string"
                 }
             }
diff --git a/apps/jan-api-gateway/application/docs/swagger.yaml b/apps/jan-api-gateway/application/docs/swagger.yaml
index 029d9c9c..5f0a97c8 100644
--- a/apps/jan-api-gateway/application/docs/swagger.yaml
+++ b/apps/jan-api-gateway/application/docs/swagger.yaml
@@ -1,26 +1,41 @@
 basePath: /
 definitions:
-  app_interfaces_http_routes_v1.Model:
+  apikeys.ApiKeyCreateResponse:
     properties:
-      created:
-        type: integer
+      result:
+        $ref: '#/definitions/apikeys.ApiKeyResponse'
+      status:
+        type: string
+    type: object
+  apikeys.ApiKeyResponse:
+    properties:
+      apikeyType:
+        type: string
+      description:
+        type: string
+      enabled:
+        type: boolean
+      expiresAt:
+        type: string
       id:
         type: string
-      object:
+      key:
         type: string
-      owned_by:
+      last_usedAt:
+        type: string
+      permissions:
+        type: string
+      plaintextHint:
         type: string
     type: object
-  app_interfaces_http_routes_v1.ModelsResponse:
+  apikeys.CreateApiKeyRequest:
     properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1.Model'
-        type: array
-      object:
+      description:
+        type: string
+      expiresAt:
         type: string
     type: object
-  app_interfaces_http_routes_v1_auth.AccessTokenResponse:
+  auth.AccessTokenResponse:
     properties:
       access_token:
         type: string
@@ -29,7 +44,7 @@ definitions:
       object:
         type: string
     type: object
-  app_interfaces_http_routes_v1_auth.GetMeResponse:
+  auth.GetMeResponse:
     properties:
       email:
         type: string
@@ -40,29 +55,20 @@ definitions:
       object:
         type: string
     type: object
-  app_interfaces_http_routes_v1_auth_google.AccessTokenResponse:
+  chat.ChatCompletionChoice:
     properties:
-      access_token:
+      finish_reason:
         type: string
-      expires_in:
+      index:
         type: integer
-      object:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest:
-    properties:
-      code:
-        type: string
-      state:
-        type: string
-    required:
-    - code
+      message:
+        $ref: '#/definitions/chat.Message'
     type: object
-  app_interfaces_http_routes_v1_chat.ChatCompletionResponseSwagger:
+  chat.ChatCompletionResponseSwagger:
     properties:
       choices:
         items:
-          $ref: '#/definitions/openai.ChatCompletionChoice'
+          $ref: '#/definitions/chat.ChatCompletionChoice'
         type: array
       created:
         type: integer
@@ -73,9 +79,48 @@ definitions:
       object:
         type: string
       usage:
-        $ref: '#/definitions/openai.Usage'
+        $ref: '#/definitions/chat.Usage'
+    type: object
+  chat.Message:
+    properties:
+      content:
+        type: string
+      role:
+        type: string
+    type: object
+  chat.PostChatCompletionRequest:
+    properties:
+      max_tokens:
+        type: integer
+      messages:
+        items:
+          $ref: '#/definitions/chat.Message'
+        type: array
+      model:
+        type: string
+      temperature:
+        type: number
     type: object
-  app_interfaces_http_routes_v1_conversations.AnnotationResponse:
+  chat.Usage:
+    properties:
+      completion_tokens:
+        type: integer
+      prompt_tokens:
+        type: integer
+      total_tokens:
+        type: integer
+    type: object
+  conversation.ItemRole:
+    enum:
+    - system
+    - user
+    - assistant
+    type: string
+    x-enum-varnames:
+    - ItemRoleSystem
+    - ItemRoleUser
+    - ItemRoleAssistant
+  conversations.AnnotationResponse:
     properties:
       end_index:
         type: integer
@@ -92,22 +137,22 @@ definitions:
       url:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.ContentResponse:
+  conversations.ContentResponse:
     properties:
       file:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.FileContentResponse'
+        $ref: '#/definitions/conversations.FileContentResponse'
       image:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ImageContentResponse'
+        $ref: '#/definitions/conversations.ImageContentResponse'
       input_text:
         type: string
       output_text:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.OutputTextResponse'
+        $ref: '#/definitions/conversations.OutputTextResponse'
       text:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.TextResponse'
+        $ref: '#/definitions/conversations.TextResponse'
       type:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.ConversationContentRequest:
+  conversations.ConversationContentRequest:
     properties:
       text:
         type: string
@@ -116,11 +161,11 @@ definitions:
     required:
     - type
     type: object
-  app_interfaces_http_routes_v1_conversations.ConversationItemListResponse:
+  conversations.ConversationItemListResponse:
     properties:
       data:
         items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse'
+          $ref: '#/definitions/conversations.ConversationItemResponse'
         type: array
       first_id:
         type: string
@@ -131,25 +176,25 @@ definitions:
       object:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.ConversationItemRequest:
+  conversations.ConversationItemRequest:
     properties:
       content:
         items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationContentRequest'
+          $ref: '#/definitions/conversations.ConversationContentRequest'
         type: array
       role:
-        $ref: '#/definitions/menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole'
+        $ref: '#/definitions/conversation.ItemRole'
       type:
         type: string
     required:
     - content
     - type
     type: object
-  app_interfaces_http_routes_v1_conversations.ConversationItemResponse:
+  conversations.ConversationItemResponse:
     properties:
       content:
         items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ContentResponse'
+          $ref: '#/definitions/conversations.ContentResponse'
         type: array
       created_at:
         type: integer
@@ -164,7 +209,7 @@ definitions:
       type:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.ConversationResponse:
+  conversations.ConversationResponse:
     properties:
       created_at:
         type: integer
@@ -177,11 +222,11 @@ definitions:
       object:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.CreateConversationRequest:
+  conversations.CreateConversationRequest:
     properties:
       items:
         items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest'
+          $ref: '#/definitions/conversations.ConversationItemRequest'
         type: array
       metadata:
         additionalProperties:
@@ -190,16 +235,16 @@ definitions:
       title:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.CreateItemsRequest:
+  conversations.CreateItemsRequest:
     properties:
       items:
         items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest'
+          $ref: '#/definitions/conversations.ConversationItemRequest'
         type: array
     required:
     - items
     type: object
-  app_interfaces_http_routes_v1_conversations.DeletedConversationResponse:
+  conversations.DeletedConversationResponse:
     properties:
       deleted:
         type: boolean
@@ -208,7 +253,7 @@ definitions:
       object:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.FileContentResponse:
+  conversations.FileContentResponse:
     properties:
       file_id:
         type: string
@@ -219,7 +264,7 @@ definitions:
       size:
         type: integer
     type: object
-  app_interfaces_http_routes_v1_conversations.ImageContentResponse:
+  conversations.ImageContentResponse:
     properties:
       detail:
         type: string
@@ -228,21 +273,55 @@ definitions:
       url:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.OutputTextResponse:
+  conversations.ListResponse-conversations_ConversationItemResponse:
+    properties:
+      data:
+        items:
+          $ref: '#/definitions/conversations.ConversationItemResponse'
+        type: array
+      first_id:
+        type: string
+      has_more:
+        type: boolean
+      last_id:
+        type: string
+      object:
+        type: string
+      total:
+        type: integer
+    type: object
+  conversations.ListResponse-conversations_ConversationResponse:
+    properties:
+      data:
+        items:
+          $ref: '#/definitions/conversations.ConversationResponse'
+        type: array
+      first_id:
+        type: string
+      has_more:
+        type: boolean
+      last_id:
+        type: string
+      object:
+        type: string
+      total:
+        type: integer
+    type: object
+  conversations.OutputTextResponse:
     properties:
       annotations:
         items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.AnnotationResponse'
+          $ref: '#/definitions/conversations.AnnotationResponse'
         type: array
       text:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.TextResponse:
+  conversations.TextResponse:
     properties:
       value:
         type: string
     type: object
-  app_interfaces_http_routes_v1_conversations.UpdateConversationRequest:
+  conversations.UpdateConversationRequest:
     properties:
       metadata:
         additionalProperties:
@@ -251,7 +330,25 @@ definitions:
       title:
         type: string
     type: object
-  app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse:
+  google.AccessTokenResponse:
+    properties:
+      access_token:
+        type: string
+      expires_in:
+        type: integer
+      object:
+        type: string
+    type: object
+  google.GoogleCallbackRequest:
+    properties:
+      code:
+        type: string
+      state:
+        type: string
+    required:
+    - code
+    type: object
+  organization.AdminAPIKeyDeletedResponse:
     properties:
       deleted:
         type: boolean
@@ -260,11 +357,11 @@ definitions:
       object:
         type: string
     type: object
-  app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse:
+  organization.AdminApiKeyListResponse:
     properties:
       data:
         items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse'
+          $ref: '#/definitions/organization.OrganizationAdminAPIKeyResponse'
         type: array
       first_id:
         type: string
@@ -276,7 +373,7 @@ definitions:
         example: list
         type: string
     type: object
-  app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest:
+  organization.CreateOrganizationAdminAPIKeyRequest:
     properties:
       name:
         example: My Admin API Key
@@ -284,7 +381,7 @@ definitions:
     required:
     - name
     type: object
-  app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse:
+  organization.OrganizationAdminAPIKeyResponse:
     properties:
       created_at:
         example: 1698765432
@@ -302,7 +399,7 @@ definitions:
         example: api_key
         type: string
       owner:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_organization.Owner'
+        $ref: '#/definitions/organization.Owner'
       redacted_value:
         example: sk-...abcd
         type: string
@@ -310,7 +407,7 @@ definitions:
         example: sk-abcdef1234567890
         type: string
     type: object
-  app_interfaces_http_routes_v1_organization.Owner:
+  organization.Owner:
     properties:
       created_at:
         example: 1698765432
@@ -331,7 +428,7 @@ definitions:
         example: user
         type: string
     type: object
-  app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest:
+  projects.CreateProjectRequest:
     properties:
       name:
         example: New AI Project
@@ -339,11 +436,11 @@ definitions:
     required:
     - name
     type: object
-  app_interfaces_http_routes_v1_organization_projects.ProjectListResponse:
+  projects.ProjectListResponse:
     properties:
       data:
         items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
+          $ref: '#/definitions/projects.ProjectResponse'
         type: array
       first_id:
         type: string
@@ -355,7 +452,7 @@ definitions:
         example: list
         type: string
     type: object
-  app_interfaces_http_routes_v1_organization_projects.ProjectResponse:
+  projects.ProjectResponse:
     properties:
       archived_at:
         example: 1698765432
@@ -375,576 +472,61 @@ definitions:
       status:
         type: string
     type: object
-  app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest:
+  projects.UpdateProjectRequest:
     properties:
       name:
         example: Updated AI Project
         type: string
     type: object
-  app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse:
+  responses.CreateResponseRequest:
     properties:
-      apikeyType:
-        type: string
-      description:
-        type: string
-      enabled:
-        type: boolean
-      expiresAt:
-        type: string
-      id:
-        type: string
-      key:
-        type: string
-      last_usedAt:
-        type: string
-      permissions:
-        type: string
-      plaintextHint:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest:
-    properties:
-      description:
-        type: string
-      expiresAt:
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole:
-    enum:
-    - system
-    - user
-    - assistant
-    type: string
-    x-enum-varnames:
-    - ItemRoleSystem
-    - ItemRoleUser
-    - ItemRoleAssistant
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse:
-    properties:
-      code:
-        type: string
-      error:
-        type: string
-    type: object
-  ? menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse
-  : properties:
-      result:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse'
-      status:
-        type: string
-    type: object
-  ? menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse
-  : properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse'
-        type: array
-      first_id:
-        type: string
-      has_more:
-        type: boolean
-      last_id:
-        type: string
-      object:
-        $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList'
-      total:
-        type: integer
-    type: object
-  ? menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationResponse
-  : properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse'
-        type: array
-      first_id:
-        type: string
-      has_more:
-        type: boolean
-      last_id:
-        type: string
-      object:
-        $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList'
-      total:
-        type: integer
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList:
-    enum:
-    - list
-    type: string
-    x-enum-varnames:
-    - ObjectTypeListList
-  openai.ChatCompletionChoice:
-    properties:
-      content_filter_results:
-        $ref: '#/definitions/openai.ContentFilterResults'
-      finish_reason:
-        allOf:
-        - $ref: '#/definitions/openai.FinishReason'
-        description: |-
-          FinishReason
-          stop: API returned complete message,
-          or a message terminated by one of the stop sequences provided via the stop parameter
-          length: Incomplete model output due to max_tokens parameter or token limit
-          function_call: The model decided to call a function
-          content_filter: Omitted content due to a flag from our content filters
-          null: API response still in progress or incomplete
-      index:
-        type: integer
-      logprobs:
-        $ref: '#/definitions/openai.LogProbs'
-      message:
-        $ref: '#/definitions/openai.ChatCompletionMessage'
-    type: object
-  openai.ChatCompletionMessage:
-    properties:
-      content:
-        type: string
-      function_call:
-        $ref: '#/definitions/openai.FunctionCall'
-      multiContent:
-        items:
-          $ref: '#/definitions/openai.ChatMessagePart'
-        type: array
-      name:
-        description: |-
-          This property isn't in the official documentation, but it's in
-          the documentation for the official library for python:
-          - https://github.com/openai/openai-python/blob/main/chatml.md
-          - https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
-        type: string
-      reasoning_content:
-        description: |-
-          This property is used for the "reasoning" feature supported by deepseek-reasoner
-          which is not in the official documentation.
-          the doc from deepseek:
-          - https://api-docs.deepseek.com/api/create-chat-completion#responses
-        type: string
-      refusal:
-        type: string
-      role:
-        type: string
-      tool_call_id:
-        description: For Role=tool prompts this should be set to the ID given in the
-          assistant's prior request to call a tool.
-        type: string
-      tool_calls:
-        description: For Role=assistant prompts this may be set to the tool calls
-          generated by the model, such as function calls.
-        items:
-          $ref: '#/definitions/openai.ToolCall'
-        type: array
-    type: object
-  openai.ChatCompletionRequest:
-    properties:
-      chat_template_kwargs:
-        additionalProperties: {}
-        description: |-
-          ChatTemplateKwargs provides a way to add non-standard parameters to the request body.
-          Additional kwargs to pass to the template renderer. Will be accessible by the chat template.
-          Such as think mode for qwen3. "chat_template_kwargs": {"enable_thinking": false}
-          https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
+      generation:
+        additionalProperties: true
         type: object
-      frequency_penalty:
-        type: number
-      function_call:
-        description: 'Deprecated: use ToolChoice instead.'
-      functions:
-        description: 'Deprecated: use Tools instead.'
-        items:
-          $ref: '#/definitions/openai.FunctionDefinition'
-        type: array
-      guided_choice:
-        description: |-
-          GuidedChoice is a vLLM-specific extension that restricts the model's output
-          to one of the predefined string choices provided in this field. This feature
-          is used to constrain the model's responses to a controlled set of options,
-          ensuring predictable and consistent outputs in scenarios where specific
-          choices are required.
-        items:
-          type: string
-        type: array
-      logit_bias:
-        additionalProperties:
-          type: integer
-        description: |-
-          LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.
-          incorrect: `"logit_bias":{"You": 6}`, correct: `"logit_bias":{"1639": 6}`
-          refs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias
+      input:
+        additionalProperties: true
         type: object
-      logprobs:
-        description: |-
-          LogProbs indicates whether to return log probabilities of the output tokens or not.
-          If true, returns the log probabilities of each output token returned in the content of message.
-          This option is currently not available on the gpt-4-vision-preview model.
-        type: boolean
-      max_completion_tokens:
-        description: |-
-          MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
-          including visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning
-        type: integer
       max_tokens:
-        description: |-
-          MaxTokens The maximum number of tokens that can be generated in the chat completion.
-          This value can be used to control costs for text generated via API.
-          Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
-          refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
+        example: 1000
         type: integer
-      messages:
-        items:
-          $ref: '#/definitions/openai.ChatCompletionMessage'
-        type: array
-      metadata:
-        additionalProperties:
-          type: string
-        description: Metadata to store with the completion.
-        type: object
       model:
+        example: gpt-4
         type: string
-      "n":
-        type: integer
-      parallel_tool_calls:
-        description: 'Disable the default behavior of parallel tool calls by setting
-          it: false.'
-      prediction:
-        allOf:
-        - $ref: '#/definitions/openai.Prediction'
-        description: Configuration for a predicted output.
-      presence_penalty:
-        type: number
-      reasoning_effort:
-        description: Controls effort on reasoning for reasoning models. It can be
-          set to "low", "medium", or "high".
-        type: string
-      response_format:
-        $ref: '#/definitions/openai.ChatCompletionResponseFormat'
-      safety_identifier:
-        description: |-
-          A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.
-          The IDs should be a string that uniquely identifies each user.
-          We recommend hashing their username or email address, in order to avoid sending us any identifying information.
-          https://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier
-        type: string
-      seed:
-        type: integer
-      service_tier:
-        allOf:
-        - $ref: '#/definitions/openai.ServiceTier'
-        description: Specifies the latency tier to use for processing the request.
-      stop:
-        items:
-          type: string
-        type: array
-      store:
-        description: |-
-          Store can be set to true to store the output of this completion request for use in distillations and evals.
-          https://platform.openai.com/docs/api-reference/chat/create#chat-create-store
-        type: boolean
       stream:
+        example: false
         type: boolean
-      stream_options:
-        allOf:
-        - $ref: '#/definitions/openai.StreamOptions'
-        description: 'Options for streaming response. Only set this when you set stream:
-          true.'
       temperature:
+        example: 0.7
         type: number
-      tool_choice:
-        description: This can be either a string or an ToolChoice object.
-      tools:
-        items:
-          $ref: '#/definitions/openai.Tool'
-        type: array
-      top_logprobs:
-        description: |-
-          TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each
-          token position, each with an associated log probability.
-          logprobs must be set to true if this parameter is used.
-        type: integer
-      top_p:
-        type: number
-      user:
-        type: string
-    type: object
-  openai.ChatCompletionResponseFormat:
-    properties:
-      json_schema:
-        $ref: '#/definitions/openai.ChatCompletionResponseFormatJSONSchema'
-      type:
-        $ref: '#/definitions/openai.ChatCompletionResponseFormatType'
-    type: object
-  openai.ChatCompletionResponseFormatJSONSchema:
-    properties:
-      description:
-        type: string
-      name:
-        type: string
-      schema: {}
-      strict:
-        type: boolean
-    type: object
-  openai.ChatCompletionResponseFormatType:
-    enum:
-    - json_object
-    - json_schema
-    - text
-    type: string
-    x-enum-varnames:
-    - ChatCompletionResponseFormatTypeJSONObject
-    - ChatCompletionResponseFormatTypeJSONSchema
-    - ChatCompletionResponseFormatTypeText
-  openai.ChatMessageImageURL:
-    properties:
-      detail:
-        $ref: '#/definitions/openai.ImageURLDetail'
-      url:
-        type: string
-    type: object
-  openai.ChatMessagePart:
-    properties:
-      image_url:
-        $ref: '#/definitions/openai.ChatMessageImageURL'
-      text:
-        type: string
-      type:
-        $ref: '#/definitions/openai.ChatMessagePartType'
-    type: object
-  openai.ChatMessagePartType:
-    enum:
-    - text
-    - image_url
-    type: string
-    x-enum-varnames:
-    - ChatMessagePartTypeText
-    - ChatMessagePartTypeImageURL
-  openai.CompletionTokensDetails:
-    properties:
-      accepted_prediction_tokens:
-        type: integer
-      audio_tokens:
-        type: integer
-      reasoning_tokens:
-        type: integer
-      rejected_prediction_tokens:
-        type: integer
-    type: object
-  openai.ContentFilterResults:
-    properties:
-      hate:
-        $ref: '#/definitions/openai.Hate'
-      jailbreak:
-        $ref: '#/definitions/openai.JailBreak'
-      profanity:
-        $ref: '#/definitions/openai.Profanity'
-      self_harm:
-        $ref: '#/definitions/openai.SelfHarm'
-      sexual:
-        $ref: '#/definitions/openai.Sexual'
-      violence:
-        $ref: '#/definitions/openai.Violence'
-    type: object
-  openai.FinishReason:
-    enum:
-    - stop
-    - length
-    - function_call
-    - tool_calls
-    - content_filter
-    - "null"
-    type: string
-    x-enum-varnames:
-    - FinishReasonStop
-    - FinishReasonLength
-    - FinishReasonFunctionCall
-    - FinishReasonToolCalls
-    - FinishReasonContentFilter
-    - FinishReasonNull
-  openai.FunctionCall:
-    properties:
-      arguments:
-        description: call function with arguments in JSON format
-        type: string
-      name:
-        type: string
-    type: object
-  openai.FunctionDefinition:
-    properties:
-      description:
-        type: string
-      name:
-        type: string
-      parameters:
-        description: |-
-          Parameters is an object describing the function.
-          You can pass json.RawMessage to describe the schema,
-          or you can pass in a struct which serializes to the proper JSON schema.
-          The jsonschema package is provided for convenience, but you should
-          consider another specialized library if you require more complex schemas.
-      strict:
-        type: boolean
-    type: object
-  openai.Hate:
-    properties:
-      filtered:
-        type: boolean
-      severity:
-        type: string
-    type: object
-  openai.ImageURLDetail:
-    enum:
-    - high
-    - low
-    - auto
-    type: string
-    x-enum-varnames:
-    - ImageURLDetailHigh
-    - ImageURLDetailLow
-    - ImageURLDetailAuto
-  openai.JailBreak:
-    properties:
-      detected:
-        type: boolean
-      filtered:
-        type: boolean
-    type: object
-  openai.LogProb:
-    properties:
-      bytes:
-        description: Omitting the field if it is null
-        items:
-          type: integer
-        type: array
-      logprob:
-        type: number
-      token:
-        type: string
-      top_logprobs:
-        description: |-
-          TopLogProbs is a list of the most likely tokens and their log probability, at this token position.
-          In rare cases, there may be fewer than the number of requested top_logprobs returned.
-        items:
-          $ref: '#/definitions/openai.TopLogProbs'
-        type: array
-    type: object
-  openai.LogProbs:
-    properties:
-      content:
-        description: Content is a list of message content tokens with log probability
-          information.
-        items:
-          $ref: '#/definitions/openai.LogProb'
-        type: array
+    required:
+    - input
+    - model
     type: object
-  openai.Prediction:
+  responses.ErrorResponse:
     properties:
-      content:
+      code:
         type: string
-      type:
+      error:
         type: string
     type: object
-  openai.Profanity:
+  v1.Model:
     properties:
-      detected:
-        type: boolean
-      filtered:
-        type: boolean
-    type: object
-  openai.PromptTokensDetails:
-    properties:
-      audio_tokens:
-        type: integer
-      cached_tokens:
+      created:
         type: integer
-    type: object
-  openai.SelfHarm:
-    properties:
-      filtered:
-        type: boolean
-      severity:
+      id:
         type: string
-    type: object
-  openai.ServiceTier:
-    enum:
-    - auto
-    - default
-    - flex
-    - priority
-    type: string
-    x-enum-varnames:
-    - ServiceTierAuto
-    - ServiceTierDefault
-    - ServiceTierFlex
-    - ServiceTierPriority
-  openai.Sexual:
-    properties:
-      filtered:
-        type: boolean
-      severity:
+      object:
         type: string
-    type: object
-  openai.StreamOptions:
-    properties:
-      include_usage:
-        description: |-
-          If set, an additional chunk will be streamed before the data: [DONE] message.
-          The usage field on this chunk shows the token usage statistics for the entire request,
-          and the choices field will always be an empty array.
-          All other chunks will also include a usage field, but with a null value.
-        type: boolean
-    type: object
-  openai.Tool:
-    properties:
-      function:
-        $ref: '#/definitions/openai.FunctionDefinition'
-      type:
-        $ref: '#/definitions/openai.ToolType'
-    type: object
-  openai.ToolCall:
-    properties:
-      function:
-        $ref: '#/definitions/openai.FunctionCall'
-      id:
+      owned_by:
         type: string
-      index:
-        description: Index is not nil only in chat completion chunk object
-        type: integer
-      type:
-        $ref: '#/definitions/openai.ToolType'
     type: object
-  openai.ToolType:
-    enum:
-    - function
-    type: string
-    x-enum-varnames:
-    - ToolTypeFunction
-  openai.TopLogProbs:
+  v1.ModelsResponse:
     properties:
-      bytes:
+      data:
         items:
-          type: integer
+          $ref: '#/definitions/v1.Model'
         type: array
-      logprob:
-        type: number
-      token:
-        type: string
-    type: object
-  openai.Usage:
-    properties:
-      completion_tokens:
-        type: integer
-      completion_tokens_details:
-        $ref: '#/definitions/openai.CompletionTokensDetails'
-      prompt_tokens:
-        type: integer
-      prompt_tokens_details:
-        $ref: '#/definitions/openai.PromptTokensDetails'
-      total_tokens:
-        type: integer
-    type: object
-  openai.Violence:
-    properties:
-      filtered:
-        type: boolean
-      severity:
+      object:
         type: string
     type: object
 info:
@@ -966,28 +548,28 @@ paths:
         name: request
         required: true
         schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest'
+          $ref: '#/definitions/google.GoogleCallbackRequest'
       produces:
       - application/json
       responses:
         "200":
           description: Successfully authenticated and returned tokens
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_auth_google.AccessTokenResponse'
+            $ref: '#/definitions/google.AccessTokenResponse'
         "400":
           description: Bad request (e.g., invalid state, missing code, or invalid
             claims)
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized (e.g., a user claim is not found or is invalid
             in the context)
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal Server Error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       summary: Google OAuth2 Callback
       tags:
       - Authentication
@@ -1001,7 +583,7 @@ paths:
         "500":
           description: Internal Server Error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       summary: Google OAuth2 Login
       tags:
       - Authentication
@@ -1014,15 +596,15 @@ paths:
         "200":
           description: Successfully refreshed the access token
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse'
+            $ref: '#/definitions/auth.AccessTokenResponse'
         "400":
           description: Bad Request (e.g., invalid refresh token)
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized (e.g., expired or missing refresh token)
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       summary: Guest Login
       tags:
       - Authentication
@@ -1040,11 +622,11 @@ paths:
         "400":
           description: Bad Request (e.g., invalid refresh token)
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized (e.g., expired or missing refresh token)
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       summary: Refresh an access token
       tags:
       - Authentication
@@ -1058,11 +640,11 @@ paths:
         "200":
           description: Successfully retrieved user profile
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_auth.GetMeResponse'
+            $ref: '#/definitions/auth.GetMeResponse'
         "401":
           description: Unauthorized (e.g., missing or invalid JWT)
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Get user profile
@@ -1080,15 +662,15 @@ paths:
         "200":
           description: Successfully refreshed the access token
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse'
+            $ref: '#/definitions/auth.AccessTokenResponse'
         "400":
           description: Bad Request (e.g., invalid refresh token)
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized (e.g., expired or missing refresh token)
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       summary: Refresh an access token
       tags:
       - Authentication
@@ -1103,26 +685,26 @@ paths:
         name: request
         required: true
         schema:
-          $ref: '#/definitions/openai.ChatCompletionRequest'
+          $ref: '#/definitions/chat.PostChatCompletionRequest'
       produces:
       - application/json
       responses:
         "200":
           description: Successful response
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_chat.ChatCompletionResponseSwagger'
+            $ref: '#/definitions/chat.ChatCompletionResponseSwagger'
         "400":
           description: Invalid request payload
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Create a chat completion
@@ -1151,19 +733,19 @@ paths:
         "200":
           description: Successfully retrieved the list of conversations
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationResponse'
+            $ref: '#/definitions/conversations.ListResponse-conversations_ConversationResponse'
         "400":
           description: Bad Request - Invalid pagination parameters
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal Server Error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: List Conversations
@@ -1179,26 +761,26 @@ paths:
         name: request
         required: true
         schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.CreateConversationRequest'
+          $ref: '#/definitions/conversations.CreateConversationRequest'
       produces:
       - application/json
       responses:
         "200":
           description: Created conversation
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse'
+            $ref: '#/definitions/conversations.ConversationResponse'
         "400":
           description: Invalid request
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Create a conversation
@@ -1219,23 +801,23 @@ paths:
         "200":
           description: Deleted conversation
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.DeletedConversationResponse'
+            $ref: '#/definitions/conversations.DeletedConversationResponse'
         "401":
           description: Unauthorized
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "403":
           description: Access denied
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Conversation not found
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Delete a conversation
@@ -1255,23 +837,23 @@ paths:
         "200":
           description: Conversation details
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse'
+            $ref: '#/definitions/conversations.ConversationResponse'
         "401":
           description: Unauthorized
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "403":
           description: Access denied
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Conversation not found
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Get a conversation
@@ -1292,34 +874,34 @@ paths:
         name: request
         required: true
         schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.UpdateConversationRequest'
+          $ref: '#/definitions/conversations.UpdateConversationRequest'
       produces:
       - application/json
       responses:
         "200":
           description: Updated conversation
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse'
+            $ref: '#/definitions/conversations.ConversationResponse'
         "400":
           description: Invalid request
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "403":
           description: Access denied
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Conversation not found
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Update a conversation
@@ -1352,23 +934,23 @@ paths:
         "200":
           description: List of items
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemListResponse'
+            $ref: '#/definitions/conversations.ConversationItemListResponse'
         "401":
           description: Unauthorized
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "403":
           description: Access denied
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Conversation not found
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: List items in a conversation
@@ -1389,34 +971,34 @@ paths:
         name: request
         required: true
         schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.CreateItemsRequest'
+          $ref: '#/definitions/conversations.CreateItemsRequest'
       produces:
       - application/json
       responses:
         "200":
           description: Created items
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse'
+            $ref: '#/definitions/conversations.ListResponse-conversations_ConversationItemResponse'
         "400":
           description: Invalid request
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "403":
           description: Access denied
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Conversation not found
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Create items in a conversation
@@ -1442,23 +1024,23 @@ paths:
         "200":
           description: Updated conversation
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationResponse'
+            $ref: '#/definitions/conversations.ConversationResponse'
         "401":
           description: Unauthorized
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "403":
           description: Access denied
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Conversation not found
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Delete an item from a conversation
@@ -1483,23 +1065,23 @@ paths:
         "200":
           description: Item details
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse'
+            $ref: '#/definitions/conversations.ConversationItemResponse'
         "401":
           description: Unauthorized
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "403":
           description: Access denied
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Conversation not found
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Get an item from a conversation
@@ -1541,7 +1123,7 @@ paths:
         "200":
           description: Successful response
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1.ModelsResponse'
+            $ref: '#/definitions/v1.ModelsResponse'
       security:
       - BearerAuth: []
       summary: List available models
@@ -1566,15 +1148,15 @@ paths:
         "200":
           description: Successfully retrieved the list of admin API keys
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse'
+            $ref: '#/definitions/organization.AdminApiKeyListResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal Server Error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: List Admin API Keys
@@ -1591,22 +1173,22 @@ paths:
         name: body
         required: true
         schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest'
+          $ref: '#/definitions/organization.CreateOrganizationAdminAPIKeyRequest'
       produces:
       - application/json
       responses:
         "200":
           description: Successfully created admin API key
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse'
+            $ref: '#/definitions/organization.OrganizationAdminAPIKeyResponse'
         "400":
           description: Bad request - invalid payload
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Create Admin API Key
@@ -1625,16 +1207,16 @@ paths:
         "200":
           description: Successfully deleted the admin API key
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse'
+            $ref: '#/definitions/organization.AdminAPIKeyDeletedResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Not Found - API key with the given ID does not exist or does
             not belong to the organization
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Delete Admin API Key
@@ -1652,16 +1234,16 @@ paths:
         "200":
           description: Successfully retrieved the admin API key
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse'
+            $ref: '#/definitions/organization.OrganizationAdminAPIKeyResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Not Found - API key with the given ID does not exist or does
             not belong to the organization
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Get Admin API Key
@@ -1690,15 +1272,15 @@ paths:
         "200":
           description: Successfully retrieved the list of projects
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectListResponse'
+            $ref: '#/definitions/projects.ProjectListResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal Server Error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: List Projects
@@ -1714,26 +1296,26 @@ paths:
         name: body
         required: true
         schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest'
+          $ref: '#/definitions/projects.CreateProjectRequest'
       produces:
       - application/json
       responses:
         "200":
           description: Successfully created project
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
+            $ref: '#/definitions/projects.ProjectResponse'
         "400":
           description: Bad request - invalid payload
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal Server Error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Create Project
@@ -1752,16 +1334,16 @@ paths:
         "200":
           description: Successfully retrieved the project
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
+            $ref: '#/definitions/projects.ProjectResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Not Found - project with the given ID does not exist or does
             not belong to the organization
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Get Project
@@ -1782,26 +1364,26 @@ paths:
         name: body
         required: true
         schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest'
+          $ref: '#/definitions/projects.UpdateProjectRequest'
       produces:
       - application/json
       responses:
         "200":
           description: Successfully updated the project
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
+            $ref: '#/definitions/projects.ProjectResponse'
         "400":
           description: Bad request - invalid payload
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Not Found - project with the given ID does not exist
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Update Project
@@ -1820,15 +1402,15 @@ paths:
         "200":
           description: Successfully archived the project
           schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
+            $ref: '#/definitions/projects.ProjectResponse'
         "401":
           description: Unauthorized - invalid or missing API key
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Not Found - project with the given ID does not exist
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Archive Project
@@ -1851,23 +1433,23 @@ paths:
         "200":
           description: API key created successfully
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse'
+            $ref: '#/definitions/apikeys.ApiKeyCreateResponse'
         "400":
           description: Bad request, e.g., invalid payload or missing IDs
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized, e.g., invalid or missing token
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Not Found, e.g., project or organization not found
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: List new project API key
@@ -1888,35 +1470,333 @@ paths:
         name: requestBody
         required: true
         schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest'
+          $ref: '#/definitions/apikeys.CreateApiKeyRequest'
       produces:
       - application/json
       responses:
         "200":
           description: API key created successfully
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse'
+            $ref: '#/definitions/apikeys.ApiKeyCreateResponse'
         "400":
           description: Bad request, e.g., invalid payload or missing IDs
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "401":
           description: Unauthorized, e.g., invalid or missing token
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "404":
           description: Not Found, e.g., project or organization not found
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
         "500":
           description: Internal server error
           schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
+            $ref: '#/definitions/responses.ErrorResponse'
       security:
       - BearerAuth: []
       summary: Create a new project API key
       tags:
       - Organizations
+  /v1/responses:
+    post:
+      consumes:
+      - application/json
+      description: |-
+        Creates a new LLM response for the given input. Supports multiple input types including text, images, files, web search, and more.
+
+        **Supported Input Types:**
+        - `text`: Plain text input
+        - `image`: Image input (URL or base64)
+        - `file`: File input by file ID
+        - `web_search`: Web search input
+        - `file_search`: File search input
+        - `streaming`: Streaming input
+        - `function_calls`: Function calls input
+        - `reasoning`: Reasoning input
+
+        **Example Request:**
+        ```json
+        {
+          "model": "gpt-4",
+          "input": {
+            "type": "text",
+            "text": "Hello, how are you?"
+          },
+          "max_tokens": 100,
+          "temperature": 0.7,
+          "stream": false,
+          "background": false
+        }
+        ```
+
+        **Response Format:**
+        The response uses an embedded structure where all fields are at the top level:
+        - `jan_status`: Jan API status code (optional)
+        - `id`: Response identifier
+        - `object`: Object type ("response")
+        - `created`: Unix timestamp
+        - `model`: Model used
+        - `status`: Response status
+        - `input`: Input data
+        - `output`: Generated output
+
+        **Example Response:**
+        ```json
+        {
+          "jan_status": "000000",
+          "id": "resp_1234567890",
+          "object": "response",
+          "created": 1234567890,
+          "model": "gpt-4",
+          "status": "completed",
+          "input": {
+            "type": "text",
+            "text": "Hello, how are you?"
+          },
+          "output": {
+            "type": "text",
+            "text": {
+              "value": "I'm doing well, thank you!"
+            }
+          }
+        }
+        ```
+
+        **Response Status:**
+        - `completed`: Response generation finished successfully
+        - `processing`: Response is being generated
+        - `failed`: Response generation failed
+        - `cancelled`: Response was cancelled
+      parameters:
+      - description: Request payload containing model, input, and generation parameters
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/responses.CreateResponseRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successful response with embedded fields
+        "202":
+          description: Response accepted for background processing with embedded fields
+        "400":
+          description: Invalid request payload
+        "401":
+          description: Unauthorized
+        "422":
+          description: Validation error
+        "429":
+          description: Rate limit exceeded
+        "500":
+          description: Internal server error
+      security:
+      - BearerAuth: []
+      summary: Create a response
+      tags:
+      - Jan
+      - Jan-Responses
+  /v1/responses/{response_id}:
+    delete:
+      consumes:
+      - application/json
+      description: |-
+        Deletes an LLM response by its ID. Returns the deleted response object with an embedded structure where all fields are at the top level.
+
+        **Response Format:**
+        The response uses an embedded structure where all fields are at the top level:
+        - `jan_status`: Jan API status code (optional)
+        - `id`: Response identifier
+        - `object`: Object type ("response")
+        - `created`: Unix timestamp
+        - `model`: Model used
+        - `status`: Response status (will be "cancelled")
+        - `input`: Input data
+        - `cancelled_at`: Cancellation timestamp
+      parameters:
+      - description: Unique identifier of the response
+        in: path
+        name: response_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successful response with embedded fields
+        "400":
+          description: Invalid request
+        "401":
+          description: Unauthorized
+        "404":
+          description: Response not found
+        "500":
+          description: Internal server error
+      security:
+      - BearerAuth: []
+      summary: Delete a response
+      tags:
+      - Jan
+      - Jan-Responses
+    get:
+      consumes:
+      - application/json
+      description: |-
+        Retrieves an LLM response by its ID. Returns the complete response object with an embedded structure where all fields are at the top level.
+
+        **Response Format:**
+        The response uses an embedded structure where all fields are at the top level:
+        - `jan_status`: Jan API status code (optional)
+        - `id`: Response identifier
+        - `object`: Object type ("response")
+        - `created`: Unix timestamp
+        - `model`: Model used
+        - `status`: Response status
+        - `input`: Input data
+        - `output`: Generated output
+      parameters:
+      - description: Unique identifier of the response
+        in: path
+        name: response_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successful response with embedded fields
+        "400":
+          description: Invalid request
+        "401":
+          description: Unauthorized
+        "404":
+          description: Response not found
+        "500":
+          description: Internal server error
+      security:
+      - BearerAuth: []
+      summary: Get a response
+      tags:
+      - Jan
+      - Jan-Responses
+  /v1/responses/{response_id}/cancel:
+    post:
+      consumes:
+      - application/json
+      description: |-
+        Cancels a running LLM response that was created with background=true. Only responses that are currently processing can be cancelled.
+
+        **Response Format:**
+        The response uses an embedded structure where all fields are at the top level:
+        - `jan_status`: Jan API status code (optional)
+        - `id`: Response identifier
+        - `object`: Object type ("response")
+        - `created`: Unix timestamp
+        - `model`: Model used
+        - `status`: Response status (will be "cancelled")
+        - `input`: Input data
+        - `cancelled_at`: Cancellation timestamp
+      parameters:
+      - description: Unique identifier of the response to cancel
+        in: path
+        name: response_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Response cancelled successfully with embedded fields
+        "400":
+          description: Invalid request or response cannot be cancelled
+        "401":
+          description: Unauthorized
+        "404":
+          description: Response not found
+        "500":
+          description: Internal server error
+      security:
+      - BearerAuth: []
+      summary: Cancel a response
+      tags:
+      - Jan
+      - Jan-Responses
+  /v1/responses/{response_id}/input_items:
+    get:
+      consumes:
+      - application/json
+      description: |-
+        Retrieves a paginated list of input items for a response. Supports cursor-based pagination for efficient retrieval of large datasets.
+
+        **Response Format:**
+        The response uses an embedded structure where all fields are at the top level:
+        - `jan_status`: Jan API status code (optional)
+        - `first_id`: First item ID for pagination (optional)
+        - `last_id`: Last item ID for pagination (optional)
+        - `has_more`: Whether more items are available (optional)
+        - `id`: Input item identifier
+        - `object`: Object type ("input_item")
+        - `created`: Unix timestamp
+        - `type`: Input type
+        - `text`: Text content (for text type)
+        - `image`: Image content (for image type)
+        - `file`: File content (for file type)
+
+        **Example Response:**
+        ```json
+        {
+          "jan_status": "000000",
+          "first_id": "input_123",
+          "last_id": "input_456",
+          "has_more": false,
+          "id": "input_1234567890",
+          "object": "input_item",
+          "created": 1234567890,
+          "type": "text",
+          "text": "Hello, world!"
+        }
+        ```
+      parameters:
+      - description: Unique identifier of the response
+        in: path
+        name: response_id
+        required: true
+        type: string
+      - description: 'Maximum number of items to return (default: 20, max: 100)'
+        in: query
+        name: limit
+        type: integer
+      - description: Cursor for pagination - return items after this ID
+        in: query
+        name: after
+        type: string
+      - description: Cursor for pagination - return items before this ID
+        in: query
+        name: before
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successful response with paginated input items and embedded
+            fields
+        "400":
+          description: Invalid request or pagination parameters
+        "401":
+          description: Unauthorized
+        "404":
+          description: Response not found
+        "500":
+          description: Internal server error
+      security:
+      - BearerAuth: []
+      summary: List input items
+      tags:
+      - Jan
+      - Jan-Responses
   /v1/version:
     get:
       description: Returns the current build version of the API server.