Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/dev-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ concurrency:

on:
push:
branches: ['mobile-support'] # put your current branch to create a build. Core team only.
branches: ['upload-ui-ux'] # put your current branch to create a build. Core team only.
paths-ignore:
- '**.md'
- 'cloud-deployments/*'
Expand Down
33 changes: 33 additions & 0 deletions collector/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,39 @@ app.post(
}
);

/**
 * POST /parse
 * Runs `processSingleFile` on the requested filename with `parseOnly` forced
 * to true and replies with the outcome as JSON:
 * `{ filename, success, reason, documents }`.
 *
 * The reply status is always 200; failures are signalled through
 * `success: false` and `reason` in the payload.
 */
app.post("/parse", [verifyPayloadIntegrity], async (request, response) => {
  const { filename, options = {} } = reqBody(request);

  try {
    // Normalize the path, then strip any leading "../" or "..\" segments so
    // the name cannot start by climbing out of the working directory.
    const normalized = path.normalize(filename);
    const targetFilename = normalized.replace(/^(\.\.(\/|\\|$))+/, "");

    // Caller-supplied options are kept, but parseOnly always wins.
    const parseOptions = { ...options, parseOnly: true };
    const result = await processSingleFile(targetFilename, parseOptions);
    const { success, reason, documents = [] } = result;

    response.status(200).json({
      filename: targetFilename,
      success,
      reason,
      documents,
    });
  } catch (e) {
    // Any failure (including a non-string filename) is logged and reported
    // back with the filename exactly as it was received.
    console.error(e);
    response.status(200).json({
      filename,
      success: false,
      reason: "A processing error occurred.",
      documents: [],
    });
  }
});

app.post(
"/process-link",
[verifyPayloadIntegrity],
Expand Down
1 change: 1 addition & 0 deletions collector/processSingleFile/convert/asAudio.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
const document = writeToServerDocuments({
data,
filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
});
trashFile(fullFilePath);
console.log(
Expand Down
3 changes: 2 additions & 1 deletion collector/processSingleFile/convert/asDocx.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const {
const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify");

async function asDocX({ fullFilePath = "", filename = "" }) {
async function asDocX({ fullFilePath = "", filename = "", options = {} }) {
const loader = new DocxLoader(fullFilePath);

console.log(`-- Working ${filename} --`);
Expand Down Expand Up @@ -48,6 +48,7 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({
data,
filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
Expand Down
3 changes: 2 additions & 1 deletion collector/processSingleFile/convert/asEPub.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const {
} = require("../../utils/files");
const { default: slugify } = require("slugify");

async function asEPub({ fullFilePath = "", filename = "" }) {
async function asEPub({ fullFilePath = "", filename = "", options = {} }) {
let content = "";
try {
const loader = new EPubLoader(fullFilePath, { splitChapters: false });
Expand Down Expand Up @@ -46,6 +46,7 @@ async function asEPub({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({
data,
filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
Expand Down
1 change: 1 addition & 0 deletions collector/processSingleFile/convert/asImage.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ async function asImage({ fullFilePath = "", filename = "", options = {} }) {
const document = writeToServerDocuments({
data,
filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
Expand Down
1 change: 1 addition & 0 deletions collector/processSingleFile/convert/asMbox.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ async function asMbox({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({
data,
filename: `${slugify(filename)}-${data.id}-msg-${item}`,
options: { parseOnly: options.parseOnly },
});
documents.push(document);
}
Expand Down
1 change: 1 addition & 0 deletions collector/processSingleFile/convert/asOfficeMime.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ async function asOfficeMime({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({
data,
filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
Expand Down
1 change: 1 addition & 0 deletions collector/processSingleFile/convert/asPDF/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ async function asPdf({ fullFilePath = "", filename = "", options = {} }) {
const document = writeToServerDocuments({
data,
filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
Expand Down
3 changes: 2 additions & 1 deletion collector/processSingleFile/convert/asTxt.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const {
} = require("../../utils/files");
const { default: slugify } = require("slugify");

async function asTxt({ fullFilePath = "", filename = "" }) {
async function asTxt({ fullFilePath = "", filename = "", options = {} }) {
let content = "";
try {
content = fs.readFileSync(fullFilePath, "utf8");
Expand Down Expand Up @@ -44,6 +44,7 @@ async function asTxt({ fullFilePath = "", filename = "" }) {
const document = writeToServerDocuments({
data,
filename: `${slugify(filename)}-${data.id}`,
options: { parseOnly: options.parseOnly },
});
trashFile(fullFilePath);
console.log(`[SUCCESS]: ${filename} converted & ready for embedding.\n`);
Expand Down
8 changes: 6 additions & 2 deletions collector/processSingleFile/convert/asXlsx.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const {
trashFile,
writeToServerDocuments,
documentsFolder,
directUploadsFolder,
} = require("../../utils/files");
const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify");
Expand All @@ -26,14 +27,16 @@ function convertToCSV(data) {
.join("\n");
}

async function asXlsx({ fullFilePath = "", filename = "" }) {
async function asXlsx({ fullFilePath = "", filename = "", options = {} }) {
const documents = [];
const folderName = slugify(`${path.basename(filename)}-${v4().slice(0, 4)}`, {
lower: true,
trim: true,
});
const outFolderPath = options.parseOnly
? path.resolve(directUploadsFolder, folderName)
: path.resolve(documentsFolder, folderName);

const outFolderPath = path.resolve(documentsFolder, folderName);
try {
const workSheetsFromFile = xlsx.parse(fullFilePath);
if (!fs.existsSync(outFolderPath))
Expand Down Expand Up @@ -68,6 +71,7 @@ async function asXlsx({ fullFilePath = "", filename = "" }) {
data: sheetData,
filename: `sheet-${slugify(name)}`,
destinationOverride: outFolderPath,
options: { parseOnly: options.parseOnly },
});
documents.push(document);
console.log(
Expand Down
18 changes: 17 additions & 1 deletion collector/utils/files/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ const documentsFolder =
? path.resolve(__dirname, `../../../server/storage/documents`)
: path.resolve(process.env.STORAGE_DIR, `documents`);

/**
 * The folder where direct uploads are stored when processed by the
 * collector. These are files that were DnD'd (dragged and dropped) into the UI
 * and are not to be embedded or selectable from the file picker.
 *
 * When NODE_ENV is "development" this resolves to
 * `server/storage/direct-uploads` three levels above this module's directory;
 * otherwise it resolves to `direct-uploads` inside `process.env.STORAGE_DIR`.
 */
const directUploadsFolder =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../../server/storage/direct-uploads`)
: path.resolve(process.env.STORAGE_DIR, `direct-uploads`);

/**
* Checks if a file is text by checking the mime type and then falling back to buffer inspection.
* This way we can capture all the cases where the mime type is not known but still parseable as text
Expand Down Expand Up @@ -102,17 +112,21 @@ function createdDate(filepath) {
* @param {Object} params.data - The data to write to the file. Must look like a document object.
* @param {string} params.filename - The name of the file to write to.
* @param {string|null} params.destinationOverride - A forced destination to write to - will be honored if provided.
* @param {Object} params.options - The options for the function.
* @param {boolean} params.options.parseOnly - If true, the file will be written to the direct uploads folder instead of the documents folder. Will be ignored if destinationOverride is provided.
* @returns {Object} - The data with the location added.
*/
function writeToServerDocuments({
data = {},
filename = null,
filename,
destinationOverride = null,
options = {},
}) {
if (!filename) throw new Error("Filename is required!");

let destination = null;
if (destinationOverride) destination = path.resolve(destinationOverride);
else if (options.parseOnly) destination = path.resolve(directUploadsFolder);
else destination = path.resolve(documentsFolder, "custom-documents");

if (!fs.existsSync(destination))
Expand All @@ -129,6 +143,7 @@ function writeToServerDocuments({
// that will work since we know the location exists and since we only allow
// 1-level deep folders this will always work. This still works for integrations like GitHub and YouTube.
location: destinationFilePath.split("/").slice(-2).join("/"),
isDirectUpload: options.parseOnly || false,
};
}

Expand Down Expand Up @@ -207,4 +222,5 @@ module.exports = {
isWithin,
sanitizeFileName,
documentsFolder,
directUploadsFolder,
};
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ export function ChatTooltips() {
delayShow={500}
className="tooltip !text-xs max-w-[350px]"
/>
<Tooltip
id="context-window-limit-exceeded"
place="top"
delayShow={500}
className="tooltip !text-xs max-w-[350px]"
/>
<DocumentLevelTooltip />
</>
);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import { CircleNotch } from "@phosphor-icons/react";
import ModalWrapper from "@/components/ModalWrapper";
import pluralize from "pluralize";
import { numberWithCommas } from "@/utils/numbers";
import useUser from "@/hooks/useUser";
import { Link } from "react-router-dom";
import Paths from "@/utils/paths";
import Workspace from "@/models/workspace";

export default function FileUploadWarningModal({
show,
onClose,
onContinue,
onEmbed,
tokenCount,
maxTokens,
fileCount = 1,
isEmbedding = false,
embedProgress = 0,
}) {
const { user } = useUser();
const canEmbed = !user || user.role !== "default";
if (!show) return null;

if (isEmbedding) {
return (
<ModalWrapper isOpen={show}>
<div className="relative max-w-[600px] bg-theme-bg-primary rounded-lg shadow border border-theme-modal-border">
<div className="p-6 flex flex-col items-center justify-center">
<p className="text-white text-lg font-semibold mb-4">
Embedding {embedProgress + 1} of {fileCount}{" "}
{pluralize("file", fileCount)}
</p>
<CircleNotch size={32} className="animate-spin text-white" />
<p className="text-white/60 text-sm mt-2">
Please wait while we embed your files...
</p>
</div>
</div>
</ModalWrapper>
);
}

return (
<ModalWrapper isOpen={show}>
<div className="relative max-w-[600px] bg-theme-bg-primary rounded-lg shadow border border-theme-modal-border">
<div className="relative p-6 border-b border-theme-modal-border">
<div className="w-full flex gap-x-2 items-center">
<h3 className="text-xl font-semibold text-white overflow-hidden overflow-ellipsis whitespace-nowrap">
Context Window Warning
</h3>
</div>
</div>

<div className="py-7 px-9 space-y-4">
<p className="text-theme-text-primary text-sm">
Your workspace is using {numberWithCommas(tokenCount)} of{" "}
{numberWithCommas(maxTokens)} available tokens. We recommend keeping
usage below {(Workspace.maxContextWindowLimit * 100).toFixed(0)}% to
ensure the best chat experience. Adding {fileCount} more{" "}
{pluralize("file", fileCount)} would exceed this limit.{" "}
<Link
target="_blank"
to={Paths.documentation.contextWindows()}
className="text-theme-text-secondary text-sm underline"
>
Learn more about context windows &rarr;
</Link>
</p>
<p className="text-theme-text-primary text-sm">
Choose how you would like to proceed with these uploads.
</p>
</div>

<div className="flex w-full justify-between items-center p-6 space-x-2 border-t border-theme-modal-border rounded-b">
<button
onClick={onClose}
type="button"
className="border-none transition-all duration-300 bg-theme-modal-border text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Cancel
</button>
<div className="flex w-full justify-end items-center space-x-2">
<button
onClick={onContinue}
type="button"
className="border-none transition-all duration-300 bg-theme-modal-border text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Continue Anyway
</button>
{canEmbed && (
<button
onClick={onEmbed}
disabled={isEmbedding || !canEmbed}
type="button"
className="border-none transition-all duration-300 bg-white text-black hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Embed {pluralize("File", fileCount)}
</button>
)}
</div>
</div>
</div>
</ModalWrapper>
);
}
Loading