diff --git a/app/api/ena/route.ts b/app/api/ena/route.ts new file mode 100644 index 00000000..60a55afb --- /dev/null +++ b/app/api/ena/route.ts @@ -0,0 +1,240 @@ +import { + ENAAssembly, + PrimaryDataApiResult, +} from "app/apis/catalog/brc-analytics-catalog/common/entities"; +import { NextResponse } from "next/server"; + +const items_limit = 10000; + +async function fetchDataFromENA( + url: string, + redirect_arg: RequestRedirect = "manual" +): Promise { + const response: Response = await fetch(url, { redirect: redirect_arg }); + if (response.status !== 200) { + if (response.status === 301) { + console.debug( + `ENA Rest API has been updated, internal API call needs update: ${url}` + ); + return { + count: 0, + data: [], + error: `ENA Rest API has been updated!!!`, + status: 301, + }; + } + const errorMessageText = await response.text(); + let errorMessage; + try { + errorMessage = JSON.parse(errorMessageText).message; + } catch (e) { + errorMessage = `errorMessageText: ${e}`; + } + return { + count: 0, + data: [], + error: `from ENA, ${errorMessage}`, + status: response.status, + }; + } + const result = await response.json(); + return Promise.resolve({ + count: "count" in result ? 
parseInt(result.count) : result.length, + data: result, + error: "", + status: response.status, + }); +} + +async function processAccessionIds(filter: string): Promise { + // Identify any GCG/GCA accession IDS in the filter string, if they exist + // we need to translate them into sample IDs + // Will convert the following string + // 'accession=GCA_009859065.2 AND accession=GCF_009859065.2' + // into + // (sample_accession="SAMN09946140" OR sample_accession="SAMN0994555") OR sample_accession="NO_SAMPLE" + // GCA_009859065.2 generated 2 sample_accession and GCF_009859065.2 generated 0 sample_accession + + function getSampleAcceionIds(filter: string): { + accessions: string[]; + accessionsDict: { [key: string]: string[] }; + accessionsUrl: string; + } { + const accessions: string[] = []; + const accessionsDict: { [key: string]: string[] } = {}; + const accessionRegex = /(accession\s*=\s*)("?)(GC[FA]_\d+\.\d+)("?)/g; + let match; + let counterCounter = 0; + while ((match = accessionRegex.exec(filter)) !== null) { + counterCounter++; + accessions.push(match[3]); + if (accessionsDict[match[3]]) { + accessionsDict[match[3]].push( + `${match[1]}${match[2]}${match[3]}${match[4]}` + ); + } else { + accessionsDict[match[3]] = [ + `${match[1]}${match[2]}${match[3]}${match[4]}`, + ]; + } + } + + if (filter.match(/\b(accession\s*=)\s*/g)?.length !== counterCounter) { + throw new Error( + `GCF/GCA syntax error, on or multiple accession id have incorrect format, should be GCF_XXXXXXX.X or GCA_XXXXXXX.X` + ); + } + if (accessions.length === 0) { + accessions.push("NO_ACCESSION"); + } + const queryParams = new URLSearchParams({ + query: accessions + .map((item) => `assembly_set_accession="${item}"`) + .join(" OR "), + }); + // Construct the URL for the API request to fetch sample IDs based on accession IDs + return { + accessions: accessions, + accessionsDict: accessionsDict, + accessionsUrl: 
`https://www.ebi.ac.uk/ena/portal/api/search?result=assembly&fields=assembly_set_accession,sample_accession&${queryParams.toString()}&format=json`, + }; + } + + function updateFilterWithSampleIds( + filter: string, + accessions: string[], + accessionsDict: { [key: string]: string[] }, + response: { data: []; error: string; status: number } + ): string { + // Extract sample IDs from the API response and add them to the sample_ids array + const sample_ids: { [key: string]: string[] } = {}; + for (const assembly of response.data as ENAAssembly[]) { + if (!sample_ids[assembly.assembly_set_accession]) { + sample_ids[assembly.assembly_set_accession] = [ + assembly.sample_accession, + ]; + } else { + sample_ids[assembly.assembly_set_accession].push( + assembly.sample_accession + ); + } + } + for (const accession of accessions) { + if (sample_ids[accession]) { + for (const accessionExpression of accessionsDict[accession]) { + filter = filter.replace( + accessionExpression, + "(" + + sample_ids[accession] + .map((sample_id) => `sample_accession="${sample_id}"`) + .join(" OR ") + + ")" + ); + } + } else { + for (const accessionExpression of accessionsDict[accession]) { + filter = filter.replace( + accessionExpression, + `sample_accession="NO_SAMPLE"` + ); + } + } + } + return filter; + } + + if (filter.includes("accession=")) { + // Find accessions id and create an url and mappers to + // convert the accession id to sample id + const { accessions, accessionsDict, accessionsUrl } = + getSampleAcceionIds(filter); + + // if (accessions.length === 1 && accessions[0] === "NO_ACCESSION") { + // throw new Error( + // `GCF/GCA syntax error, on or multiple accession id have incorrect format, should be GCF_XXXXXXX.X or GCA_XXXXXXX.X` + // ); + // } + + // Fetch the data from the API + const accession_response = await fetchDataFromENA(accessionsUrl); + if (accession_response.status !== 200) { + throw new Error( + `ENA API error: status: ${accession_response.status}, message: 
${JSON.stringify(accession_response.data)}` + ); + } + + // Update the filter string with the sample IDs + return updateFilterWithSampleIds( + filter, + accessions, + accessionsDict, + accession_response + ); + } + return filter; +} + +export async function POST(request: Request): Promise { + let { filter } = await request.json(); + const fields = [ + "accession", + "sra_md5", + "base_count", + "study_accession", + "sample_accession", + "instrument_platform", + "instrument_model", + "library_layout", + ]; + try { + filter = await processAccessionIds(filter); + } catch (error) { + return NextResponse.json( + { count: 0, data: [], error: `${error}` }, + { status: 500 } + ); + } + + const runQueryParams = new URLSearchParams({ + query: filter, + }); + + const filter_url = `${runQueryParams.toString().replace(/%3D%3D/g, "%3D")}`; + const count_url = `https://www.ebi.ac.uk/ena/portal/api/count?result=read_run&${filter_url}&format=json`; + + const count_response = await fetchDataFromENA(count_url); + if (count_response.status !== 200) { + return NextResponse.json( + { count: 0, data: [], error: count_response.error }, + { + status: count_response.status, + } + ); + } + + const count: number = count_response.count; + if (count === 0) { + return NextResponse.json({ count: 0, data: [] }, { status: 200 }); + } + if (count > items_limit) { + return NextResponse.json({ + count: 0, + data: [], + error: `To many entries return: ${count}, please add filters to reduce the number of entries.`, + }); + } + + const url_search = `https://www.ebi.ac.uk/ena/portal/api/search?result=read_run&${filter_url}&fields=${fields.join(",")}&limit=${items_limit}&format=json`; + //console.debug(`Search URL: ${url_search}`); + const search_response = await fetchDataFromENA(url_search); + if (search_response.status !== 200) { + return NextResponse.json(search_response.data, { + status: search_response.status, + }); + } + + return NextResponse.json( + { count: count, data: search_response.data }, + { 
status: search_response.status } + ); +} diff --git a/app/apis/catalog/brc-analytics-catalog/common/entities.ts b/app/apis/catalog/brc-analytics-catalog/common/entities.ts index 1a0bbf5f..db94dd43 100644 --- a/app/apis/catalog/brc-analytics-catalog/common/entities.ts +++ b/app/apis/catalog/brc-analytics-catalog/common/entities.ts @@ -46,6 +46,38 @@ export interface EntitiesResponsePagination { total: number; } +export interface ENAAssembly { + assembly_set_accession: string; + sample_accession: string; +} + +export interface ENAReadRuns { + accession: string; + base_count: string; + sample_accession: string; + studies_accession: string; +} + +export interface PrimaryDataApiResult { + count: number; + data: []; + error: string; + status: number; +} + +export interface RunReadsFields { + description: string; + name: string; + type: string; +} + +export interface ReadRunStatistics { + bases: number; + biosamples: number; + read_runs: number; + studies: number; +} + export interface WorkflowCategory { category: string; description: string; diff --git a/app/components/Entity/components/PrimaryDataViewer/primaryDataViewer.style.ts b/app/components/Entity/components/PrimaryDataViewer/primaryDataViewer.style.ts new file mode 100644 index 00000000..f4910bee --- /dev/null +++ b/app/components/Entity/components/PrimaryDataViewer/primaryDataViewer.style.ts @@ -0,0 +1,110 @@ +import { GridPaperSection } from "@databiosphere/findable-ui/lib/components/common/Section/section.styles"; + +import styled from "@emotion/styled"; + +export const StyledSection = styled(GridPaperSection)` + flex-direction: column; +`; + +export const SectionContent = styled.div` + flex: 1; + display: flex; + flex-direction: column; + align-items: center; + img { + width: 600px; + height: 600px; + } +`; + +export const BlinkingDots = styled.span` + font-weight: bold; + @keyframes blink { + 0% { + opacity: 1; + } + 33% { + opacity: 0; + } + 66% { + opacity: 0; + } + 100% { + opacity: 1; + } + } + 
.dot:nth-child(1) { + animation: blink 1.5s infinite; + } + .dot:nth-child(2) { + animation: blink 1.5s infinite 0.5s; + } + .dot:nth-child(3) { + animation: blink 1.5s infinite 1s; + } +`; + +export const ReactPageNation = styled.span` + /* General container styling */ + .container { + max-width: 800px; + margin: 50px auto; + padding: 20px; + background-color: #f9f9f9; + border-radius: 8px; + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); + font-family: "Arial", sans-serif; + } + + /* Blog posts list styling */ + .list-group-item { + background-color: white; + border: 1px solid #ddd; + border-radius: 8px; + padding: 20px; + margin-bottom: 15px; + transition: box-shadow 0.3s ease; + } + + .list-group-item p { + color: #666; + line-height: 1.6; + } + + /* Pagination styling */ + .pagination { + display: flex; + justify-content: center; + padding: 0px; + + list-style: none; + } + + .pagination li { + margin: 0 1px; + font-size: 0.7rem; + } + + .pagination li a { + display: inline-block; + padding: 5px 10px 5px 10px; + background-color: white; + color: #212b36; + border-radius: 5px; + text-decoration: none; + transition: + background-color 0.3s ease, + color 0.3s ease; + } + + .pagination li a:hover { + background-color: #212b36; + color: white; + } + + .pagination li.active a { + border-color: #212b36; + border-style: dashed; + border-width: 1px; + } +`; diff --git a/app/components/Entity/components/PrimaryDataViewer/primaryDataViewer.tsx b/app/components/Entity/components/PrimaryDataViewer/primaryDataViewer.tsx new file mode 100644 index 00000000..d81e1868 --- /dev/null +++ b/app/components/Entity/components/PrimaryDataViewer/primaryDataViewer.tsx @@ -0,0 +1,553 @@ +"use client"; // Ensure client-side rendering + +import React, { useEffect, useState } from "react"; + +import runReadFields from "../../../../../catalog/output/runReadFields.json"; +import { + ENAReadRuns, + PrimaryDataApiResult, + ReadRunStatistics, +} from 
"app/apis/catalog/brc-analytics-catalog/common/entities"; + +import { + FluidPaper, + GridPaper, +} from "@databiosphere/findable-ui/lib/components/common/Paper/paper.styles"; + +import { + BlinkingDots, + ReactPageNation, + StyledSection, +} from "./primaryDataViewer.style"; +import ReactPaginate from "react-paginate"; + +import { + combineExpressionParts, + formatExpression, + formatLargeNumber, + splitUnqouatedSpace, + validateExpression, +} from "./utils"; + +interface PrimaryDataViewerProps { + initialQuery: string; +} + +interface TableColumns { + accessor: string; + header: string; +} + +const columns: TableColumns[] = [ + { accessor: "library_layout", header: "Layout" }, + { accessor: "instrument_model", header: "Model" }, + { accessor: "instrument_platform", header: "Platform" }, + { accessor: "accession", header: "SRR" }, +]; + +const fetchData = async ( + filterString: string, + setData: React.Dispatch>, + setStatistics: React.Dispatch> +): Promise<{ status: number }> => { + try { + const response = await fetch(`http://127.0.0.1:3000/api/ena`, { + body: JSON.stringify({ filter: filterString }), + headers: { + "Content-Type": "application/json", + }, + method: "POST", + }); + + const result = await response.json(); + const stats = { + bases: 0, + biosamples: new Set(), + read_runs: new Set(), + studies: new Set(), + }; + result.data.forEach((item: ENAReadRuns) => { + stats.bases += parseInt(item.base_count, 10); + stats.biosamples.add(item.sample_accession); + stats.read_runs.add(item.accession); + stats.studies.add(item.studies_accession); + }); + setStatistics({ + bases: stats.bases, + biosamples: stats.biosamples.size, + read_runs: stats.read_runs.size, + studies: stats.studies.size, + }); + setData(result); + } catch (error) { + console.error("Error fetching data:", error); + } + return { status: 200 }; +}; + +function renderQueryBuilder( + filterString: string, + validateQueryExpression: (expression: string) => boolean +): JSX.Element { + const 
split_on_first_equals = (s: string): string[] => { + const index = s.indexOf("="); + return index !== -1 ? [s.slice(0, index), s.slice(index + 1)] : [s]; + }; + const filterStringParts = splitUnqouatedSpace(filterString).map((item) => + split_on_first_equals(item) + ); + return ( + <> +

+ Query builder +

+ + + {filterStringParts.length ? ( + filterStringParts.map((token, index) => ( + + + + + + + )) + ) : ( + + + + + + + )} + +
+ + + { + filterStringParts[index][1] = e.target.value; + validateQueryExpression( + combineExpressionParts(filterStringParts) + ); + }} + /> + + + +
+ + + + + +
+ + ); +} + +const renderInputSearchField = ( + filterString: string, + validateQueryExpression: (expression: string) => boolean, + searchSyntaxError: string[] +): JSX.Element => { + return ( + <> + validateQueryExpression(e.target.value)} + style={{ + border: "1px solid #ccc", + borderRadius: "4px", + boxShadow: "0 2px 4px rgba(0, 0, 0, 0.1)", + boxSizing: "border-box", + fontSize: "1rem", + margin: "10px 0", + padding: "10px", + width: "100%", + }} + /> + {searchSyntaxError && + searchSyntaxError.map((error, index) => ( +

+ {error} +

+ ))} + + ); +}; + +const renderLoadingText = (): JSX.Element => { + return ( +

+ + Loading + . + . + . + +

+ ); +}; + +const renderPrimaryDataTable = ( + data: PrimaryDataApiResult, + columns: TableColumns[], + itemsPerPage: number, + currentPage: number +): JSX.Element => { + const renderRows = ( + data: PrimaryDataApiResult, + columns: TableColumns[] + ): JSX.Element => { + if (!data) { + return ( + + + +

+ Unknown error!!! +

+ + + + ); + } + if (data.error) { + return ( + + + +

{data.error}

+ + + + ); + } + if (data.count === 0) { + return ( + + + +

No data found

+ + + + ); + } + const offset = currentPage * itemsPerPage; + const currentData = data.data.slice(offset, offset + itemsPerPage); + return ( + + {currentData.map((row: { [key: string]: string }, index: number) => ( + + {columns.map((column: TableColumns, colIndex: number) => ( + {row[column.accessor]} + ))} + + ))} + + ); + }; + return ( + <> + + + + {columns.map((column, index) => ( + + ))} + + + {renderRows(data, columns)} +
{column.header}
+ + ); +}; + +const renderPagenation = ( + itemsPerPage: number, + setItemsPerPage: React.Dispatch>, + currentPage: number, + setCurrentPage: React.Dispatch>, + data: PrimaryDataApiResult, + handlePageClick: (data: { selected: number }) => void +): JSX.Element => { + return ( +
+ + items per page: + + + +
+ + "} + breakLabel={"..."} + breakClassName={"break-me"} + forcePage={currentPage} + pageCount={Math.ceil(data.count / itemsPerPage)} + marginPagesDisplayed={1} + pageRangeDisplayed={1} + onPageChange={handlePageClick} + containerClassName={"pagination"} + activeClassName={"active"} + /> + + { + const page = Number(e.target.value) - 1; + if (page >= 0 && page < Math.ceil(data.count / itemsPerPage)) { + setCurrentPage(page); + } + }} + style={{ + border: "1px solid #ccc", + borderRadius: "4px", + boxShadow: "0 2px 4px rgba(0, 0, 0, 0.1)", + boxSizing: "border-box", + fontSize: "0.7rem", + margin: "0 10px", + padding: "5px", + width: "60px", + }} + /> +
+
+
+ ); +}; + +export const PrimaryDataViewer = ({ + initialQuery, +}: PrimaryDataViewerProps): JSX.Element => { + const [data, setData] = useState({ + count: 0, + data: [], + error: "", + status: 200, + }); + const [statistics, setStatistics] = useState({ + bases: 0, + biosamples: 0, + read_runs: 0, + studies: 0, + }); + const [filterString, setFilterString] = useState( + initialQuery ? initialQuery : "" + ); + const [searchSyntaxError, setSearchSyntaxError] = useState([]); + const [currentPage, setCurrentPage] = useState(0); + const [itemsPerPage, setItemsPerPage] = useState(25); + const [validExpression, setValidExpression] = useState(true); + + const validateQueryExpression = (expression: string): boolean => { + setValidExpression(false); + expression = formatExpression(expression); + const expressionValidated = validateExpression(expression); + const valid = expressionValidated.length === 0; + setValidExpression(valid); + if (!valid) { + setSearchSyntaxError(expressionValidated); + } else { + setSearchSyntaxError([]); + } + + setFilterString(expression); + return valid; + }; + + useEffect(() => { + if (validExpression) { + fetchData(filterString, setData, setStatistics); + } else { + setData({ count: 0, data: [], error: "", status: 200 }); + } + }, [filterString, validExpression]); + + const handlePageClick = (data: { selected: number }): void => { + setCurrentPage(data.selected); + }; + + return ( + + + + {renderPagenation( + itemsPerPage, + setItemsPerPage, + currentPage, + setCurrentPage, + data, + handlePageClick + )} + + Statistics: Read runs: {statistics.read_runs}, Biosamples:{" "} + {statistics.biosamples}, Studies: {statistics.studies}, Bases:{" "} + {formatLargeNumber(statistics.bases)} + + + {renderQueryBuilder(filterString, validateQueryExpression)} + + {renderInputSearchField( + filterString, + validateQueryExpression, + searchSyntaxError + )} + + {data && data.data + ? 
renderPrimaryDataTable(data, columns, itemsPerPage, currentPage) + : renderLoadingText()} + + {renderPagenation( + itemsPerPage, + setItemsPerPage, + currentPage, + setCurrentPage, + data, + handlePageClick + )} + + + + ); +}; diff --git a/app/components/Entity/components/PrimaryDataViewer/utils.tsx b/app/components/Entity/components/PrimaryDataViewer/utils.tsx new file mode 100644 index 00000000..9bc93d84 --- /dev/null +++ b/app/components/Entity/components/PrimaryDataViewer/utils.tsx @@ -0,0 +1,176 @@ +/** + * Combines parts of an expression into a single string. + * + * This function takes an array of string arrays, where each inner array represents + * a part of an expression. It joins the elements of each inner array with an "=" sign, + * then joins all the parts with a space, and finally replaces any multiple spaces with a single space. + * + * @param parts - An array of string arrays representing parts of an expression. + * @returns The combined expression as a single string. + */ +export function combineExpressionParts(parts: string[][]): string { + return parts + .map((part: string[]) => part.join("=")) + .join(" ") + .replace(/\s+/g, " "); +} + +export function formatExpression(expression: string): string { + // Make sure that any form of ' And ' and ' Or ' will be convert to + // ' AND ' and ' OR ' respectively + // Also remove whitespace adjecent to = sign + return expression + .replace(/\s[Aa][Nn][Dd]\s/g, " AND ") + .replace(/\s[Oo][Rr]\s/g, " OR ") + .replace(/\b\s*=\s*\b/g, "="); +} + +/** + * Formats a large number into a more readable string with appropriate suffixes. + * + * - Numbers greater than or equal to 1 billion are formatted with a 'G' suffix. + * - Numbers greater than or equal to 1 million are formatted with an 'M' suffix. + * - Numbers greater than or equal to 1 thousand are formatted with a 'K' suffix. + * - Numbers less than 1 thousand are returned as-is. + * + * @param value - The number to format. 
+ * @returns The formatted number as a string with the appropriate suffix. + */ +export function formatLargeNumber(value: number): string { + if (value >= 1e9) return `${(value / 1e9).toFixed(1)}G`; + if (value >= 1e6) return `${(value / 1e6).toFixed(1)}M`; + if (value >= 1e3) return `${(value / 1e3).toFixed(1)}K`; + return value.toString(); +} + +/** + * Splits a given string by spaces, parentheses, and quotes, while preserving quoted substrings. + * + * This function takes a string and splits it into an array of substrings based on spaces, + * parentheses, and quotes. Quoted substrings (either single or double quotes) are preserved + * as single elements in the resulting array. + * + * @param s - The input string to be split. + * @returns An array of substrings, with quoted substrings preserved. + * + * @example + * ```typescript + * validateExpression('(tax_id=7165 AND library_layout=SINGLE) OR (scientific_name="Taeniopygia guttata" AND instrument_platform=ILLUMINA) OR accession=GCA_023851605.1') + * // returns ["(", "tax_id=7165", "AND", "library_layout=SINGLE", ")", "OR", "(", 'scientific_name="Taeniopygia guttata"', "AND", "instrument_platform=ILLUMINA", ")", "OR", "accession=GCA_023851605.1"] + * ``` + */ +export function splitUnqouatedSpace(s: string): string[] { + if (/\s/.test(s)) { + let part; + let qouteChar = ""; + const parts = s.split(/(\(|\)|\s+)/).filter((part) => part.trim() !== ""); + const combinedParts = []; + while ((part = parts.shift()) !== undefined) { + const firstQuoteIndex = part.search(/['"]/); + if (firstQuoteIndex === -1) { + // No quotes in the part + combinedParts.push(part); + } else { + // Quotes in the part, lets find the closing quote + qouteChar = part[firstQuoteIndex]; + while (!part.endsWith(qouteChar) && parts.length > 0) { + part += ` ${parts.shift()}`; + } + combinedParts.push(part); + } + } + if (part) { + combinedParts.push(part); + } + return combinedParts; + } + return [s]; +} + +/** + * Validates an expression to 
ensure it follows the expected format. + * + * This function checks for balanced parentheses, valid conditions, and proper use of logical operators. + * It returns an array of error messages if any issues are found in the expression. + * + * @param expression - The expression to validate. + * @returns An array of error messages, or an empty array if the expression is valid. + * + * @example + * ```typescript + * validateExpression('(tax_id=7165 AND library_layout=SINGLE) OR (scientific_name="Taeniopygia guttata" AND instrument_platform=ILLUMINA) OR accession=GCA_023851605.1') + * // returns [] + * ``` + */ +export function validateExpression(expression: string): string[] { + const expression_bullder = /(\(|\)|\bAND\b|\bOR\b)/; + + const proceessParts = (parts: string[]): string[] => { + // Regular expressions for validation + // Ensure that non-operator parts follow the pattern: + // = + const conditionPattern = /^\s*([a-z_]+)\s*!?=\s*([A-Za-z0-9._-]+)\s*$/; + // = + const conditionPatternWithSpace = + /^\s*([a-z_]+)\s*!?=\s*(["'][A-Za-z0-9._ -]+["'])\s*$/; + // Logical operators, which need to be in uppercase and have adjacent spaces + const operatorPattern = /^\s*(AND|OR)\s*$/; + let prevWasCondition = false; + const expression_status = []; + for (let i = 0; i < parts.length; i++) { + const part = parts[i]; + if (conditionPattern.test(part) || conditionPatternWithSpace.test(part)) { + if (prevWasCondition) { + expression_status.push(`Error: Missing operator before '${part}'.`); + } + prevWasCondition = true; + } else if (operatorPattern.test(part)) { + if (!prevWasCondition) { + expression_status.push( + `Error: Operator '${part}' must be between conditions.` + ); + } + prevWasCondition = false; + } else { + expression_status.push(`Error: Invalid expression '${part}'.`); + } + } + + if (!prevWasCondition) { + expression_status.push( + "Error: Expression must end with a valid condition." 
+ ); + } + + return expression_status; + }; + + const expression_status = []; + if (expression !== "") { + // Check for parentheses without content + const emptyParentheses = /\(\s*\)/; + + // Check for unbalanced parentheses + if ( + expression.match(/(\()/g)?.length !== expression.match(/(\))/g)?.length + ) { + expression_status.push("Error: Unbalanced parentheses."); + } + + if (emptyParentheses.test(expression)) { + expression_status.push("Error: Parentheses without content."); + } + + // Process parts of the expression + const parts = proceessParts( + expression + .replace(/[()]/g, "") + .split(expression_bullder) + .map((t) => t.trim()) + .filter((t) => t.length > 0) + ); + expression_status.push(...parts); + } + + return expression_status; +} diff --git a/app/components/index.ts b/app/components/index.ts index 5865a9e6..09f1096b 100644 --- a/app/components/index.ts +++ b/app/components/index.ts @@ -36,3 +36,4 @@ export { AnalysisPortals } from "./Entity/components/AnalysisPortals/analysisPor export { GridPaperSection } from "./Layout/components/Detail/components/Section/section.styles"; export { Branding } from "./Layout/components/Footer/components/Branding/branding"; export { AnalyzeGenome } from "./Table/components/TableCell/components/AnalyzeGenome/analyzeGenome"; +export { PrimaryDataViewer } from "./Entity/components/PrimaryDataViewer/primaryDataViewer"; diff --git a/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts b/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts index ab5e1224..f12e5af6 100644 --- a/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts +++ b/app/viewModelBuilders/catalog/brc-analytics-catalog/common/viewModelBuilders.ts @@ -315,6 +315,19 @@ export const buildGenomeAnalysisMethods = ( }; }; +/** + * Build props for the genome AnalysisMethodsCatalog component. + * @param genome - Genome entity. 
+ * @returns Props to be used for the AnalysisMethodsCatalog component. + */ +export const buildGenomePrimaryData = ( + genome: BRCDataCatalogGenome +): ComponentProps => { + return { + initialQuery: `tax_id=${genome.speciesTaxonomyId}`, + }; +}; + /** * Build props for the genome AnalysisPortals component. * @param genome - Genome entity. diff --git a/catalog/build/ts/build-catalog.ts b/catalog/build/ts/build-catalog.ts index 201acc34..c41ce59a 100644 --- a/catalog/build/ts/build-catalog.ts +++ b/catalog/build/ts/build-catalog.ts @@ -4,6 +4,7 @@ import YAML from "yaml"; import { BRCDataCatalogGenome, BRCDataCatalogOrganism, + RunReadsFields, WorkflowCategory, } from "../../../app/apis/catalog/brc-analytics-catalog/common/entities"; import { @@ -25,6 +26,9 @@ buildCatalog(); async function buildCatalog(): Promise { const genomes = await buildGenomes(); const organisms = buildOrganisms(genomes); + const runFields = await buildRunReadFields( + "https://www.ebi.ac.uk/ena/portal/api/searchFields?result=read_run&format=json" + ); const workflows = await buildWorkflows(); console.log("Genomes:", genomes.length); @@ -33,6 +37,9 @@ async function buildCatalog(): Promise { console.log("Organisms:", organisms.length); await saveJson("catalog/output/organisms.json", organisms); + console.log("Run Read Fields:", runFields.length); + await saveJson("catalog/output/runReadFields.json", runFields); + console.log("Workflows:", workflows.length); await saveJson("catalog/output/workflows.json", workflows); @@ -106,6 +113,47 @@ function buildOrganism( }; } +async function buildRunReadFields(filePath: string): Promise { + const response = await fetch(filePath); + const sourceRows = await response.json(); + const mappedRows: RunReadsFields[] = [ + { + description: "AND statment", + name: "AND", + type: "expression", + }, + { + description: "OR statement", + name: "OR", + type: "expression", + }, + { + description: "starting paranthesis", + name: "(", + type: "expression", + }, + { + 
description: "ending paranthesis", + name: ")", + type: "expression", + }, + { + description: "GCF/GCA accession id", + name: "accession", + type: "string", + }, + ]; + for (const row of sourceRows) { + mappedRows.push({ + description: row.description, + name: row.columnId, + type: row.type, + }); + } + + return mappedRows.sort((a, b) => a.name.localeCompare(b.name)); +} + async function buildWorkflows(): Promise { const sourceWorkflowCategories = await readYamlFile( SOURCE_PATH_WORKFLOW_CATEGORIES diff --git a/catalog/output/runReadFields.json b/catalog/output/runReadFields.json new file mode 100644 index 00000000..67241540 --- /dev/null +++ b/catalog/output/runReadFields.json @@ -0,0 +1,785 @@ +[ + { + "name": "AND", + "description": "AND statment", + "type": "expression" + }, + { + "name": "OR", + "description": "OR statement", + "type": "expression" + }, + { + "name": "(", + "description": "starting paranthesis", + "type": "expression" + }, + { + "name": ")", + "description": "ending paranthesis", + "type": "expression" + }, + { + "name": "accession", + "description": "GCF/GCA accession id", + "type": "string" + }, + { + "name": "age", + "description": "Age when the sample was taken", + "type": "text" + }, + { + "name": "altitude", + "description": "Altitude (m)", + "type": "number" + }, + { + "name": "assembly_quality", + "description": "Quality of assembly", + "type": "text" + }, + { + "name": "assembly_software", + "description": "Assembly software", + "type": "text" + }, + { + "name": "base_count", + "description": "number of base pairs", + "type": "number" + }, + { + "name": "binning_software", + "description": "Binning software", + "type": "text" + }, + { + "name": "bio_material", + "description": "identifier for biological material including institute and collection code", + "type": "text" + }, + { + "name": "bisulfite_protocol", + "description": "text" + }, + { + "name": "broad_scale_environmental_context", + "description": "Report the major 
environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO’s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS.", + "type": "text" + }, + { + "name": "broker_name", + "description": "broker name", + "type": "controlled value" + }, + { + "name": "cage_protocol", + "description": "Link to the protocol for CAGE-seq experiments", + "type": "text" + }, + { + "name": "cell_line", + "description": "cell line from which the sample was obtained", + "type": "text" + }, + { + "name": "cell_type", + "description": "cell type from which the sample was obtained", + "type": "text" + }, + { + "name": "center_name", + "description": "Submitting center", + "type": "text" + }, + { + "name": "checklist", + "description": "ENA metadata reporting standard used to register the biosample (Checklist used)", + "type": "controlled value" + }, + { + "name": "chip_ab_provider", + "description": "text" + }, + { + "name": "chip_protocol", + "description": "text" + }, + { + "name": "chip_target", + "description": "Chip target", + "type": "text" + }, + { + "name": "collected_by", + "description": "name of the person who collected the specimen", + "type": "text" + }, + { + "name": "completeness_score", + "description": "Completeness score (%)", + "type": "number" + }, + { + "name": "contamination_score", + "description": "Contamination score (%)", + "type": "number" + }, + { + "name": "control_experiment", + "description": "Control experiment", + "type": "text" + }, + { + "name": "country", + "description": "locality of sample isolation: country names, oceans or seas, followed by regions and localities", + "type": "text" + }, + { + "name": "cultivar", + "description": 
"cultivar (cultivated variety) of plant from which sample was obtained", + "type": "text" + }, + { + "name": "culture_collection", + "description": "identifier for the sample culture including institute and collection code", + "type": "text" + }, + { + "name": "datahub", + "description": "DCC datahub name", + "type": "controlled value" + }, + { + "name": "description", + "description": "brief sequence description", + "type": "text" + }, + { + "name": "dev_stage", + "description": "sample obtained from an organism in a specific developmental stage", + "type": "text" + }, + { + "name": "disease", + "description": "Disease associated with the sample", + "type": "text" + }, + { + "name": "dnase_protocol", + "description": "text" + }, + { + "name": "ecotype", + "description": "a population within a given species displaying traits that reflect adaptation to a local habitat", + "type": "text" + }, + { + "name": "elevation", + "description": "Elevation (m)", + "type": "number" + }, + { + "name": "environment_biome", + "description": "Environment (Biome)", + "type": "text" + }, + { + "name": "environment_feature", + "description": "Environment (Feature)", + "type": "text" + }, + { + "name": "environment_material", + "description": "Environment (Material)", + "type": "text" + }, + { + "name": "environmental_medium", + "description": "Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. 
a tree, a leaf, a table top).", + "type": "text" + }, + { + "name": "environmental_sample", + "description": "identifies sequences derived by direct molecular isolation from an environmental DNA sample", + "type": "boolean" + }, + { + "name": "experiment_accession", + "description": "experiment accession number", + "type": "text" + }, + { + "name": "experiment_alias", + "description": "submitter's name for the experiment", + "type": "text" + }, + { + "name": "experiment_target", + "description": "text" + }, + { + "name": "experiment_title", + "description": "brief experiment title", + "type": "text" + }, + { + "name": "experimental_factor", + "description": "variable aspects of the experimental design", + "type": "text" + }, + { + "name": "experimental_protocol", + "description": "text" + }, + { + "name": "extraction_protocol", + "description": "text" + }, + { + "name": "faang_library_selection", + "description": "Library Selection for FAANG WGS/BS-Seq experiments", + "type": "text" + }, + { + "name": "first_created", + "description": "date when first created", + "type": "date" + }, + { + "name": "first_public", + "description": "date when made public", + "type": "date" + }, + { + "name": "germline", + "description": "the sample is an unrearranged molecule that was inherited from the parental germline", + "type": "boolean" + }, + { + "name": "hi_c_protocol", + "description": "Link to Hi-C Protocol for FAANG experiments", + "type": "text" + }, + { + "name": "host", + "description": "natural (as opposed to laboratory) host to the organism from which sample was obtained", + "type": "text" + }, + { + "name": "host_body_site", + "description": "name of body site from where the sample was obtained", + "type": "text" + }, + { + "name": "host_genotype", + "description": "genotype of host", + "type": "text" + }, + { + "name": "host_gravidity", + "description": "whether or not subject is gravid, including date due or date post-conception where applicable", + "type": "text" + 
}, + { + "name": "host_growth_conditions", + "description": "literature reference giving growth conditions of the host", + "type": "text" + }, + { + "name": "host_phenotype", + "description": "phenotype of host", + "type": "text" + }, + { + "name": "host_scientific_name", + "description": "Scientific name of the natural (as opposed to laboratory) host to the organism from which sample was obtained", + "type": "text" + }, + { + "name": "host_sex", + "description": "physical sex of the host", + "type": "controlled value" + }, + { + "name": "host_status", + "description": "condition of host (eg. diseased or healthy)", + "type": "text" + }, + { + "name": "host_tax_id", + "description": "NCBI taxon id of the host", + "type": "number" + }, + { + "name": "identified_by", + "description": "name of the taxonomist who identified the specimen", + "type": "text" + }, + { + "name": "instrument_model", + "description": "instrument model used in sequencing experiment", + "type": "text" + }, + { + "name": "instrument_platform", + "description": "instrument platform used in sequencing experiment", + "type": "controlled value" + }, + { + "name": "investigation_type", + "description": "the study type targeted by the sequencing", + "type": "text" + }, + { + "name": "isolate", + "description": "individual isolate from which sample was obtained", + "type": "text" + }, + { + "name": "isolation_source", + "description": "describes the physical, environmental and/or local geographical source of the sample", + "type": "text" + }, + { + "name": "last_updated", + "description": "date when last updated", + "type": "date" + }, + { + "name": "library_construction_protocol", + "description": "Library construction protocol", + "type": "text" + }, + { + "name": "library_gen_protocol", + "description": "text" + }, + { + "name": "library_layout", + "description": "sequencing library layout", + "type": "controlled value" + }, + { + "name": "library_max_fragment_size", + "description": "number" + }, + 
{ + "name": "library_min_fragment_size", + "description": "number" + }, + { + "name": "library_name", + "description": "sequencing library name", + "type": "text" + }, + { + "name": "library_pcr_isolation_protocol", + "description": "text" + }, + { + "name": "library_prep_date", + "description": "text" + }, + { + "name": "library_prep_date_format", + "description": "text" + }, + { + "name": "library_prep_latitude", + "description": "number" + }, + { + "name": "library_prep_location", + "description": "text" + }, + { + "name": "library_prep_longitude", + "description": "number" + }, + { + "name": "library_selection", + "description": "method used to select or enrich the material being sequenced", + "type": "text" + }, + { + "name": "library_source", + "description": "source material being sequenced", + "type": "controlled value" + }, + { + "name": "library_strategy", + "description": "sequencing technique intended for the library", + "type": "text" + }, + { + "name": "local_environmental_context", + "description": "Report the entity or entities which are in the sample or specimen’s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for \"broad-scale environmental context\". Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. 
EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS.", + "type": "text" + }, + { + "name": "location", + "description": "geographic location of isolation of the sample", + "type": "latlon" + }, + { + "name": "location_end", + "description": "latlon" + }, + { + "name": "location_start", + "description": "latlon" + }, + { + "name": "marine_region", + "description": "geographical origin of the sample as defined by the marine region", + "type": "text" + }, + { + "name": "mating_type", + "description": "mating type of the organism from which the sequence was obtained", + "type": "text" + }, + { + "name": "ncbi_reporting_standard", + "description": "NCBI metadata reporting standard used to register the biosample (Package used)", + "type": "controlled value" + }, + { + "name": "nominal_length", + "description": "average fragmentation size of paired reads", + "type": "number" + }, + { + "name": "nominal_sdev", + "description": "standard deviation of fragmentation size of paired reads", + "type": "number" + }, + { + "name": "pcr_isolation_protocol", + "description": "text" + }, + { + "name": "project_name", + "description": "name of the project within which the sequencing was organized", + "type": "text" + }, + { + "name": "protocol_label", + "description": "the protocol used to produce the sample", + "type": "text" + }, + { + "name": "read_count", + "description": "number of reads", + "type": "number" + }, + { + "name": "read_strand", + "description": "text" + }, + { + "name": "restriction_enzyme", + "description": "text" + }, + { + "name": "restriction_enzyme_target_sequence", + "description": "The DNA sequence targeted by the restriction enzyme", + "type": "text" + }, + { + "name": "restriction_site", + "description": "text" + }, + { + "name": "rna_integrity_num", + "description": "number" + }, + { + "name": "rna_prep_3_protocol", + "description": "text" + }, + { + "name": "rna_prep_5_protocol", +
"description": "text" + }, + { + "name": "rna_purity_230_ratio", + "description": "number" + }, + { + "name": "rna_purity_280_ratio", + "description": "number" + }, + { + "name": "rt_prep_protocol", + "description": "text" + }, + { + "name": "run_accession", + "description": "accession number", + "type": "text" + }, + { + "name": "run_alias", + "description": "submitter's name for the run", + "type": "text" + }, + { + "name": "salinity", + "description": "Salinity (PSU)", + "type": "number" + }, + { + "name": "sample_accession", + "description": "sample accession number", + "type": "text" + }, + { + "name": "sample_alias", + "description": "submitter's name for the sample", + "type": "text" + }, + { + "name": "sample_capture_status", + "description": "Sample capture status", + "type": "text" + }, + { + "name": "sample_collection", + "description": "the method or device employed for collecting the sample", + "type": "text" + }, + { + "name": "sample_description", + "description": "detailed sample description", + "type": "text" + }, + { + "name": "sample_material", + "description": "sample material label", + "type": "text" + }, + { + "name": "sample_prep_interval", + "description": "number" + }, + { + "name": "sample_prep_interval_units", + "description": "text" + }, + { + "name": "sample_storage", + "description": "text" + }, + { + "name": "sample_storage_processing", + "description": "text" + }, + { + "name": "sample_title", + "description": "brief sample title", + "type": "text" + }, + { + "name": "sampling_campaign", + "description": "the activity within which this sample was collected", + "type": "text" + }, + { + "name": "sampling_platform", + "description": "the large infrastructure from which this sample was collected", + "type": "text" + }, + { + "name": "sampling_site", + "description": "the site/station where this sample was collected", + "type": "text" + }, + { + "name": "scientific_name", + "description": "scientific name of an organism", + "type":
"text" + }, + { + "name": "secondary_project", + "description": "Secondary project", + "type": "text" + }, + { + "name": "secondary_sample_accession", + "description": "secondary sample accession number", + "type": "text" + }, + { + "name": "secondary_study_accession", + "description": "secondary study accession number", + "type": "text" + }, + { + "name": "sequencing_date", + "description": "text" + }, + { + "name": "sequencing_date_format", + "description": "text" + }, + { + "name": "sequencing_location", + "description": "text" + }, + { + "name": "sequencing_longitude", + "description": "number" + }, + { + "name": "sequencing_method", + "description": "sequencing method used", + "type": "text" + }, + { + "name": "sequencing_primer_catalog", + "description": "The catalog from which the sequencing primer library was purchased", + "type": "text" + }, + { + "name": "sequencing_primer_lot", + "description": "The lot identifier of the sequencing primer library", + "type": "text" + }, + { + "name": "sequencing_primer_provider", + "description": "The name of the company, laboratory or person that provided the sequencing primer library", + "type": "text" + }, + { + "name": "serotype", + "description": "serological variety of a species characterized by its antigenic properties", + "type": "text" + }, + { + "name": "serovar", + "description": "serological variety of a species (usually a prokaryote) characterized by its antigenic properties", + "type": "text" + }, + { + "name": "sex", + "description": "sex of the organism from which the sample was obtained", + "type": "controlled value" + }, + { + "name": "specimen_voucher", + "description": "identifier for the sample culture including institute and collection code", + "type": "text" + }, + { + "name": "status", + "description": "Status", + "type": "number" + }, + { + "name": "strain", + "description": "strain from which sample was obtained", + "type": "text" + }, + { + "name": "study_accession", + "description": "study 
accession number", + "type": "text" + }, + { + "name": "study_alias", + "description": "submitter's name for the study", + "type": "text" + }, + { + "name": "study_title", + "description": "brief sequencing study description", + "type": "text" + }, + { + "name": "sub_species", + "description": "name of sub-species of organism from which sample was obtained", + "type": "text" + }, + { + "name": "sub_strain", + "description": "name or identifier of a genetically or otherwise modified strain from which sample was obtained", + "type": "text" + }, + { + "name": "submission_accession", + "description": "submission accession number", + "type": "text" + }, + { + "name": "submission_tool", + "description": "Submission tool", + "type": "text" + }, + { + "name": "submitted_host_sex", + "description": "physical sex of the host", + "type": "text" + }, + { + "name": "submitted_md5", + "description": "MD5 checksum of submitted files", + "type": "text" + }, + { + "name": "submitted_read_type", + "description": "submitted FASTQ read type", + "type": "list" + }, + { + "name": "tag", + "description": "Classification Tags", + "type": "controlled value" + }, + { + "name": "target_gene", + "description": "targeted gene or locus name for marker gene studies", + "type": "text" + }, + { + "name": "tax_id", + "description": "NCBI taxonomic classification", + "type": "taxonomy" + }, + { + "name": "taxonomic_classification", + "description": "Taxonomic classification", + "type": "text" + }, + { + "name": "taxonomic_identity_marker", + "description": "Taxonomic identity marker", + "type": "text" + }, + { + "name": "temperature", + "description": "Temperature (C)", + "type": "number" + }, + { + "name": "tissue_lib", + "description": "tissue library from which sample was obtained", + "type": "text" + }, + { + "name": "tissue_type", + "description": "tissue type from which the sample was obtained", + "type": "text" + }, + { + "name": "transposase_protocol", + "description": "text" + }, + { + 
"name": "variety", + "description": "variety (varietas, a formal Linnaean rank) of organism from which sample was derived", + "type": "text" + } +] diff --git a/catalog/output/runReadFields.json.d.ts b/catalog/output/runReadFields.json.d.ts new file mode 100644 index 00000000..ba168d35 --- /dev/null +++ b/catalog/output/runReadFields.json.d.ts @@ -0,0 +1,4 @@ +import { RunReadsFields } from "../../app/apis/catalog/brc-analytics-catalog/common/entities"; + +const runReadFields: RunReadsFields[]; +export default runReadFields; diff --git a/package-lock.json b/package-lock.json index cca2e7b8..7d2b1a84 100644 --- a/package-lock.json +++ b/package-lock.json @@ -23,6 +23,7 @@ "next-compose-plugins": "^2.2.1", "react": "^18.3.1", "react-dom": "^18.3.1", + "react-paginate": "^8.3.0", "react-window": "1.8.9", "uuid": "8.3.2", "validate.js": "^0.13.1" @@ -15514,6 +15515,18 @@ "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==" }, + "node_modules/react-paginate": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/react-paginate/-/react-paginate-8.3.0.tgz", + "integrity": "sha512-TptZE37HPkT3R+7AszWA++LOTIsIHXcCSWMP9WW/abeF8sLpJzExFB/dVs7xbtqteJ5njF6kk+udTDC0AR3y5w==", + "license": "MIT", + "dependencies": { + "prop-types": "^15" + }, + "peerDependencies": { + "react": "^16 || ^17 || ^18 || ^19" + } + }, "node_modules/react-transition-group": { "version": "4.4.5", "resolved": "https://registry.npmjs.org/react-transition-group/-/react-transition-group-4.4.5.tgz", diff --git a/package.json b/package.json index b8ec4345..3f00bda6 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "next-compose-plugins": "^2.2.1", "react": "^18.3.1", "react-dom": "^18.3.1", + "react-paginate": "^8.3.0", "react-window": "1.8.9", "uuid": "8.3.2", "validate.js": "^0.13.1" diff --git 
a/site-config/brc-analytics/local/entity/genome/analysisMethodMainColumn.ts b/site-config/brc-analytics/local/entity/genome/analysisMethodMainColumn.ts index bf8f3c96..2287de87 100644 --- a/site-config/brc-analytics/local/entity/genome/analysisMethodMainColumn.ts +++ b/site-config/brc-analytics/local/entity/genome/analysisMethodMainColumn.ts @@ -9,6 +9,10 @@ export const mainColumn: ComponentsConfig = [ component: C.AnalysisMethodsCatalog, viewBuilder: V.buildGenomeAnalysisMethods, }, + { + component: C.PrimaryDataViewer, + viewBuilder: V.buildGenomePrimaryData, + }, ], component: C.BackPageContentMainColumn, },