Skip to content

Commit

Permalink
Merge pull request #42 from nleroy917/seeding
Browse files Browse the repository at this point in the history
Set Seed + Iterations
  • Loading branch information
nleroy917 authored Oct 22, 2022
2 parents dedb738 + ffc00d1 commit 1b42882
Show file tree
Hide file tree
Showing 10 changed files with 143 additions and 28 deletions.
11 changes: 10 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
{
}
"css.validate": false,
"editor.tabSize": 2,
"editor.codeActionsOnSave": {
"source.fixAll": true,
"source.fixAll.eslint": true
},
"editor.formatOnSave": true, // Tell VSCode to format files on save
"editor.defaultFormatter": "esbenp.prettier-vscode" // Tell VSCode to use Prettier as default file formatter
}

2 changes: 1 addition & 1 deletion optipyzer/const.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION = "0.1.4"
VERSION = "0.2.0"

VALID_AMINO_ACIDS = "ARNDCQEGHILKMFPSTWYV"

Expand Down
32 changes: 26 additions & 6 deletions optipyzer/oengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def validate_query(query, DNA):
return query, stop_pos


def optimize_sequence(random_num_table, query):
def optimize_sequence(random_num_table, query, seed=None):
"""
takes the current iteration of the multi-species optimized codon preference table and uses it with a weighted
codon-randomization method to convert a fasta-formatted protein sequence to an optimized DNA sequence
Expand All @@ -439,11 +439,17 @@ def optimize_sequence(random_num_table, query):
:param query: a fasta-formatted protein sequence of the gene to be optimized
:return:
"""
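# build a dedicated random number generator for this call: seeded when a truthy seed is passed,
# otherwise unseeded (any falsy seed, e.g. None, 0 or "", takes the unseeded branch)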
if seed:
rng = random.Random(seed)
else:
rng = random.Random()
# initialize the DNA sequence
optimmized_query = ""
# loops through the query and generates a random integer between 1 and 100000001 (inclusive) for each residue in query
for residue in query:
value = random.randint(1, 100000001)
value = rng.randint(1, 100000001)
# compares random number to the random number bounds for that codon
# adds the appropriate DNA codon (based on the random number) to the optimized query
for codon in random_num_table[residue]:
Expand All @@ -455,7 +461,7 @@ def optimize_sequence(random_num_table, query):
optimmized_query += codon
# generates a random integer between 1 and 100000001 (inclusive) and compares it to the random number bounds for the stop codons
# adds the selected stop codon to the optimized DNA sequence
value = random.randint(1, 100000001)
value = rng.randint(1, 100000001)
for codon in random_num_table["Stop"]:
if (
random_num_table["Stop"][codon][0]
Expand Down Expand Up @@ -660,7 +666,13 @@ def get_redundantaa_rn(query):


def adjust_table(
rca_expression_dif, species_expression, et, aa_rn, query_table, multi_table
rca_expression_dif,
species_expression,
et,
aa_rn,
query_table,
multi_table,
seed=None,
):
"""
Adjusts the table in favor of or against species that have a predicted expression different than their target
Expand All @@ -685,6 +697,10 @@ def adjust_table(
encode that residue are the key, and the value is the codon preference after adjusting for species over- or under-
performing.
"""
if seed is not None:
rng = random.Random(seed)
else:
rng = random.Random()
for species in rca_expression_dif:
# when current table is performing worse than the current best table, adjusts the multi_table
# codon preferences in favor of species which currently has an expression difference greater than
Expand All @@ -696,7 +712,7 @@ def adjust_table(
# in the query
while aa_adjusted < 10:
aa_adjusted += 1
v = random.randint(1, 100000001)
v = rng.randint(1, 100000001)
for residue in aa_rn:
new_sum = 0
if aa_rn[residue][0] <= v < aa_rn[residue][1]:
Expand All @@ -722,6 +738,7 @@ def optimize_multitable_sd(
species_expression,
et=0.05,
iterations=1000,
seed=None,
):
"""
iterates upon the multi_table while optimizing the query to select the best-optimized DNA sequence using a sum of
Expand Down Expand Up @@ -756,7 +773,7 @@ def optimize_multitable_sd(
# levels
square_diff = 0
# calls to optimize the query sequence
optimized_seq = optimize_sequence(rn, query)
optimized_seq = optimize_sequence(rn, query, seed=seed)
# calculates the rca measure of relative expression for each species
rca = calculate_predicted_expression(rca_xyz, optimized_seq)
# initializes a dictionary to store the difference in species expression
Expand All @@ -781,6 +798,7 @@ def optimize_multitable_sd(
aa_rn,
query_table,
multi_table,
seed=seed,
)
# gets a new random number table for the new table
rn = get_multitable_randomnumbers(multi_table)
Expand All @@ -801,6 +819,7 @@ def optimize_multitable_ad(
species_expression,
et=0.05,
iterations=1000,
seed=None,
):
"""
iterates upon the multi_table while optimizing the query to select the best-optimized DNA sequence using an
Expand Down Expand Up @@ -860,6 +879,7 @@ def optimize_multitable_ad(
aa_rn,
query_table,
multi_table,
seed=seed,
)
# gets a new random number table for the new table
rn = get_multitable_randomnumbers(multi_table)
Expand Down
32 changes: 26 additions & 6 deletions optipyzer/optimization.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Union
from optipyzer.const import DEFAULT_NUM_ITERATIONS
from .oengine import (
from optipyzer.oengine import (
averaged_table,
remove_prohibited_codons,
find_prohibited_codons,
Expand All @@ -8,8 +9,8 @@
optimize_multitable_ad,
optimize_multitable_sd,
)
from .utils import seq_detect, aa_to_dna
from .db.interfaces import calc_codon_usage
from optipyzer.utils import seq_detect, aa_to_dna
from optipyzer.db.interfaces import calc_codon_usage


def _calc_average_table(usage_data: dict, weights: dict):
Expand All @@ -28,6 +29,7 @@ def codon_optimize(
weights: dict = None,
seq_type: str = None,
iterations: int = DEFAULT_NUM_ITERATIONS,
seed: Union[str, int] = None,
):
"""Optimize a sequence given an organism list and a map/dictionary of weights"""
if seq_type is None:
Expand All @@ -51,16 +53,34 @@ def codon_optimize(

peptide_seq, stop_codon = validate_query(seq, (seq_type == "dna"))

# squared difference optimization
(
optimized_sd,
min_difference_sumsquares,
best_expression_sd,
) = optimize_multitable_sd(
average_table, peptide_seq, usage_data, rca_xyz, weights, iterations=iterations
average_table,
peptide_seq,
usage_data,
rca_xyz,
weights,
iterations=iterations,
seed=seed,
)

optimized_ad, min_difference_absvalue, best_expression_ad = optimize_multitable_ad(
average_table, peptide_seq, usage_data, rca_xyz, weights, iterations=iterations
# absolute difference optimization
(
optimized_ad,
min_difference_absvalue,
best_expression_ad,
) = optimize_multitable_ad(
average_table,
peptide_seq,
usage_data,
rca_xyz,
weights,
iterations=iterations,
seed=seed,
)

return {
Expand Down
3 changes: 2 additions & 1 deletion optipyzer/request_models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Optional
from typing import Dict, Optional, Union
from fastapi import Query
from pydantic import BaseModel

Expand All @@ -7,6 +7,7 @@ class OptimizeQuery(BaseModel):
seq: str
weights: Dict[str, int]
iterations: Optional[int]
seed: Optional[Union[int, str]]

class Config:
schema_extra = {
Expand Down
2 changes: 2 additions & 0 deletions optipyzer/routers/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def optimize_dna(query: OptimizeQuery = Depends(verify_dna)):
query.weights,
seq_type="dna",
iterations=(query.iterations or DEFAULT_NUM_ITERATIONS),
seed=query.seed,
)

return result
Expand All @@ -49,6 +50,7 @@ def optimize_protein(query: OptimizeQuery = Depends(verify_protein)):
query.weights,
seq_type="protein",
iterations=(query.iterations or DEFAULT_NUM_ITERATIONS),
seed=query.seed,
)

return result
33 changes: 24 additions & 9 deletions sdk/optipyzer/api.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
from typing import Dict
from typing import Dict, Optional, Union
import requests
import time
from .const import LOCAL_SERVER_BASE, PUBLIC_SERVER_BASE, SESSION_HDRS, SLEEP_MIN
from .log import _LOGGER
from .helpers import verify_dna, verify_protein
from sdk.optipyzer.const import (
LOCAL_SERVER_BASE,
PUBLIC_SERVER_BASE,
SESSION_HDRS,
SLEEP_MIN,
)
from sdk.optipyzer.log import _LOGGER
from sdk.optipyzer.helpers import verify_dna, verify_protein

# return types
from requests import Response
from .models import SearchResult
from .const import VALID_SEQ_TYPES
from .models import CodonUsage, OptimizationResult
from sdk.optipyzer.models import SearchResult
from sdk.optipyzer.const import VALID_SEQ_TYPES
from sdk.optipyzer.models import CodonUsage, OptimizationResult


class api:
Expand Down Expand Up @@ -75,7 +80,12 @@ def search(self, name: str, limit: int = 50) -> SearchResult:
return search_results

def optimize(
self, seq: str, weights: Dict[str, int], seq_type: str = "dna"
self,
seq: str,
weights: Dict[str, int],
seq_type: str = "dna",
iterations: Optional[int] = None,
seed: Optional[Union[int, str]] = None,
) -> OptimizationResult:
"""Optimize a sequence given specific organism weights"""
# force seq_type lower
Expand All @@ -93,7 +103,12 @@ def optimize(
result = self._make_request(
f"/optimize/{seq_type}",
method="POST",
body_={"seq": seq, "weights": weights},
body_={
"seq": seq,
"weights": weights,
"iterations": iterations,
"seed": seed,
},
)
return result.json()

Expand Down
38 changes: 38 additions & 0 deletions web/src/components/HyperParameterSelection.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { Dispatch, FC, SetStateAction } from 'react'

// Props for HyperParameterSelection. The parent owns both pieces of state and
// passes each value together with its React state setter.
interface Props {
// Value shown in the "Number of iterations" field; null when nothing is set.
iterations: number | null
// State setter invoked when the iterations field changes.
setIterations: Dispatch<SetStateAction<number | null>>
// Value shown in the "Random Seed" field; null when nothing is set.
seed: number | string | null
// State setter invoked when the seed field changes.
setSeed: Dispatch<SetStateAction<number | string | null>>
}

/**
 * Form controls for the optimizer's tunable parameters: the number of
 * iterations and the random seed.
 *
 * State is owned by the parent. This component renders the two fields and
 * normalizes what the user typed before handing it back through the setters:
 * an empty (or unparsable) field is always reported as `null`, never as `NaN`
 * or an empty string.
 */
const HyperParameterSelection: FC<Props> = (props) => {
  const { iterations, setIterations, seed, setSeed } = props

  // A cleared or non-numeric field makes parseInt return NaN; report that as
  // null ("not set") so the state stays within its declared `number | null`.
  // The explicit radix keeps the parse strictly decimal.
  const updateIterations = (raw: string) => {
    const parsed = parseInt(raw, 10)
    setIterations(Number.isNaN(parsed) ? null : parsed)
  }

  // A cleared field means "no seed": report null rather than an empty string,
  // which would otherwise be passed on as if it were a real seed value.
  const updateSeed = (raw: string) => {
    setSeed(raw === '' ? null : raw)
  }

  // Always hand React a defined value so the inputs stay controlled for their
  // whole lifetime; switching between `undefined` and a value makes React warn
  // about an uncontrolled input becoming controlled.
  const iterationsValue =
    iterations === null || Number.isNaN(iterations) ? '' : iterations
  const seedValue = seed === null ? '' : seed

  return (
    <div className="flex flex-col md:flex-row">
      <div>
        <p className="mt-2 text-lg font-bold text-blue-600">
          Number of iterations:
        </p>
        <input
          value={iterationsValue}
          onChange={(e) => updateIterations(e.target.value)}
          placeholder="1000"
          type="number"
          className="p-2 mb-2 text-sm text-gray-600 border border-black rounded-lg shadow-md"
        />
      </div>
      <div className="md:ml-2">
        <p className="mt-2 text-lg font-bold text-blue-600">Random Seed:</p>
        <input
          value={seedValue}
          onChange={(e) => updateSeed(e.target.value)}
          className="p-2 mb-2 text-sm text-gray-600 border border-black rounded-lg shadow-md"
        />
      </div>
    </div>
  )
}

export default HyperParameterSelection
12 changes: 11 additions & 1 deletion web/src/pages/optimize.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@ import WeightSelector from '@/components/WeightSelector'
import { QueryError, QueryResult, Species, Weights } from '@/..'
import { purgeWeights } from '@/utils/purgeWeights'
import { NextPage } from 'next'
import HyperParameterSelection from '@/components/HyperParameterSelection'

const OptimizePage: NextPage = () => {
// state
const [seqType, setSeqType] = useState<string>('dna')
const [species, setSpecies] = useState<Species[]>([])
const [weights, setWeights] = useState<Weights>({})
const [seq, setSeq] = useState<string>('')
const [iterations, setIterations] = useState<number | null>(null)
const [seed, setSeed] = useState<string | number | null>(null)
const [optimizing, setOptimizing] = useState<boolean>(false)
const [result, setResult] = useState<QueryResult | null>(null)
const [error, setError] = useState<QueryError | null>(null)
Expand All @@ -45,6 +48,8 @@ const OptimizePage: NextPage = () => {
seq: seq,
org_list: species.map((s) => s.id),
weights: weights,
seed: seed,
iterations: iterations,
}
)
.then((res: AxiosResponse<QueryResult>) => {
Expand Down Expand Up @@ -128,7 +133,12 @@ const OptimizePage: NextPage = () => {
Input sequence:
</p>
<SeqInput seq={seq} setSeq={setSeq} seqType={seqType} />

<HyperParameterSelection
iterations={iterations}
setIterations={setIterations}
seed={seed}
setSeed={setSeed}
/>
{optimizing ? (
<button
className="px-4 py-2 my-2 text-lg font-bold text-white transition-all bg-blue-800 border-2 border-blue-800 rounded-lg cursor-wait"
Expand Down
6 changes: 3 additions & 3 deletions web/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -894,9 +894,9 @@ camelcase@^5.3.1:
integrity sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==

caniuse-lite@^1.0.30001271, caniuse-lite@^1.0.30001272, caniuse-lite@^1.0.30001283:
version "1.0.30001312"
resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001312.tgz#e11eba4b87e24d22697dae05455d5aea28550d5f"
integrity sha512-Wiz1Psk2MEK0pX3rUzWaunLTZzqS2JYZFzNKqAiJGiuxIjRPLgV6+VDPOg6lQOUxmDwhTlh198JsTTi8Hzw6aQ==
version "1.0.30001423"
resolved "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001423.tgz"
integrity sha512-09iwWGOlifvE1XuHokFMP7eR38a0JnajoyL3/i87c8ZjRWRrdKo1fqjNfugfBD0UDBIOz0U+jtNhJ0EPm1VleQ==

chalk@^2.0.0, chalk@^2.4.2:
version "2.4.2"
Expand Down

1 comment on commit 1b42882

@vercel
Copy link

@vercel vercel bot commented on 1b42882 Oct 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.