Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add plass mapping rate to megahit table #13

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions paper/appendix.tex
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,9 @@ \subsection{K-mer inclusion of reads by MEGAHIT assemblies}
\emph{M. bacterium 39\_7}, had the smallest query neighborhood.

\begin{table}
\begin{tabular}{l c}
\begin{tabular}{l c c}
\toprule
Species & MEGAHIT assembly containment \\
Species & MEGAHIT assembly containment & PLASS mapping \\
\midrule
% generated from notebook 'figures/megahit-assembly-inclusion.ipynb'
\input{tables/megahit-inclusion.table.tex}
Expand Down Expand Up @@ -557,3 +557,19 @@ \subsection{Other genes}
\caption{Bin and neighborhood rpsC protein content.}
\label{tab:rpsC}
\end{table}

\newpage

\begin{table}
% Per-species summary: one row per species, with the MEGAHIT assembly
% containment and the PLASS mapping value as percentages.
\begin{tabular}{l c c}
\toprule
Species & MEGAHIT assembly containment & PLASS mapping \\
\midrule
% Rows are generated, not hand-edited: regenerate them from the notebook
% 'figures/megahit-assembly-inclusion.ipynb' rather than changing them here.
\input{tables/megahit-inclusion.table.tex}
\bottomrule
\end{tabular}
\caption{Containment of neighborhood k-mer content in MEGAHIT nucleotide assemblies, and the corresponding PLASS mapping rate.}
% NOTE(review): this table uses the same header and the same \input file as the
% table in the MEGAHIT k-mer inclusion subsection -- confirm that this label is
% not defined twice (LaTeX warns about multiply-defined labels) and that the
% table is not an unintended duplicate.
\label{tab:kmer_inclusion}
\end{table}
277 changes: 272 additions & 5 deletions paper/figures/megahit-assembly-inclusion.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -20,7 +20,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -33,13 +33,39 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'hu-genome19': 'M. bacterium 39_7', 'hu-genome20': 'P. acetatigenes isolate 50_10', 'hu-genome21': 'WS6 bacterium 34_10', 'hu-genome22': 'Methanobacterium sp. 42_16', 'hu-genome23': 'C. bacterium 38_11', 'hu-genome24': 'Methanocalculus sp. 52_23', 'hu-genome25': 'A. bacterium 49_20', 'hu-genome26': 'M. infera isolate 46_47', 'hu-genome27': 'WS6 bacterium 36_33', 'hu-genome28': 'P. bacterium 34_609', 'hu-genome29': 'Desulfotomaculum sp. 46_80', 'hu-genome30': 'M. marisnigri isolate 62_101', 'hu-genome31': 'S. bacterium 57_84', 'hu-genome32': 'B. bacterium', 'hu-genome33': 'S. bacterium 53_16', 'hu-genome34': 'A. bacterium 34_128', 'hu-genome35': 'M. bacterium 46_47', 'hu-genome36': 'Desulfotomaculum sp. 46_296', 'hu-genome37': 'TA06 bacterium 32_111', 'hu-genome38': 'P. bacterium 33_209', 'hu-genome39': 'A. bacterium 66_15', 'hu-genome40': 'A. thermophila isolate 46_16', 'hu-genome41': 'M. harundinacea isolate 57_489'}\n"
]
}
],
"source": [
"print(names_d)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"df = pandas.read_csv('megahit-containment.csv')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"df2 = pandas.read_csv('/tmp/hu-plass-mapped.csv')"
]
},
{
"cell_type": "code",
"execution_count": 19,
Expand Down Expand Up @@ -326,7 +352,246 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>genome</th>\n",
" <th>plass_map</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>genome19</td>\n",
" <td>95.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>genome20</td>\n",
" <td>97.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>genome21</td>\n",
" <td>96.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>genome22</td>\n",
" <td>95.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>genome23</td>\n",
" <td>96.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>genome24</td>\n",
" <td>97.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>genome25</td>\n",
" <td>96.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>genome26</td>\n",
" <td>98.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>genome27</td>\n",
" <td>98.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>genome28</td>\n",
" <td>97.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>genome29</td>\n",
" <td>97.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>genome30</td>\n",
" <td>97.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>genome31</td>\n",
" <td>97.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>genome32</td>\n",
" <td>94.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>genome33</td>\n",
" <td>98.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>genome34</td>\n",
" <td>93.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>genome35</td>\n",
" <td>98.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>genome36</td>\n",
" <td>97.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>genome37</td>\n",
" <td>98.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>genome38</td>\n",
" <td>97.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>genome39</td>\n",
" <td>96.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>genome40</td>\n",
" <td>95.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>genome41</td>\n",
" <td>97.8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" genome plass_map\n",
"0 genome19 95.6\n",
"1 genome20 97.1\n",
"2 genome21 96.1\n",
"3 genome22 95.2\n",
"4 genome23 96.0\n",
"5 genome24 97.6\n",
"6 genome25 96.9\n",
"7 genome26 98.3\n",
"8 genome27 98.2\n",
"9 genome28 97.9\n",
"10 genome29 97.6\n",
"11 genome30 97.2\n",
"12 genome31 97.9\n",
"13 genome32 94.9\n",
"14 genome33 98.1\n",
"15 genome34 93.4\n",
"16 genome35 98.4\n",
"17 genome36 97.7\n",
"18 genome37 98.8\n",
"19 genome38 97.6\n",
"20 genome39 96.9\n",
"21 genome40 95.9\n",
"22 genome41 97.8"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"plass_d = {}\n",
"for row in df2.itertuples():\n",
" plass_d['hu-' + row.genome] = float(row.plass_map)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'hu-genome19': 95.6,\n",
" 'hu-genome20': 97.1,\n",
" 'hu-genome21': 96.1,\n",
" 'hu-genome22': 95.2,\n",
" 'hu-genome23': 96.0,\n",
" 'hu-genome24': 97.6,\n",
" 'hu-genome25': 96.9,\n",
" 'hu-genome26': 98.3,\n",
" 'hu-genome27': 98.2,\n",
" 'hu-genome28': 97.9,\n",
" 'hu-genome29': 97.6,\n",
" 'hu-genome30': 97.2,\n",
" 'hu-genome31': 97.9,\n",
" 'hu-genome32': 94.9,\n",
" 'hu-genome33': 98.1,\n",
" 'hu-genome34': 93.4,\n",
" 'hu-genome35': 98.4,\n",
" 'hu-genome36': 97.7,\n",
" 'hu-genome37': 98.8,\n",
" 'hu-genome38': 97.6,\n",
" 'hu-genome39': 96.9,\n",
" 'hu-genome40': 95.9,\n",
" 'hu-genome41': 97.8}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"plass_d"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -338,8 +603,10 @@
" abbreviation = names_d[num]\n",
" \n",
" cont = '{:.1f}\\\\%'.format(containment * 100)\n",
" plass_num = plass_d[num]\n",
" plass_m = '{:.1f}\\\\%'.format(plass_num)\n",
"\n",
" print(abbreviation.replace('_', '\\\\_'), '&', cont, '\\\\\\\\', file=fp)\n",
" print(abbreviation.replace('_', '\\\\_'), '&', cont, '&', plass_m, '\\\\\\\\', file=fp)\n",
"\n",
"#latex_output(recA, open('recA-table.tex', 'wt'))\n",
"latex_output(df, open('../tables/megahit-inclusion.table.tex', 'wt'))"
Expand Down
46 changes: 23 additions & 23 deletions paper/tables/megahit-inclusion.table.tex
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
M. harundinacea isolate 57\_489 & 4.2\% \\
Desulfotomaculum sp. 46\_296 & 12.7\% \\
M. marisnigri isolate 62\_101 & 13.6\% \\
S. bacterium 57\_84 & 19.4\% \\
P. bacterium 34\_609 & 19.7\% \\
A. bacterium 66\_15 & 20.5\% \\
Desulfotomaculum sp. 46\_80 & 24.1\% \\
P. bacterium 33\_209 & 26.3\% \\
S. bacterium 53\_16 & 30.9\% \\
A. bacterium 49\_20 & 31.9\% \\
Methanocalculus sp. 52\_23 & 33.4\% \\
M. bacterium 46\_47 & 36.6\% \\
P. acetatigenes isolate 50\_10 & 36.6\% \\
A. bacterium 34\_128 & 36.8\% \\
M. infera isolate 46\_47 & 38.0\% \\
Methanobacterium sp. 42\_16 & 38.0\% \\
A. thermophila isolate 46\_16 & 38.6\% \\
TA06 bacterium 32\_111 & 44.1\% \\
C. bacterium 38\_11 & 44.4\% \\
WS6 bacterium 34\_10 & 53.2\% \\
WS6 bacterium 36\_33 & 53.8\% \\
B. bacterium & 54.2\% \\
M. bacterium 39\_7 & 55.7\% \\
M. harundinacea isolate 57\_489 & 4.2\% & 97.8\% \\
Desulfotomaculum sp. 46\_296 & 12.7\% & 97.7\% \\
M. marisnigri isolate 62\_101 & 13.6\% & 97.2\% \\
S. bacterium 57\_84 & 19.4\% & 97.9\% \\
P. bacterium 34\_609 & 19.7\% & 97.9\% \\
A. bacterium 66\_15 & 20.5\% & 96.9\% \\
Desulfotomaculum sp. 46\_80 & 24.1\% & 97.6\% \\
P. bacterium 33\_209 & 26.3\% & 97.6\% \\
S. bacterium 53\_16 & 30.9\% & 98.1\% \\
A. bacterium 49\_20 & 31.9\% & 96.9\% \\
Methanocalculus sp. 52\_23 & 33.4\% & 97.6\% \\
M. bacterium 46\_47 & 36.6\% & 98.4\% \\
P. acetatigenes isolate 50\_10 & 36.6\% & 97.1\% \\
A. bacterium 34\_128 & 36.8\% & 93.4\% \\
M. infera isolate 46\_47 & 38.0\% & 98.3\% \\
Methanobacterium sp. 42\_16 & 38.0\% & 95.2\% \\
A. thermophila isolate 46\_16 & 38.6\% & 95.9\% \\
TA06 bacterium 32\_111 & 44.1\% & 98.8\% \\
C. bacterium 38\_11 & 44.4\% & 96.0\% \\
WS6 bacterium 34\_10 & 53.2\% & 96.1\% \\
WS6 bacterium 36\_33 & 53.8\% & 98.2\% \\
B. bacterium & 54.2\% & 94.9\% \\
M. bacterium 39\_7 & 55.7\% & 95.6\% \\