diff --git a/README.md b/README.md
index 6b38e55..7dec7cc 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # sbx_gene_family
 
-Reads-level based alignment to gene clusters of interest, e.g. bai operon or butyrate producing genes. Please refer to [sunbeam_database](https://github.com/zhaoc1/sunbeam_databases.git) for details. 
+This extension can be used to perform functional mapping, i.e. mapping metagenomic reads to proteins. The database to map against could be [UniRef50](https://www.uniprot.org/downloads), all prokaryotic proteins from [KEGG](https://www.kegg.jp/kegg/download/), or more targeted [databases](https://github.com/zhaoc1/sunbeam_databases.git), e.g. bai operon or butyrate producing genes.
 
 Take [**UniRef50** database](https://www.uniprot.org/downloads) as an example. First download the uniref50.fasta into your current `sunbeam_output/mapping/sbx_gene_family/databases/`.
 
@@ -34,7 +34,7 @@ Take [**UniRef50** database](https://www.uniprot.org/downloads) as an example. F
   
  4. Run time
 
- - Use diamond
+By default, mapping uses DIAMOND, but this extension also supports using BLAST.
  
   ```bash
   sunbeam run -- --configfile sunbeam_config.yml all_gene_family
diff --git a/config.yml b/config.yml
index d3fe4e8..35c3361 100644
--- a/config.yml
+++ b/config.yml
@@ -1,6 +1,6 @@
 sbx_gene_clusters:
   threads: 8
   genes_fp: "{PROJECT_FP}"/mapping/sbx_gene_family/database
-  evalue: 1e-5
+  evalue: 1e-5 #  this value is good for read lengths ~126 bp but should be bigger for shorter reads, e.g. 1e-3 for 75bp reads
   alnLen: 30
-  mismatch: 3
\ No newline at end of file
+  mismatch: 3
diff --git a/sbx_gene_clusters.rules b/sbx_gene_clusters.rules
index e3d8161..77abfbf 100644
--- a/sbx_gene_clusters.rules
+++ b/sbx_gene_clusters.rules
@@ -5,16 +5,17 @@
 #   Rules for Diamond or BLASTx reads against protein databases
 
 import csv
+import gzip
 import os
 from collections import Counter, OrderedDict
 
-#GENES_DIR = Cfg['sbx_gene_clusters']['genes_fp']
-#GENES_KEY = [PurePath(f.name).stem for f in GENES_DIR.glob('*.fasta')]
-#GENES_VAL = [str(GENES_DIR) + '/' + g+'.fasta' for g in GENES_KEY]
-#GENES_DICT = dict(zip(GENES_KEY, GENES_VAL))
+GENES_DIR = Cfg['sbx_gene_clusters']['genes_fp']
+GENES_KEY = [PurePath(f.name).stem for f in GENES_DIR.glob('*.fasta')]
+GENES_VAL = [str(GENES_DIR) + '/' + g+'.fasta' for g in GENES_KEY]
+GENES_DICT = dict(zip(GENES_KEY, GENES_VAL))
 
-#TARGET_GENES = expand(str(MAPPING_FP/'sbx_gene_family'/'{gene}'/'{sample}_1.txt'), 
-#                     gene=GENES_DICT.keys(), sample=Samples.keys())
+TARGET_GENES = expand(str(MAPPING_FP/'sbx_gene_family'/'{gene}'/'{sample}_1.txt'), 
+                     gene=GENES_DICT.keys(), sample=Samples.keys())
 
 rule all_gene_family:
     input:
@@ -72,8 +73,15 @@ def write_gene_hits(in_fp, out_fp, db_annot_fp, evalue, alnLen, mismatch):
     
     ## Read in the alignment file to count
     counter_genes = Counter()
+    
+    # unzip if necessary
+    if os.path.splitext(in_fp)[1] == ".gz":
+        open_fxn = lambda in_fp: gzip.open(in_fp, mode="rt")
+    else:
+        open_fxn = lambda in_fp: open(in_fp)
+        
     if os.path.getsize(in_fp) > 0:    
-        with open(in_fp) as in_file:
+        with open_fxn(in_fp) as in_file:
             reader = csv.reader(in_file, delimiter='\t') # initialize the csv reader
             aln_chunk = [next(reader)] #initialize the seed chunk
         
@@ -103,6 +111,8 @@ rule gene_hits:
         evalue = float(Cfg['sbx_gene_clusters']['evalue']),
         alnLen = Cfg['sbx_gene_clusters']['alnLen'],
         mismatch = Cfg['sbx_gene_clusters']['mismatch']
+    resources:
+        mem_mb=8192
     run:
         write_gene_hits(input.aln_fp, output[0], input.db_annot_fp[0], params.evalue, params.alnLen, params.mismatch)
 
@@ -155,19 +165,25 @@ rule fq_2_fa:
 
 rule diamond_reads:
     input:
-        read = str(MAPPING_FP/'R1'/'{sample}_1.fasta'),
+        # input can be fastq.gz
+        read = str(QC_FP/'decontam'/'{sample}_1.fastq.gz'),
         db = expand(str(GENES_DIR/'{{gene}}.fasta.{index}'), index=['dmnd'])
     output:
-        str(MAPPING_FP/'sbx_gene_family'/'{gene}'/'{sample}_1.m8')
+        str(MAPPING_FP/'sbx_gene_family'/'{gene}'/'{sample}_1.m8.gz')
     threads:
         Cfg['sbx_gene_clusters']['threads']
+    resources:
+        mem_mb=16384
+    params:
+        evalue=Cfg['sbx_gene_clusters']['evalue']
     shell:
         """
         diamond blastx \
             --db {input.db} --query {input.read} \
-            --threads {threads} --evalue 1e-6 \
+            --threads {threads} --evalue {params.evalue} \
             --max-target-seqs 0 \
             --out {output} \
+            --compress 1 \
             --outfmt 6 qseqid sseqid pident qlen slen length mismatch gapopen qstart qend sstart send evalue bitscore
         """