Skip to content

feat: add wrapper for MEGAHIT #4121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 23, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: add wrapper for MEGAHIT
  • Loading branch information
alienzj committed May 22, 2025
commit c5f43a0ce73c1e7465341cb42abb8c547a69ddc9
7 changes: 7 additions & 0 deletions bio/megahit/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment used to run the MEGAHIT wrapper.
channels:
- conda-forge
- bioconda
# NOTE(review): `nodefaults` is presumably here to keep conda's default channels out of the solve — confirm.
- nodefaults
dependencies:
# The assembler itself, pinned to an exact version.
- megahit =1.2.9
# Supplies the `get_mem` helper imported by wrapper.py.
- snakemake-wrapper-utils =0.7.2
23 changes: 23 additions & 0 deletions bio/megahit/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
name: "megahit"

url: https://github.com/voutcn/megahit

description: |
MEGAHIT is an ultra-fast and memory-efficient NGS assembler. It is optimized for metagenomes, but also works well on generic single genome assembly (small or mammalian size) and single-cell assembly.
Input options can be specified multiple times (plain-text, gz- and bz2-compressed files are supported).

input:
- reads: list of reads in FASTQ format
- r1: forward reads
- r2: reverse reads
- interleaved: interleaved reads
- unpaired: unpaired reads

output:
- contigs: output file with contigs
- log: log file
- options: options JSON file

authors:
- Jie Zhu
- Filipe G. Vieira
29 changes: 29 additions & 0 deletions bio/megahit/test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Container image declared for this test workflow.
container: "docker://continuumio/miniconda3:4.4.10"


# Assemble one paired-end sample with the MEGAHIT wrapper.
rule run_megahit:
input:
# Two positional reads: the wrapper passes the first as `-1` and the second as `-2`.
reads=["test_reads/sample1_R1.fastq.gz", "test_reads/sample1_R2.fastq.gz"],
output:
# The wrapper moves MEGAHIT's `final.contigs.fa` to this path.
contigs="assembly/contigs.fasta",
benchmark:
"logs/benchmarks/assembly/megahit.txt"
params:
# all parameters are optional
# `extra` is appended verbatim to the megahit command line.
extra="--min-count 10 --k-list 21,29,39,59,79,99,119,141",
log:
"logs/megahit.log",
# Forwarded to megahit as `-t`.
threads: 8
resources:
# Read by the wrapper through `get_mem` and forwarded to megahit as `-m`.
mem_mb=250000,
wrapper:
"master/bio/megahit"


# Fetch the paired-end test reads consumed by `run_megahit`.
rule download_test_reads:
output:
["test_reads/sample1_R1.fastq.gz", "test_reads/sample1_R2.fastq.gz"],
log:
"logs/download.log",
shell:
# wget streams the tarball to stdout and tar extracts it from stdin;
# the subshell's stdout and stderr both go to the log file.
"(wget -O - https://zenodo.org/record/3992790/files/test_reads.tar.gz | tar -xzf -) > {log} 2>&1"
73 changes: 73 additions & 0 deletions bio/megahit/wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""Snakemake wrapper for megahit."""

__author__ = "Jie Zhu @alienzj"
__copyright__ = "Copyright 2025, Jie Zhu"
__email__ = "[email protected]"
__license__ = "MIT"

import os, tempfile, shutil
from snakemake.shell import shell
from snakemake_wrapper_utils.snakemake import get_mem

# Resolve the output directory from the first declared output, plus the
# files MEGAHIT is expected to leave in it once the run has finished.
output_dir = os.path.split(snakemake.output[0])[0]
contigs_file = snakemake.output.get("contigs", os.path.join(output_dir, "contigs.fa"))
# MEGAHIT always names its assembly `final.contigs.fa`; it is renamed to
# `contigs_file` at the end of the script.
contigs_file_original = os.path.join(output_dir, "final.contigs.fa")
options_file = snakemake.output.get("options", os.path.join(output_dir, "options.json"))
log_file = snakemake.output.get("log", os.path.join(output_dir, "log"))

# parse params
extra = snakemake.params.get("extra", "")
# Shell redirection for stdout+stderr; expands to nothing when the rule
# declares no `log:`, so the command below never indexes an empty log list.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
# get_mem returns KiB here; multiplied by 1024 to pass bytes to `megahit -m`.
memory_requirements = get_mem(snakemake, out_unit="KiB") * 1024


def _as_list(value):
    """Return *value* as a list of paths; a single path string becomes a one-item list."""
    return [value] if isinstance(value, str) else list(value)


def _csv(value):
    """Join one or more paths with commas, as MEGAHIT expects for multi-file options."""
    return ",".join(str(path) for path in _as_list(value))


# parse short reads
# A single file given as `reads=` arrives as a plain string; wrap it so that
# len() and indexing below operate on files rather than on characters.
if hasattr(snakemake.input, "reads"):
    reads = _as_list(snakemake.input.reads)
else:
    reads = list(snakemake.input)

input_arg = ""

# handle named inputs if available, otherwise the first two positional reads
if hasattr(snakemake.input, "r1") and hasattr(snakemake.input, "r2"):
    input_arg += " -1 {} -2 {} ".format(
        _csv(snakemake.input.r1), _csv(snakemake.input.r2)
    )
elif len(reads) >= 2:
    input_arg += " -1 {} -2 {} ".format(reads[0], reads[1])

# handle interleaved reads if specified (third positional read otherwise)
if hasattr(snakemake.input, "interleaved"):
    input_arg += " --12 {} ".format(_csv(snakemake.input.interleaved))
elif len(reads) >= 3 and not hasattr(snakemake.input, "r1"):
    input_arg += " --12 {} ".format(reads[2])

# handle additional reads if specified (fourth positional read otherwise)
if hasattr(snakemake.input, "unpaired"):
    input_arg += " --read {} ".format(_csv(snakemake.input.unpaired))
elif len(reads) >= 4 and not hasattr(snakemake.input, "r1"):
    input_arg += " --read {} ".format(reads[3])

# Fail early with an actionable message instead of launching MEGAHIT
# without any read option.
if not input_arg.strip():
    raise ValueError(
        "No usable reads found in input: provide r1/r2, interleaved or "
        "unpaired inputs, or at least two positional reads."
    )

# MEGAHIT is pointed at a fresh directory next to the final output directory;
# the result is moved into place only after the `done` marker file is present.
with tempfile.TemporaryDirectory(dir=os.path.dirname(output_dir)) as temp_dir:
    output_temp_dir = os.path.join(temp_dir, "temp")

    shell(
        "megahit "
        " -t {snakemake.threads} "
        " -m {memory_requirements} "
        " -o {output_temp_dir} "
        " {input_arg} "
        " {extra} "
        " {log} "
    )

    if os.path.exists(os.path.join(output_temp_dir, "done")):
        # Replace the whole output directory with the finished assembly.
        shell("rm -rf {output_dir}")
        shutil.move(output_temp_dir, output_dir)

        # Rename the assembly only after a fresh result has been moved into
        # place, and only when the companion files are where they are expected.
        if (
            os.path.exists(contigs_file_original)
            and os.path.exists(options_file)
            and os.path.exists(log_file)
        ):
            shutil.move(contigs_file_original, contigs_file)
14 changes: 14 additions & 0 deletions test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6398,6 +6398,20 @@ def test_metaspades(run):
)


def test_megahit(run):
    """Execute the ``run_megahit`` rule of the MEGAHIT wrapper test workflow."""
    wrapper_dir = "bio/megahit"
    command = ["snakemake", "run_megahit", "--cores", "2", "--use-conda", "-F"]
    run(wrapper_dir, command)


def test_verifybamid2(run):
run(
"bio/verifybamid/verifybamid2",
Expand Down
Loading