Skip to content

Commit

Permalink
v5.1.
Browse files Browse the repository at this point in the history
  • Loading branch information
bastiaanvonmeijenfeldt committed Jun 22, 2020
1 parent 43bd6c1 commit f76afd0
Show file tree
Hide file tree
Showing 12 changed files with 2,669 additions and 2,602 deletions.
58 changes: 33 additions & 25 deletions CAT_pack/CAT
Original file line number Diff line number Diff line change
Expand Up @@ -12,43 +12,49 @@ import summarise


def usage():
    """Write the short usage synopsis and the 'argument required' error.

    The text is written to ``sys.stdout`` followed by a single newline.
    Nothing is returned.
    """
    message = (
        'usage: CAT (prepare | contigs | bin | bins | add_names | '
        'summarise) [-v / --version] [-h / --help]\n'
        'CAT: error: one of the arguments prepare contigs bin bins '
        'add_names summarise is required')

    sys.stdout.write('{0}\n'.format(message))

    return


def version():
    """Write the CAT version banner to stdout.

    The banner is assembled from the ``__version__``, ``__date__`` and
    ``__author__`` attributes of the ``about`` module and is followed by a
    single newline. Nothing is returned.
    """
    message = 'CAT v{0} ({1}) by {2}.'.format(
        about.__version__, about.__date__, about.__author__)

    sys.stdout.write('{0}\n'.format(message))

    return


def help():
    """Write the full top-level help text to stdout.

    The text lists the available subcommands (prepare, contigs, bin, bins,
    add_names, summarise) and the two optional flags, and is followed by a
    single newline. Nothing is returned.

    NOTE(review): the name shadows the builtin ``help``; it is kept because
    it is part of this script's existing interface.
    """
    message = (
        'usage: CAT (prepare | contigs | bin | bins | add_names | '
        'summarise) [-v / --version] [-h / --help]\n\n'
        'Run Contig Annotation Tool (CAT) or '
        'Bin Annotation Tool (BAT).\n\n'
        'Required choice:\n'
        ' prepare\t\tDownload database files and construct databases.\n'
        ' contigs\t\tRun CAT.\n'
        ' bin\t\t\tRun BAT on a single bin.\n'
        ' bins\t\t\tRun BAT on a set of bins.\n'
        ' add_names\t\tAdd taxonomic names to CAT or BAT output files.\n'
        ' summarise\t\tSummarise a named CAT or BAT classification file.'
        '\n\n'
        'Optional arguments:\n'
        ' -v, --version\t\tPrint version information and exit.\n'
        ' -h, --help\t\tShow this help message and exit.')

    sys.stdout.write('{0}\n'.format(message))

    return


def main():
if len(sys.argv) == 1:
usage()
Expand All @@ -71,6 +77,8 @@ def main():
else:
usage()

return


# Script entry point: dispatch on the command line only when executed
# directly, not when imported.
if __name__ == '__main__':
    main()
4 changes: 2 additions & 2 deletions CAT_pack/about.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3

# Package metadata; these three attributes are read through the `about`
# module to build the version banner.
__author__ = 'F. A. Bastiaan von Meijenfeldt'
__version__ = '5.1'
__date__ = '22 June, 2020'
221 changes: 108 additions & 113 deletions CAT_pack/add_names.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python3

import argparse
import os
import sys

import about
Expand All @@ -11,115 +10,121 @@


def parse_arguments():
    """Parse the command line for ``CAT add_names``.

    Registers three required options (input file, output file, taxonomy
    folder) and the optional flags ``--only_official``, ``--exclude_scores``,
    ``--force``, ``-q/--quiet`` and ``-h/--help``.

    Returns:
        The ``argparse.Namespace`` of recognised arguments, after it has
        been passed to ``shared.expand_arguments``.

    Raises:
        SystemExit: if any unrecognised argument remains (apart from a
            leading ``add_names`` token), or through argparse itself when a
            required option is missing or ``--help`` is given.

    NOTE(review): ``shared.PathAction`` and ``shared.expand_arguments`` are
    defined outside this view. The caller reads ``args.log_file``,
    ``args.nodes_dmp`` and ``args.names_dmp``, none of which are declared
    here, so presumably ``expand_arguments`` adds them -- confirm.
    """
    parser = argparse.ArgumentParser(
        prog='CAT add_names',
        description='Add taxonomic names to CAT or BAT output files.',
        usage='CAT add_names -i -o -t [options] [-h / --help]',
        add_help=False)

    required = parser.add_argument_group('Required arguments')

    required.add_argument(
        '-i',
        '--input_file',
        dest='input_file',
        metavar='',
        required=True,
        type=str,
        action=shared.PathAction,
        help=('Path to input file. Can be either classification output '
              'file or ORF2LCA output file.'))
    required.add_argument(
        '-o',
        '--output_file',
        dest='output_file',
        metavar='',
        required=True,
        type=str,
        action=shared.PathAction,
        help='Path to output file.')
    required.add_argument(
        '-t',
        '--taxonomy_folder',
        dest='taxonomy_folder',
        metavar='',
        required=True,
        type=str,
        action=shared.PathAction,
        help='Path to folder that contains taxonomy files.')

    optional = parser.add_argument_group('Optional arguments')

    optional.add_argument(
        '--only_official',
        dest='only_official',
        required=False,
        action='store_true',
        help=('Only output official rank names (i.e., superkingdom, '
              'phylum, class, order, family, genus, species).'))
    optional.add_argument(
        '--exclude_scores',
        dest='exclude_scores',
        required=False,
        action='store_true',
        help=('Do not include bit-score support scores in the lineage of '
              'a classification output file.'))
    optional.add_argument(
        '--force',
        dest='force',
        required=False,
        action='store_true',
        help='Force overwrite existing files.')
    optional.add_argument(
        '-q',
        '--quiet',
        dest='quiet',
        required=False,
        action='store_true',
        help='Suppress verbosity.')
    optional.add_argument(
        '-h',
        '--help',
        action='help',
        help='Show this help message and exit.')

    (args, extra_args) = parser.parse_known_args()

    # A leading 'add_names' token is tolerated among the leftovers
    # (presumably the subcommand name still present in sys.argv -- confirm
    # against the dispatcher); anything else is an error.
    extra_args = [arg for (i, arg) in enumerate(extra_args) if
                  (i, arg) != (0, 'add_names')]
    if extra_args:
        sys.exit('error: too much arguments supplied:\n{0}'.format(
            '\n'.join(extra_args)))

    # Add extra arguments.
    shared.expand_arguments(args)

    return args


def add_names(args):
(input_file,
output_file,
taxonomy_folder,
only_official,
exclude_scores,
force,
quiet) = check.convert_arguments(args)
def run():
args = parse_arguments()

# Currently add_names does not allow for a log file.
log_file = None

message = '# CAT v{0}.'.format(about.__version__)
shared.give_user_feedback(message, log_file, quiet, show_time=False)
shared.give_user_feedback(message, args.log_file, args.quiet,
show_time=False)

errors = []

errors.append(check.check_input_file(input_file, log_file, quiet))
errors.append(
check.check_input_file(args.input_file, args.log_file, args.quiet))

if not force:
errors.append(check.check_output_file(output_file, log_file, quiet))
if not args.force:
errors.append(
check.check_output_file(
args.output_file, args.log_file, args.quiet))

if True in errors:
sys.exit(1)

(nodes_dmp,
names_dmp,
prot_accession2taxid_file) = check.inspect_taxonomy_folder(taxonomy_folder)

(taxid2parent, taxid2rank) = tax.import_nodes(nodes_dmp, log_file, quiet)
taxid2name = tax.import_names(names_dmp, log_file, quiet)
(taxid2parent,
taxid2rank) = tax.import_nodes(
args.nodes_dmp, args.log_file, args.quiet)
taxid2name = tax.import_names(args.names_dmp, args.log_file, args.quiet)

message = 'Appending names...'
shared.give_user_feedback(message, log_file, quiet)
shared.give_user_feedback(message, args.log_file, args.quiet)

with open(input_file, 'r') as f1:
with open(args.input_file, 'r') as f1:
for line in f1:
if line.startswith('#'):
line = line.rstrip().split('\t')
Expand All @@ -128,11 +133,9 @@ def add_names(args):
lineage_index = line.index('lineage')
except:
message = ('{0} is not a supported classification file.'
''.format(input_file))
shared.give_user_feedback(message,
log_file,
quiet,
error=True)
''.format(input_file))
shared.give_user_feedback(
message, args.log_file, args.quiet, error=True)

sys.exit(1)

Expand All @@ -145,20 +148,20 @@ def add_names(args):

break
else:
message = ('{0} is not a supported classification file.'
''.format(input_file))
message = ('{0} is not a supported classification file.'.format(
args.input_file))
shared.give_user_feedback(message, log_file, quiet, error=True)

sys.exit(1)

with open(input_file, 'r') as f1, open(output_file, 'w') as outf1:
with open(args.input_file, 'r') as f1, open(args.output_file, 'w') as outf1:
for line in f1:
line = line.rstrip()

if line.startswith('#'):
if only_official:
if args.only_official:
outf1.write('{0}\tsuperkingdom\tphylum\tclass\torder\t'
'family\tgenus\tspecies\n'.format(line))
'family\tgenus\tspecies\n'.format(line))
else:
outf1.write('{0}\tfull lineage names\n'.format(line))

Expand All @@ -173,7 +176,7 @@ def add_names(args):
continue

if (line[1].startswith('no taxid found') or
line[2].startswith('no taxid found')):
line[2].startswith('no taxid found')):
# ORF has database hits but the accession number is not found
# in the taxonomy files.
outf1.write('{0}\n'.format('\t'.join(line)))
Expand All @@ -182,34 +185,26 @@ def add_names(args):

lineage = line[lineage_index].split(';')

if scores_index and not exclude_scores:
if scores_index and not args.exclude_scores:
scores = line[scores_index].split(';')
else:
scores = None

if only_official:
names = tax.convert_to_official_names(lineage,
taxid2rank,
taxid2name,
scores)
if args.only_official:
names = tax.convert_to_official_names(
lineage, taxid2rank, taxid2name, scores)
else:
names = tax.convert_to_names(lineage,
taxid2rank,
taxid2name,
scores)
names = tax.convert_to_names(
lineage, taxid2rank, taxid2name, scores)

outf1.write('{0}\t{1}\n'.format('\t'.join(line), '\t'.join(names)))

message = 'Names written to {0}!'.format(output_file)
shared.give_user_feedback(message, log_file, quiet)


def run():
args = parse_arguments()
message = 'Names written to {0}!'.format(args.output_file)
shared.give_user_feedback(message, args.log_file, args.quiet)

return


add_names(args)


if __name__ == '__main__':
    # This module is not meant to be executed directly; exit with a message
    # that points the user at the 'CAT add_names' command instead.
    sys.exit('Run \'CAT add_names\' to add taxonomic names to CAT or BAT '
             'output files.')
Loading

0 comments on commit f76afd0

Please sign in to comment.