diff --git a/README.txt b/README.txt index 6560a0024..9b007dd59 100644 --- a/README.txt +++ b/README.txt @@ -1,17 +1,17 @@ + +.. image:: http://genomicsandhealth.org/files/logo_ga.png + ============================== GA4GH Reference Implementation ============================== -A reference implementation of the APIs defined in the schemas repository. - -************************* -Initial skeleton overview -************************* +This is a prototype for the GA4GH reference client and +server applications. It is under heavy development, and many aspects of +the layout and APIs will change as requirements are better understood. +If you would like to help, please check out our list of +`issues `_! -This is a proposed skeleton layout for the GA4GH reference client and -server applications. As such, nothing is finalised and all aspects of -the design and implementation are open for discussion and debate. The overall -goals of the project are: +Our aims for this implementation are: Simplicity/clarity The main goal of this implementation is to provide an easy to understand @@ -36,45 +36,15 @@ Ease of use make installing the ``ga4gh`` reference code very easy across a range of operating systems. - - -************* -Trying it out -************* - -The project is designed to be published as a `PyPI `_ -package, so ultimately installing the reference client and server programs -should be as easy as:: - - $ pip install ga4gh - -However, the code is currently only a proposal, so it has not been uploaded to -the Python package index. The best way to try out the code right now is to -use `virtualenv `_. After cloning -the git repo, and changing to the project directory, do the following:: - - $ virtualenv testenv - $ source testenv/bin/activate - $ python setup.py install - -This should install the ``ga4gh_server`` and ``ga4gh_client`` scripts into the -virtualenv and update your ``PATH`` so that they are available. 
When you have -finished trying out the programs you can leave the virtualenv using:: - - $ deactivate - -The virtualenv can be restarted at any time, and can also be deleted -when you no longer need it. - ******************************** Serving variants from a VCF file ******************************** -Two implementations of the variants API is available that can serve data based -on existing VCF files. This backends are based on tabix and `wormtable -`_, which is a Python library -to handle large scale tabular data. See `Wormtable backend`_ for instructions -on serving VCF data from the GA4GH API. +Two implementations of the variants API are available that can serve data based +on existing VCF files. These backends are based on tabix and `wormtable +`_, which is a Python library to +handle large scale tabular data. See `Wormtable backend`_ for instructions on +serving VCF data from the GA4GH API. ***************** Wormtable backend @@ -159,38 +129,41 @@ building and indexing such large tables. Tabix backend ***************** -The tabix backend allows us to serve variants from an arbitrary VCF file. -The VCF file must first be indexed with `tabix `_. -Many projects, including the `1000 genomes project -`_, release files with tabix -indicies already precomputed. This backend can serve such datasets without any -preprocessing via the command: +The tabix backend allows us to serve variants from an arbitrary VCF file. The +VCF file must first be indexed with `tabix +`_. Many projects, including the +`1000 genomes project +`_, release files +with tabix indices already precomputed. This backend can serve such datasets +without any preprocessing via the command:: - $ python ga4gh/scripts/server.py tabix DATADIR + $ ga4gh_server tabix DATADIR -where DATADIR is a directory that contains folders of tabix-indexed VCF file(s). There cannot -be more than one VCF file in any subdirectory that has data for the same reference contig. 
+where DATADIR is a directory that contains subdirectories of tabix-indexed VCF +file(s). There cannot be more than one VCF file in any subdirectory that has +data for the same reference contig. ****** Layout ****** -The code for the project is held in the ``ga4gh`` package, which corresponds -to the ``ga4gh`` directory in the project root. Within this package, -the functionality is split between the ``client``, ``server`` and -``protocol`` modules. There is also a subpackage called ``scripts`` -which holds the code defining the command line interfaces for the +The code for the project is held in the ``ga4gh`` package, which corresponds to +the ``ga4gh`` directory in the project root. Within this package, the +functionality is split between the ``client``, ``server``, ``protocol`` and +``cli`` modules. The ``cli`` module contains the definitions for the ``ga4gh_client`` and ``ga4gh_server`` programs. -For development purposes, it is useful to be able to run the command -line programs directly without installing them. To do this, make hard links -to the files in the scripts directory to the project root and run them -from there; e.g:: +For development purposes, it is useful to be able to run the command line +programs directly without installing them. To do this, use the +``server_dev.py`` and ``client_dev.py`` scripts. (These are just shims to +facilitate development, and are not intended to be distributed. The +distributed versions of the programs are packaged using the setuptools +``entry_points`` keyword; see ``setup.py`` for details). For example, to run +the server command, simply run:: - $ ln ga4gh/scripts/server.py . - $ python server.py - usage: server.py [-h] [--port PORT] [--verbose] {help,simulate} ... - server.py: error: too few arguments + $ python server_dev.py + usage: server_dev.py [-h] [--port PORT] [--verbose] {help,wormtable,tabix} ... 
+ server_dev.py: error: too few arguments ++++++++++++ Coding style @@ -199,6 +172,13 @@ Coding style The code follows the guidelines of `PEP 8 `_ in most cases. The only notable difference is the use of camel case over underscore delimited identifiers; this -is done for consistency with the GA4GH API. The code was checked for compliance +is done for consistency with the GA4GH API. Code should be checked for compliance using the `pep8 `_ tool. + +********** +Deployment +********** + +*TODO* Give simple instructions for deploying the server on common platforms +like Apache and Nginx. diff --git a/client_dev.py b/client_dev.py new file mode 100644 index 000000000..da8848159 --- /dev/null +++ b/client_dev.py @@ -0,0 +1,7 @@ +""" +Simple shim for running the client program during development. +""" +import ga4gh.cli + +if __name__ == "__main__": + ga4gh.cli.client_main() diff --git a/ga4gh/scripts/client.py b/ga4gh/cli.py similarity index 67% rename from ga4gh/scripts/client.py rename to ga4gh/cli.py index 1f5b2b1a8..1d9248b14 100644 --- a/ga4gh/scripts/client.py +++ b/ga4gh/cli.py @@ -1,5 +1,7 @@ """ -Command line interface for the ga4gh reference implementation. +Command line interface programs for the GA4GH reference implementation. + +TODO: document how to use these for development and simple deployment. """ from __future__ import division from __future__ import print_function @@ -7,11 +9,94 @@ import time import argparse +import werkzeug.serving import ga4gh +import ga4gh.server import ga4gh.client import ga4gh.protocol +############################################################################## +# Server +############################################################################## + +class ServerRunner(object): + """ + Superclass of server runner; takes care of functionality common to + all backends. 
+ """ + def __init__(self, args): + backend = self.getBackend(args) + self._port = args.port + self._httpHandler = ga4gh.server.HTTPHandler(backend) + + def run(self): + werkzeug.serving.run_simple( + '', self._port, self._httpHandler.wsgiApplication, + use_reloader=True) + + +class WormtableRunner(ServerRunner): + """ + Runner class to run the server using the wormtable based backend. + """ + def getBackend(self, args): + backend = ga4gh.server.WormtableBackend(args.dataDir) + return backend + + +class TabixRunner(ServerRunner): + """ + Runner class to start the server using a tabix backend. + """ + def getBackend(self, args): + backend = ga4gh.server.TabixBackend(args.dataDir) + return backend + + +def server_main(): + parser = argparse.ArgumentParser(description="GA4GH reference server") + # Add global options + parser.add_argument( + "--port", "-P", default=8000, type=int, + help="The port to listen on") + parser.add_argument('--verbose', '-v', action='count', default=0) + subparsers = parser.add_subparsers(title='subcommands',) + + # help + helpParser = subparsers.add_parser( + "help", + description="ga4gh_server help", + help="show this help message and exit") + # Wormtable backend + wtbParser = subparsers.add_parser( + "wormtable", + description="Serve the API using a wormtable based backend.", + help="Serve data from tables.") + wtbParser.add_argument( + "dataDir", + help="The directory containing the wormtables to be served.") + wtbParser.set_defaults(runner=WormtableRunner) + # Tabix + tabixParser = subparsers.add_parser( + "tabix", + description="Serve the API using a tabix based backend.", + help="Serve data from Tabix indexed VCFs") + tabixParser.add_argument( + "dataDir", + help="The directory containing VCFs") + tabixParser.set_defaults(runner=TabixRunner) + + args = parser.parse_args() + if "runner" not in args: + parser.print_help() + else: + runner = args.runner(args) + runner.run() + 
+############################################################################## +# Client +############################################################################## class VariantSetSearchRunner(object): """ @@ -130,7 +215,7 @@ def addUrlArgument(parser): parser.add_argument("baseUrl", help="The URL of the API endpoint") -def main(): +def client_main(): parser = argparse.ArgumentParser(description="GA4GH reference client") # Add global options parser.add_argument('--verbose', '-v', action='count', default=0) @@ -173,6 +258,3 @@ def main(): else: runner = args.runner(args) runner.run() - -if __name__ == "__main__": - main() diff --git a/ga4gh/scripts/__init__.py b/ga4gh/scripts/__init__.py deleted file mode 100644 index 53fb90199..000000000 --- a/ga4gh/scripts/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Package holding code for executable scripts. -""" diff --git a/ga4gh/scripts/server.py b/ga4gh/scripts/server.py deleted file mode 100644 index 29d7abc70..000000000 --- a/ga4gh/scripts/server.py +++ /dev/null @@ -1,91 +0,0 @@ -""" -Command line interface for the ga4gh reference implementation. -""" -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import argparse - -import ga4gh -import ga4gh.server - -import werkzeug.serving - - -class ServerRunner(object): - """ - Superclass of server runner; takes care of functionality common to - all backends. - """ - def __init__(self, args): - backend = self.getBackend(args) - self._port = args.port - self._httpHandler = ga4gh.server.HTTPHandler(backend) - - def run(self): - werkzeug.serving.run_simple( - '', self._port, self._httpHandler.wsgiApplication, - use_reloader=True) - - -class WormtableRunner(ServerRunner): - """ - Runner class to run the server using the wormtable based backend. 
- """ - def getBackend(self, args): - backend = ga4gh.server.WormtableBackend(args.dataDir) - return backend - - -class TabixRunner(ServerRunner): - """ - Runner class to start the server using a tabix backend. - """ - def getBackend(self, args): - backend = ga4gh.server.TabixBackend(args.dataDir) - return backend - - -def main(): - parser = argparse.ArgumentParser(description="GA4GH reference server") - # Add global options - parser.add_argument( - "--port", "-P", default=8000, type=int, - help="The port to listen on") - parser.add_argument('--verbose', '-v', action='count', default=0) - subparsers = parser.add_subparsers(title='subcommands',) - - # help - helpParser = subparsers.add_parser( - "help", - description="ga4gh_server help", - help="show this help message and exit") - # Wormtable backend - wtbParser = subparsers.add_parser( - "wormtable", - description="Serve the API using a wormtable based backend.", - help="Serve data from tables.") - wtbParser.add_argument( - "dataDir", - help="The directory containing the wormtables to be served.") - wtbParser.set_defaults(runner=WormtableRunner) - # Tabix - tabixParser = subparsers.add_parser( - "tabix", - description="Serve the API using a tabix based backend.", - help="Serve data from Tabix indexed VCFs") - tabixParser.add_argument( - "dataDir", - help="The directory containing VCFs") - tabixParser.set_defaults(runner=TabixRunner) - - args = parser.parse_args() - if "runner" not in args: - parser.print_help() - else: - runner = args.runner(args) - runner.run() - -if __name__ == "__main__": - main() diff --git a/server_dev.py b/server_dev.py new file mode 100644 index 000000000..bc2d04bc3 --- /dev/null +++ b/server_dev.py @@ -0,0 +1,7 @@ +""" +Simple shim for running the server program during development. 
+""" +import ga4gh.cli + +if __name__ == "__main__": + ga4gh.cli.server_main() diff --git a/setup.py b/setup.py index ab7775b0c..8f9c9c4bf 100644 --- a/setup.py +++ b/setup.py @@ -33,14 +33,14 @@ def parse_version(module_file): name="ga4gh", version=ga4gh_version, long_description=ga4gh_readme, - packages=["ga4gh", "ga4gh.scripts"], + packages=["ga4gh"], author="AUTHOR FIXME", author_email="FIXME@somewhere.org", url="http://pypi.python.org/pypi/ga4gh", entry_points={ 'console_scripts': [ - 'ga4gh_client=ga4gh.scripts.client:main', - 'ga4gh_server=ga4gh.scripts.server:main', + 'ga4gh_client=ga4gh.cli:client_main', + 'ga4gh_server=ga4gh.cli:server_main', ] }, install_requires=requirements,