diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..cd86de2ee
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+/build
+*.pyc
+/src/_compress.c
+*.egg
+*.so
+*.egg-info
+
+# Coverage
+htmlcov
+.coverage
+coverage.xml
+junit.xml
diff --git a/.project b/.project
new file mode 100644
index 000000000..423105518
--- /dev/null
+++ b/.project
@@ -0,0 +1,109 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+    <name>arctic</name>
+    <comment></comment>
+    <projects>
+    </projects>
+    <buildSpec>
+        <buildCommand>
+            <name>org.python.pydev.PyDevBuilder</name>
+            <arguments>
+            </arguments>
+        </buildCommand>
+    </buildSpec>
+    <natures>
+        <nature>org.python.pydev.pythonNature</nature>
+    </natures>
+    <filteredResources>
+        <filter>
+            <id>1321466330118</id>
+            <name></name>
+            <type>10</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-*.egg-info</arguments>
+            </matcher>
+        </filter>
+        <filter>
+            <id>1321466330219</id>
+            <name></name>
+            <type>14</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-*.egg</arguments>
+            </matcher>
+        </filter>
+        <filter>
+            <id>1321466330229</id>
+            <name></name>
+            <type>26</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-__pycache__</arguments>
+            </matcher>
+        </filter>
+        <filter>
+            <id>1321466330237</id>
+            <name></name>
+            <type>6</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-.pydevproject</arguments>
+            </matcher>
+        </filter>
+        <filter>
+            <id>1321466330246</id>
+            <name></name>
+            <type>22</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-*.pyc</arguments>
+            </matcher>
+        </filter>
+        <filter>
+            <id>1321466330785</id>
+            <name></name>
+            <type>10</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-*.egg-info</arguments>
+            </matcher>
+        </filter>
+        <filter>
+            <id>1321466330808</id>
+            <name></name>
+            <type>14</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-*.egg</arguments>
+            </matcher>
+        </filter>
+        <filter>
+            <id>1321466330816</id>
+            <name></name>
+            <type>26</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-__pycache__</arguments>
+            </matcher>
+        </filter>
+        <filter>
+            <id>1321466330826</id>
+            <name></name>
+            <type>6</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-.pydevproject</arguments>
+            </matcher>
+        </filter>
+        <filter>
+            <id>1321466330835</id>
+            <name></name>
+            <type>22</type>
+            <matcher>
+                <id>org.eclipse.ui.ide.multiFilter</id>
+                <arguments>1.0-name-matches-false-false-*.pyc</arguments>
+            </matcher>
+        </filter>
+    </filteredResources>
+</projectDescription>
diff --git a/.pydevproject b/.pydevproject
new file mode 100644
index 000000000..fa5607f9d
--- /dev/null
+++ b/.pydevproject
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?><pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
+<path>/arctic</path>
+</pydev_pathproperty>
+</pydev_project>
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000..e9ab0b39d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,458 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..e3c4594ed
--- /dev/null
+++ b/README.md
@@ -0,0 +1,122 @@
+# [Arctic TimeSeries and Tick store](https://github.com/ahlmss/arctic)
+
+Arctic is a high performance datastore for numeric data. It supports [Pandas](http://pandas.pydata.org/),
+[numpy](http://www.numpy.org/) arrays and pickled objects out-of-the-box, with pluggable support for
+other data types and optional versioning.
+
+Arctic can query millions of rows per second per client, achieves ~10x compression on network bandwidth,
+~10x compression on disk, and scales to hundreds of millions of rows per second per
+[MongoDB](https://www.mongodb.org/) instance.
+
+Arctic has been under active development at [Man AHL](http://www.ahl.com/) since 2012.
+
+## Quickstart
+
+
+### Run a MongoDB
+
+```
+mongod --dbpath <path/to/db_directory>
+```
+
+### Using VersionStore
+
+```
+from arctic import Arctic
+
+# Connect to local MongoDB
+store = Arctic('localhost')
+
+# Create the library - defaults to VersionStore
+store.initialize_library('NASDAQ')
+
+# Access the library
+library = store['NASDAQ']
+
+# Load some data - maybe from Quandl
+aapl = Quandl.get("NASDAQ/AAPL", authtoken="your token here")
+
+# Store the data in the library
+library.write('AAPL', aapl, metadata={'source': 'Quandl'})
+
+# Reading the data
+item = library.read('AAPL')
+aapl = item.data
+metadata = item.metadata
+```
+
+VersionStore supports much more: [See the HowTo](howtos/how_to_use_arctic.py)!
+
+
+### Adding your own storage engine
+
+Plugging a custom class in as a library type is straightforward. [This example
+shows how.](howtos/how_to_custom_arctic_library.py)
+
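+Concretely, a library type is a class that takes an `ArcticLibraryBinding` in its constructor,
+exposes an `initialize_library` classmethod, and is registered via `register_library_type`.
+A minimal, hypothetical sketch (the `KeyValueStore` name and its `read`/`write` methods are
+illustrative, not part of Arctic's shipped API):
+
+```
+from arctic import Arctic, register_library_type
+
+class KeyValueStore(object):
+    """Toy library type: stores one raw document per symbol."""
+    _LIBRARY_TYPE = 'example.KeyValueStore'
+
+    def __init__(self, arctic_lib):
+        self._arctic_lib = arctic_lib
+        self._collection = arctic_lib.get_top_level_collection()
+
+    @classmethod
+    def initialize_library(cls, arctic_lib, **kwargs):
+        # Create whatever indexes the store needs
+        arctic_lib.get_top_level_collection().create_index('symbol')
+
+    def write(self, symbol, data):
+        self._collection.update_one({'symbol': symbol}, {'$set': {'data': data}}, upsert=True)
+
+    def read(self, symbol):
+        return self._collection.find_one({'symbol': symbol})['data']
+
+register_library_type(KeyValueStore._LIBRARY_TYPE, KeyValueStore)
+
+store = Arctic('localhost')
+store.initialize_library('username.scratch', KeyValueStore._LIBRARY_TYPE)
+store['username.scratch'].write('EXAMPLE', {'answer': 42})
+store['username.scratch'].read('EXAMPLE')    # -> {'answer': 42}
+```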
+
+
+## Concepts
+
+### Libraries
+
+Arctic provides namespaced *libraries* of data. These libraries allow
+bucketing data by *source*, *user* or some other metric (for example frequency:
+End-Of-Day; Minute Bars; etc.).
+
+Arctic supports multiple data libraries per user. A user (or namespace)
+maps to a MongoDB database (the granularity of mongo authentication). The library
+itself is composed of a number of collections within the database. Libraries look like:
+
+ * user.EOD
+ * user.ONEMINUTE
+
+A library is mapped to a Python class. All library databases in MongoDB are prefixed with 'arctic_'.
+
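+For example (names here are illustrative), initializing `'jdoe.EOD'` creates a library `EOD`
+inside the MongoDB database `arctic_jdoe`:
+
+```
+from arctic import Arctic
+
+store = Arctic('localhost')
+store.initialize_library('jdoe.EOD')   # database 'arctic_jdoe', library 'EOD'
+store.list_libraries()                 # ['jdoe.EOD', ...]
+```
+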
+### Storage Engines
+
+Arctic includes two storage engines:
+
+ * [VersionStore](arctic/store/version_store.py): a key-value versioned TimeSeries store. It supports:
+ * Pandas data types (other Python types pickled)
+ * Multiple versions of each data item. Can easily read previous versions.
+ * Create point-in-time snapshots across symbols in a library
+ * Soft quota support
+ * Hooks for persisting other data types
+ * Audited writes: API for saving metadata and data before and after a write.
+ * a wide range of TimeSeries data frequencies: End-Of-Day to Minute bars
+ * [See the HowTo](howtos/how_to_use_arctic.py)
+ * [TickStore](arctic/tickstore/tickstore.py): Column-oriented tick database. Supports
+   dynamic fields; chunks aren't versioned. Designed for large, continuously ticking data.
+
+Arctic storage implementations are **pluggable**. VersionStore is the default.
+
+
+## Requirements
+
+Arctic currently works with:
+
+ * Python 2.7
+ * pymongo >= 3.0
+ * Pandas
+ * MongoDB >= 2.4.x
+
+
+## Acknowledgements
+
+Arctic has been under active development at [Man AHL](http://www.ahl.com/) since 2012.
+
+It wouldn't be possible without the work of the AHL Data Engineering Team including:
+
+ * [Richard Bounds](https://github.com/richardbounds)
+ * [James Blackburn](https://github.com/jamesblackburn)
+ * [Vlad Mereuta](https://github.com/vmereuta)
+ * Tom Taylor
+ * Tope Olukemi
+ * Drake Siard
+ * ... and many others ...
+
+Contributions welcome!
+
+## License
+
+Arctic is licensed under the GNU LGPL v2.1, a copy of which is included in [LICENSE](LICENSE).
diff --git a/arctic/__init__.py b/arctic/__init__.py
new file mode 100644
index 000000000..1eb3bb06a
--- /dev/null
+++ b/arctic/__init__.py
@@ -0,0 +1,12 @@
+""" The Arctic TimeSeries and Tick store."""
+
+from .arctic import Arctic, register_library_type
+from .arctic import VERSION_STORE, TICK_STORE
+from .store.version_store import register_versioned_storage
+from .store._pandas_ndarray_store import PandasDataFrameStore, PandasSeriesStore, PandasPanelStore
+from .store._ndarray_store import NdarrayStore
+
+register_versioned_storage(PandasDataFrameStore)
+register_versioned_storage(PandasSeriesStore)
+register_versioned_storage(PandasPanelStore)
+register_versioned_storage(NdarrayStore)
diff --git a/arctic/_compression.py b/arctic/_compression.py
new file mode 100644
index 000000000..ebd1c5266
--- /dev/null
+++ b/arctic/_compression.py
@@ -0,0 +1,78 @@
+from .logging import logger
+import _compress as clz4
+
+
+USE_LZ4HC = True # switch to use LZ4HC. Default True
+LZ4HC_N_PARALLEL = 5  # No. of elements to use parallel compression in LZ4HC mode
+LZ4_N_PARALLEL = 50  # No. of elements to use parallel compression in LZ4 mode
+
+
+def use_lz4hc(mode):
+ """
+ Set the global LZ4HC mode
+
+ Parameters
+ ----------
+ mode: `bool`
+ True: Use LZ4HC False: Use LZ4
+ """
+ global USE_LZ4HC
+ USE_LZ4HC = mode
+ logger.info("Setting compression mode to %s" % ("LZ4HC" if mode else "LZ4 (no HC)"))
+
+
+def _should_use_lz4hc():
+ return USE_LZ4HC
+
+
+def _is_interactive_mode():
+ # http://stackoverflow.com/questions/2356399/tell-if-python-is-in-interactive-mode
+ # currently unused - but could in-future flip to LZ4 if in interactive mode
+ import __main__ as main
+ return not hasattr(main, '__file__')
+
+
+def compress_array(str_list):
+ """
+ Compress an array of strings
+
+    Uses LZ4HC when the global USE_LZ4HC flag is set (the default),
+    otherwise plain LZ4. Chunks are compressed in parallel once the list
+    is longer than the relevant *_N_PARALLEL threshold.
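+
+    e.g. an illustrative round-trip (assuming the compiled _compress extension is built)::
+
+        compressed = compress_array(['chunk-one', 'chunk-two'])
+        decompress_array(compressed)  # -> ['chunk-one', 'chunk-two']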
+ """
+ if _should_use_lz4hc():
+        # For LZ4HC_N_PARALLEL chunks or fewer it's quicker to compress sequentially
+ if len(str_list) > LZ4HC_N_PARALLEL:
+ return clz4.compressarrHC(str_list)
+ else:
+ return [clz4.compressHC(s) for s in str_list]
+ else:
+        # For LZ4_N_PARALLEL chunks or fewer it's quicker to compress sequentially
+ if len(str_list) > LZ4_N_PARALLEL:
+ return clz4.compressarr(str_list)
+ else:
+ return [clz4.compress(s) for s in str_list]
+
+
+def compress(_str):
+ """
+ Compress a string
+
+    Uses LZ4HC when the global USE_LZ4HC flag is set (the default),
+    otherwise plain LZ4.
+ """
+ compressfn = clz4.compressHC if _should_use_lz4hc() else clz4.compress
+ return compressfn(_str)
+
+
+def decompress(_str):
+ """
+ Decompress a string
+ """
+ return clz4.decompress(_str)
+
+
+def decompress_array(str_list):
+ """
+ Decompress a list of strings
+ """
+ return clz4.decompressarr(str_list)
diff --git a/arctic/_util.py b/arctic/_util.py
new file mode 100644
index 000000000..2141908f8
--- /dev/null
+++ b/arctic/_util.py
@@ -0,0 +1,54 @@
+from datetime import datetime
+from pandas import DataFrame
+from pandas.util.testing import assert_frame_equal
+from pymongo.errors import OperationFailure
+import string
+
+from .logging import logger
+
+
+def indent(s, num_spaces):
+ s = string.split(s, '\n')
+ s = [(num_spaces * ' ') + line for line in s]
+ s = string.join(s, '\n')
+ return s
+
+
+def are_equals(o1, o2, **kwargs):
+ try:
+ if isinstance(o1, DataFrame):
+            assert_frame_equal(o1, o2, **kwargs)
+ return True
+ return o1 == o2
+ except Exception:
+ return False
+
+
+def enable_sharding(arctic, library_name, hashed=False):
+ c = arctic._conn
+ lib = arctic[library_name]._arctic_lib
+ dbname = lib._db.name
+ library_name = lib.get_top_level_collection().name
+ try:
+ c.admin.command('enablesharding', dbname)
+ except OperationFailure, e:
+ if not 'failed: already enabled' in str(e):
+ raise
+ if not hashed:
+ logger.info("Range sharding 'symbol' on: " + dbname + '.' + library_name)
+ c.admin.command('shardCollection', dbname + '.' + library_name, key={'symbol': 1})
+ else:
+ logger.info("Hash sharding 'symbol' on: " + dbname + '.' + library_name)
+ c.admin.command('shardCollection', dbname + '.' + library_name, key={'symbol': 'hashed'})
+
+
+def enable_powerof2sizes(arctic, library_name):
+ lib = arctic[library_name]._arctic_lib
+ collection = lib.get_top_level_collection()
+ lib._db.command({"collMod" : collection.name, 'usePowerOf2Sizes': "true"})
+ logger.info("usePowerOf2Sizes enabled for %s", collection.name)
+
+ for coll in collection.database.collection_names():
+ if coll.startswith("%s." % collection.name):
+ lib._db.command({"collMod" : coll, 'usePowerOf2Sizes': "true"})
+ logger.info("usePowerOf2Sizes enabled for %s", coll)
diff --git a/arctic/arctic.py b/arctic/arctic.py
new file mode 100644
index 000000000..e5864c16b
--- /dev/null
+++ b/arctic/arctic.py
@@ -0,0 +1,444 @@
+import pymongo
+from pymongo.errors import OperationFailure, AutoReconnect
+from pymongo.read_preferences import ReadPreference
+
+from .auth import authenticate, get_auth
+from .hooks import get_mongodb_uri
+from .logging import logger
+from .decorators import mongo_retry
+from ._util import indent
+
+from .exceptions import LibraryNotFoundException, ArcticException, QuotaExceededException
+from .store import version_store
+from .tickstore import tickstore
+from .tickstore import toplevel
+
+__all__ = ['Arctic', 'VERSION_STORE', 'TICK_STORE', 'register_library_type']
+
+# Default Arctic application name: 'arctic'
+APPLICATION_NAME = 'arctic'
+VERSION_STORE = version_store.VERSION_STORE_TYPE
+TICK_STORE = tickstore.TICK_STORE_TYPE
+LIBRARY_TYPES = {version_store.VERSION_STORE_TYPE: version_store.VersionStore,
+ tickstore.TICK_STORE_TYPE: tickstore.TickStore,
+ toplevel.TICK_STORE_TYPE: toplevel.TopLevelTickStore
+ }
+
+
+def register_library_type(name, type_):
+ """
+    Register an Arctic Library Type handler
+ """
+ if name in LIBRARY_TYPES:
+ raise ArcticException("Library %s already registered as %s" % (name, LIBRARY_TYPES[name]))
+ LIBRARY_TYPES[name] = type_
+
+
+class Arctic(object):
+ """
+ The Arctic class is a top-level God object, owner of all arctic_ databases
+ accessible in Mongo.
+    Each database contains one or more ArcticLibraries which may have
+    implementation-specific functionality.
+
+ Current Mongo Library types:
+ - arctic.VERSION_STORE - Versioned store for chunked Pandas and numpy objects
+ (other Python types are pickled)
+ - arctic.TICK_STORE - Tick specific library. Supports 'snapshots', efficiently
+ stores updates, not versioned.
+
+ Arctic and ArcticLibrary are responsible for Connection setup, authentication,
+ dispatch to the appropriate library implementation, and quotas.
+ """
+ DB_PREFIX = 'arctic'
+ METADATA_COLL = "ARCTIC"
+ METADATA_DOC_ID = "ARCTIC_META"
+
+ _MAX_CONNS = 4
+ __conn = None
+
+ def __init__(self, mongo_host, app_name=APPLICATION_NAME, allow_secondary=False,
+ socketTimeoutMS=10 * 60 * 1000, connectTimeoutMS=2 * 1000,
+ serverSelectionTimeoutMS=30 * 1000):
+ """
+        Constructs an Arctic Datastore.
+
+        Parameters
+        ----------
+ mongo_host: A MongoDB hostname, alias or Mongo Connection
+
+ app_name: `str` is the name of application used for resolving credentials when
+ authenticating against the mongo_host.
+ We will fetch credentials using the authentication hook.
+ Teams should override this such that different applications don't accidentally
+ run with privileges to other applications' databases
+
+ allow_secondary: `bool` indicates if we allow reads against
+ secondary members in the cluster. These reads may be
+ a few seconds behind (but are usually split-second up-to-date).
+
+ serverSelectionTimeoutMS: `int` the main tunable used for configuring how long
+ the pymongo driver will spend on MongoDB cluster discovery. This parameter
+ takes precedence over connectTimeoutMS: https://jira.mongodb.org/browse/DRIVERS-222
+
+ """
+ self._application_name = app_name
+ self._library_cache = {}
+ self._allow_secondary = allow_secondary
+ self._socket_timeout = socketTimeoutMS
+ self._connect_timeout = connectTimeoutMS
+ self._server_selection_timeout = serverSelectionTimeoutMS
+
+ if isinstance(mongo_host, basestring):
+ self.mongo_host = mongo_host
+ else:
+ self.__conn = mongo_host
+ # Workaround for: https://jira.mongodb.org/browse/PYTHON-927
+ mongo_host.server_info()
+ self.mongo_host = ",".join(["{}:{}".format(x[0], x[1]) for x in mongo_host.nodes])
+ self._adminDB = self._conn.admin
+
+ @property
+ def _conn(self):
+ if self.__conn is None:
+ host = get_mongodb_uri(self.mongo_host)
+ logger.info("Connecting to mongo: {0} ({1})".format(self.mongo_host, host))
+ self.__conn = mongo_retry(pymongo.MongoClient)(host=host,
+ maxPoolSize=self._MAX_CONNS,
+ socketTimeoutMS=self._socket_timeout,
+ connectTimeoutMS=self._connect_timeout,
+ serverSelectionTimeoutMS=self._server_selection_timeout)
+ self._adminDB = self.__conn.admin
+
+ # Authenticate against admin for the user
+ auth = get_auth(self.mongo_host, self._application_name, 'admin')
+ if auth:
+ authenticate(self._adminDB, auth.user, auth.password)
+
+ # Accessing _conn is synchronous. The new PyMongo driver may be lazier than the previous.
+ # Force a connection.
+ self.__conn.server_info()
+
+ return self.__conn
+
+ def __str__(self):
+        return "<Arctic at %s, connected to %s>" % (hex(id(self)), str(self._conn))
+
+ def __repr__(self):
+ return str(self)
+
+ def __getstate__(self):
+ return {'mongo_host': self.mongo_host, 'allow_secondary': self._allow_secondary}
+
+ def __setstate__(self, state):
+ return Arctic.__init__(self, **state)
+
+ @mongo_retry
+ def list_libraries(self):
+ """
+ Returns
+ -------
+ list of Arctic library names
+ """
+ libs = []
+ for db in self._conn.database_names():
+ if db.startswith(self.DB_PREFIX + '_'):
+ for coll in self._conn[db].collection_names():
+ if coll.endswith(self.METADATA_COLL):
+ libs.append(db[len(self.DB_PREFIX) + 1:] + "." + coll[:-1 * len(self.METADATA_COLL) - 1])
+ elif db == self.DB_PREFIX:
+ for coll in self._conn[db].collection_names():
+ if coll.endswith(self.METADATA_COLL):
+ libs.append(coll[:-1 * len(self.METADATA_COLL) - 1])
+ return libs
+
+ @mongo_retry
+ def initialize_library(self, library, lib_type=VERSION_STORE, **kwargs):
+ """
+        Create an Arctic Library of a particular type.
+
+ Parameters
+ ----------
+ library : `str`
+ The name of the library. e.g. 'library' or 'user.library'
+
+ lib_type : `str`
+ The type of the library. e.g. arctic.VERSION_STORE or arctic.TICK_STORE
+ Or any type registered with register_library_type
+ Default: arctic.VERSION_STORE
+
+ kwargs :
+ Arguments passed to the Library type for initialization.
+ """
+ l = ArcticLibraryBinding(self, library)
+ # Check that we don't create too many namespaces
+ if len(self._conn[l.database_name].collection_names()) > 3000:
+ raise ArcticException("Too many namespaces %s, not creating: %s" %
+ (len(self._conn[l.database_name].collection_names()), library))
+ l.set_library_type(lib_type)
+ LIBRARY_TYPES[lib_type].initialize_library(l, **kwargs)
+        # Add a 10G quota just in case the user is calling this via the API.
+ if not l.get_quota():
+ l.set_quota(10 * 1024 * 1024 * 1024)
+
+ @mongo_retry
+ def delete_library(self, library):
+ """
+ Delete an Arctic Library, and all associated collections in the MongoDB.
+
+ Parameters
+ ----------
+ library : `str`
+ The name of the library. e.g. 'library' or 'user.library'
+ """
+ l = ArcticLibraryBinding(self, library)
+ colname = l.get_top_level_collection().name
+ logger.info('Dropping collection: %s' % colname)
+ l._db.drop_collection(colname)
+ for coll in l._db.collection_names():
+ if coll.startswith(colname + '.'):
+ logger.info('Dropping collection: %s' % coll)
+ l._db.drop_collection(coll)
+ if library in self._library_cache:
+ del self._library_cache[library]
+ del self._library_cache[l.get_name()]
+
+ def get_library(self, library):
+ """
+ Return the library instance. Can generally use slicing to return the library:
+ arctic_store[library]
+
+ Parameters
+ ----------
+ library : `str`
+ The name of the library. e.g. 'library' or 'user.library'
+ """
+ if library in self._library_cache:
+ return self._library_cache[library]
+
+ try:
+ error = None
+ l = ArcticLibraryBinding(self, library)
+ lib_type = l.get_library_type()
+ except (OperationFailure, AutoReconnect), e:
+ error = e
+
+ if error or not lib_type:
+ raise LibraryNotFoundException("Library %s was not correctly initialized in %s.\nReason: %s" % (library, self, error))
+ elif lib_type not in LIBRARY_TYPES:
+ raise LibraryNotFoundException("Couldn't load LibraryType '%s' for '%s' (has the class been registered?)" %
+ (lib_type, library))
+ instance = LIBRARY_TYPES[lib_type](l)
+ self._library_cache[library] = instance
+ # The library official name may be different from 'library': e.g. 'library' vs 'user.library'
+ self._library_cache[l.get_name()] = instance
+ return self._library_cache[library]
+
+ def __getitem__(self, key):
+ if isinstance(key, basestring):
+ return self.get_library(key)
+ else:
+ raise ArcticException("Unrecognised library specification - use [libraryName]")
+
+ def set_quota(self, library, quota):
+ """
+ Set a quota (in bytes) on this user library. The quota is 'best effort',
+ and should be set conservatively.
+
+ Parameters
+ ----------
+ library : `str`
+ The name of the library. e.g. 'library' or 'user.library'
+
+ quota : `int`
+ Advisory quota for the library - in bytes
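+
+        e.g. a 10 GB advisory quota (illustrative; ``store`` is an Arctic instance)::
+
+            store.set_quota('username.scratch', 10 * 1024 ** 3)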
+ """
+ l = ArcticLibraryBinding(self, library)
+ l.set_quota(quota)
+
+ def get_quota(self, library):
+ """
+ Return the quota currently set on the library.
+
+ Parameters
+ ----------
+ library : `str`
+ The name of the library. e.g. 'library' or 'user.library'
+ """
+ l = ArcticLibraryBinding(self, library)
+ return l.get_quota()
+
+ def check_quota(self, library):
+ """
+ Check the quota on the library, as would be done during normal writes.
+
+ Parameters
+ ----------
+ library : `str`
+ The name of the library. e.g. 'library' or 'user.library'
+
+ Raises
+ ------
+ arctic.exceptions.QuotaExceededException if the quota has been exceeded
+ """
+ l = ArcticLibraryBinding(self, library)
+ l.check_quota()
+
+
+class ArcticLibraryBinding(object):
+ """
+ The ArcticLibraryBinding type holds the binding between the library name and the
+ concrete implementation of the library.
+
+ Also provides access to additional metadata about the library
+ - Access to the library's top-level collection
+ - Enforces quota on the library
+ - Access to custom metadata about the library
+ """
+ DB_PREFIX = Arctic.DB_PREFIX
+ TYPE_FIELD = "TYPE"
+ QUOTA = 'QUOTA'
+
+ quota = None
+ quota_countdown = 0
+
+ @classmethod
+ def _parse_db_lib(clz, library):
+ """
+ Returns the canonical (database_name, library) for the passed in
+ string 'library'.
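+
+        e.g. 'library'      -> ('arctic', 'library')
+             'user.library' -> ('arctic_user', 'library')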
+ """
+ database_name = library.split('.', 2)
+ if len(database_name) == 2:
+ library = database_name[1]
+ if database_name[0].startswith(clz.DB_PREFIX):
+ database_name = database_name[0]
+ else:
+ database_name = clz.DB_PREFIX + '_' + database_name[0]
+ else:
+ database_name = clz.DB_PREFIX
+ return database_name, library
+
+ def __init__(self, arctic, library):
+ self.arctic = arctic
+ database_name, library = self._parse_db_lib(library)
+ self.library = library
+ self.database_name = database_name
+ self._db = self.arctic._conn[database_name]
+ self._auth(self._db)
+ self._library_coll = self._db[library]
+
+ def __str__(self):
+        return """<ArcticLibraryBinding at %s, %s.%s>
+%s""" % (hex(id(self)), self._db.name, self._library_coll.name, indent(str(self.arctic), 4))
+
+ def __repr__(self):
+ return str(self)
+
+ def __getstate__(self):
+ return {'arctic': self.arctic, 'library': '.'.join([self.database_name, self.library])}
+
+ def __setstate__(self, state):
+ return ArcticLibraryBinding.__init__(self, state['arctic'], state['library'])
+
+ @mongo_retry
+ def _auth(self, database):
+        # Get .mongopass details here
+ if not hasattr(self.arctic, 'mongo_host'):
+ return
+
+ auth = get_auth(self.arctic.mongo_host, self.arctic._application_name, database.name)
+ if auth:
+ authenticate(self._db, auth.user, auth.password)
+ self.arctic._conn.close()
+
+ def get_name(self):
+ return self._db.name + '.' + self._library_coll.name
+
+ def get_top_level_collection(self):
+ return self._library_coll
+
+ def set_quota(self, quota_bytes):
+ """
+ Set a quota (in bytes) on this user library. The quota is 'best effort',
+ and should be set conservatively.
+
+ A quota of 0 is 'unlimited'
+ """
+ self.set_library_metadata(ArcticLibraryBinding.QUOTA, quota_bytes)
+ self.quota = quota_bytes
+ self.quota_countdown = 0
+
+ def get_quota(self):
+ """
+ Get the current quota on this user library.
+ """
+ return self.get_library_metadata(ArcticLibraryBinding.QUOTA)
+
+ def check_quota(self):
+ """
+        Check whether the user is within quota. Should be called before
+        every write. Raises QuotaExceededException if the library has
+        exceeded its allotted quota.
+ """
+ # Don't check on every write
+ if self.quota is None:
+ self.quota = self.get_library_metadata(ArcticLibraryBinding.QUOTA)
+ if self.quota is None:
+ self.quota = 0
+
+ if self.quota == 0:
+ return
+
+ # Don't check on every write, that would be slow
+ if self.quota_countdown > 0:
+ self.quota_countdown -= 1
+ return
+
+ # Figure out whether the user has exceeded their quota
+ library = self.arctic[self.get_name()]
+ stats = library.stats()
+
+ def to_gigabytes(bytes):
+ return bytes / 1024. / 1024. / 1024.
+
+ # Have we exceeded our quota?
+ size = stats['totals']['size']
+ count = stats['totals']['count']
+ if size >= self.quota:
+ raise QuotaExceededException("Quota Exceeded: %.3f / %.0f GB used" %
+ (to_gigabytes(size),
+ to_gigabytes(self.quota)))
+
+ # Quota not exceeded, print an informational message and return
+ avg_size = size / count if count > 1 else 100 * 1024
+ remaining = self.quota - size
+ remaining_count = remaining / avg_size
+ if remaining_count < 100:
+ logger.warn("Mongo Quota: %.3f / %.0f GB used" % (to_gigabytes(size),
+ to_gigabytes(self.quota)))
+ else:
+ logger.info("Mongo Quota: %.3f / %.0f GB used" % (to_gigabytes(size),
+ to_gigabytes(self.quota)))
+
+        # Set up a countdown so we skip the quota check for the next few writes.
+ self.quota_countdown = max(remaining_count / 2, 1)
+
+ def get_library_type(self):
+ return self.get_library_metadata(ArcticLibraryBinding.TYPE_FIELD)
+
+ def set_library_type(self, lib_type):
+ self.set_library_metadata(ArcticLibraryBinding.TYPE_FIELD, lib_type)
+
+ @mongo_retry
+ def get_library_metadata(self, field):
+ lib_metadata = self._library_coll[self.arctic.METADATA_COLL].find_one({"_id": self.arctic.METADATA_DOC_ID})
+ if lib_metadata is not None:
+ return lib_metadata.get(field)
+ else:
+ return None
+
+ @mongo_retry
+ def set_library_metadata(self, field, value):
+ self._library_coll[self.arctic.METADATA_COLL].update_one({'_id': self.arctic.METADATA_DOC_ID},
+ {'$set': {field: value}}, upsert=True)
diff --git a/arctic/auth.py b/arctic/auth.py
new file mode 100644
index 000000000..ec0786871
--- /dev/null
+++ b/arctic/auth.py
@@ -0,0 +1,28 @@
+from collections import namedtuple
+
+from .logging import logger
+
+
+def authenticate(db, user, password):
+ """
+ Return True / False on authentication success.
+
+ PyMongo 2.6 changed the auth API to raise on Auth failure.
+ """
+ from pymongo.errors import PyMongoError
+ try:
+ logger.debug("Authenticating {} with {}".format(db, user))
+ return db.authenticate(user, password)
+ except PyMongoError, e:
+ logger.debug("Auth Error %s" % e)
+ return False
+
+
+Credential = namedtuple("MongoCredentials", ['database', 'user', 'password'])
+
+
+def get_auth(host, app_name, database_name):
+ """
+ Authentication hook to allow plugging in custom authentication credential providers
+ """
+ return None
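+
+
+# A deployment-specific implementation might look like the sketch below (illustrative only;
+# how such a hook is wired in is site-specific, e.g. by patching this function):
+#
+#   def get_auth(host, app_name, database_name):
+#       # Look the credentials up in a local file or secrets service, keyed by
+#       # host / application / database.
+#       return Credential(database=database_name, user='arctic_user', password='...')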
diff --git a/arctic/date/__init__.py b/arctic/date/__init__.py
new file mode 100644
index 000000000..709df92b7
--- /dev/null
+++ b/arctic/date/__init__.py
@@ -0,0 +1,5 @@
+from ._daterange import DateRange
+from ._generalslice import OPEN_CLOSED, CLOSED_OPEN, OPEN_OPEN, CLOSED_CLOSED
+from ._util import datetime_to_ms, ms_to_datetime
+from ._util import string_to_daterange, to_pandas_closed_closed
+from ._mktz import mktz, TimezoneError
diff --git a/arctic/date/_daterange.py b/arctic/date/_daterange.py
new file mode 100644
index 000000000..df494f34d
--- /dev/null
+++ b/arctic/date/_daterange.py
@@ -0,0 +1,194 @@
+import datetime
+from datetime import timedelta
+from dateutil.tz import tzlocal
+
+from ..logging import logger
+from ._generalslice import OPEN_OPEN, CLOSED_CLOSED, OPEN_CLOSED, CLOSED_OPEN, GeneralSlice
+from ._parse import parse
+
+
+INTERVAL_LOOKUP = {(True, True): OPEN_OPEN,
+ (False, False): CLOSED_CLOSED,
+ (True, False): OPEN_CLOSED,
+ (False, True): CLOSED_OPEN
+ }
+
+
+class DateRange(GeneralSlice):
+ """
+ Represents a bounded datetime range.
+
+ Ranges may be bounded on either end if a date is
+ specified for the start or end of the range, or unbounded
+ if None is specified for either value. Unbounded ranges will allow
+ all available data to pass through when used as a filter argument
+ on function or method.
+    on a function or method.
+ ===== ==== ============================ ===============================
+ start end interval Meaning
+ ----- ---- ---------------------------- -------------------------------
+ None None any date
+ a None CLOSED_CLOSED or CLOSED_OPEN date >= a
+ a None OPEN_CLOSED or OPEN_OPEN date > a
+ None b CLOSED_CLOSED or OPEN_CLOSED date <= b
+ None b CLOSED_OPEN or OPEN_OPEN date < b
+ a b CLOSED_CLOSED date >= a and date <= b
+ a b OPEN_CLOSED date > a and date <= b
+ a b CLOSED_OPEN date >= a and date < b
+ a b OPEN_OPEN date > a and date < b
+ ===== ==== ============================ ===============================
+
+ Parameters
+ ----------
+ start : `int`, `str` or `datetime.datetime`
+ lower bound date value as an integer, string or datetime object.
+
+ end : `int`, `str` or `datetime.datetime`
+ upper bound date value as an integer, string or datetime object.
+
+ interval : `int`
+ CLOSED_CLOSED, OPEN_CLOSED, CLOSED_OPEN or OPEN_OPEN.
+ **Default is CLOSED_CLOSED**.
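+
+    Examples
+    --------
+    An illustrative check of the interval semantics::
+
+        >>> from datetime import datetime as dt
+        >>> dr = DateRange(dt(2015, 1, 1), dt(2015, 2, 1), CLOSED_OPEN)
+        >>> dt(2015, 1, 1) in dr
+        True
+        >>> dt(2015, 2, 1) in dr
+        False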
+ """
+ def __init__(self, start=None, end=None, interval=CLOSED_CLOSED):
+
+ def _is_dt_type(x):
+ return isinstance(x, (datetime.datetime, datetime.date))
+
+ def _compute_bound(value, desc):
+ if isinstance(value, (int, str)):
+ return parse(str(value))
+ elif _is_dt_type(value):
+ return value
+ elif value is None:
+ return None
+ else:
+ raise TypeError('unsupported type for %s: %s' % (desc, type(value)))
+
+ super(DateRange, self).__init__(_compute_bound(start, "start"), _compute_bound(end, "end"), 1, interval)
+
+ if _is_dt_type(self.start) and _is_dt_type(self.end):
+ if self.start > self.end:
+ raise ValueError('start date (%s) cannot be greater than end date (%s)!'
+ % (self.start, self.end))
+
+ @property
+ def unbounded(self):
+ """True if range is unbounded on either or both ends, False otherwise."""
+ return self.start is None or self.end is None
+
+ def intersection(self, other):
+ """
+ Create a new DateRange representing the maximal range enclosed by this range and other
+ """
+ startopen = other.startopen if self.start is None \
+ else self.startopen if other.start is None \
+ else other.startopen if self.start < other.start \
+ else self.startopen if self.start > other.start \
+ else (self.startopen and other.startopen)
+ endopen = other.endopen if self.end is None \
+ else self.endopen if other.end is None \
+ else other.endopen if self.end > other.end \
+ else self.endopen if self.end < other.end \
+ else (self.endopen and other.endopen)
+
+ new_start = self.start if other.start is None \
+ else other.start if self.start is None \
+ else max(self.start, other.start)
+ new_end = self.end if other.end is None \
+ else other.end if self.end is None \
+ else min(self.end, other.end)
+
+ interval = INTERVAL_LOOKUP[(startopen, endopen)]
+
+ return DateRange(new_start, new_end, interval)
+
+ def as_dates(self):
+ """
+ Create a new DateRange with the datetimes converted to dates and changing to CLOSED/CLOSED.
+ """
+ new_start = self.start.date() if self.start and isinstance(self.start, datetime.datetime) else self.start
+ new_end = self.end.date() if self.end and isinstance(self.end, datetime.datetime) else self.end
+ return DateRange(new_start, new_end, CLOSED_CLOSED)
+
+ def mongo_query(self):
+ """
+        Convert a DateRange into a MongoDB query document. FIXME: Mongo can only handle
+ datetimes in queries, so we should make this handle the case where start/end are
+ datetime.date and extend accordingly (being careful about the interval logic).
+ """
+ comps = {OPEN_CLOSED: ('t', 'te'), OPEN_OPEN: ('t', 't'),
+ CLOSED_OPEN: ('te', 't'), CLOSED_CLOSED: ('te', 'te')}
+ query = {}
+ comp = comps[self.interval]
+ if self.start:
+ query['$g' + comp[0]] = self.start
+ if self.end:
+ query['$l' + comp[1]] = self.end
+ return query
+
+ def get_date_bounds(self):
+ """
+ Return the upper and lower bounds along
+ with operators that are needed to do an 'in range' test.
+ Useful for SQL commands.
+
+ Returns
+ -------
+ tuple: (`str`, `date`, `str`, `date`)
+ (date_gt, start, date_lt, end)
+ e.g.:
+ ('>=', start_date, '<', end_date)
+ """
+ start = end = None
+ date_gt = '>='
+ date_lt = '<='
+ if self:
+ if self.start:
+ start = self.start
+ if self.end:
+ end = self.end
+ if self.startopen:
+ date_gt = '>'
+ if self.endopen:
+ date_lt = '<'
+
+ return date_gt, start, date_lt, end
+
+ def __contains__(self, d):
+ if self.interval == CLOSED_CLOSED:
+ return (self.start is None or d >= self.start) and (self.end is None or d <= self.end)
+ elif self.interval == CLOSED_OPEN:
+ return (self.start is None or d >= self.start) and (self.end is None or d < self.end)
+ elif self.interval == OPEN_CLOSED:
+ return (self.start is None or d > self.start) and (self.end is None or d <= self.end)
+
+ return (self.start is None or d > self.start) and (self.end is None or d < self.end)
+
+ def __repr__(self):
+ return 'DateRange(start=%r, end=%r)' % (self.start, self.end)
+
+ def __eq__(self, rhs):
+ if rhs is None or not (hasattr(rhs, "end") and hasattr(rhs, "start")):
+ return False
+ return self.end == rhs.end and self.start == rhs.start
+
+ def __hash__(self):
+ return hash((self.start, self.end, self.step, self.interval))
+
+ def __getitem__(self, key):
+ if key == 0:
+ return self.start
+ elif key == 1:
+ return self.end
+ else:
+ raise IndexError('Index %s not in range (0:1)' % key)
+
+ __str__ = __repr__
+
+ def __setstate__(self, state):
+ """Called by pickle, PyYAML etc to set state."""
+ self.start = state['start']
+ self.end = state['end']
+ self.interval = state.get('interval') or CLOSED_CLOSED
+ self.step = 1
diff --git a/arctic/date/_generalslice.py b/arctic/date/_generalslice.py
new file mode 100644
index 000000000..3fe1f99cc
--- /dev/null
+++ b/arctic/date/_generalslice.py
@@ -0,0 +1,43 @@
+from enum import Enum
+
+
+class Intervals(Enum):
+ (OPEN_CLOSED, CLOSED_OPEN, OPEN_OPEN, CLOSED_CLOSED) = range(1101, 1105)
+(OPEN_CLOSED, CLOSED_OPEN, OPEN_OPEN, CLOSED_CLOSED) = INTERVALS = Intervals.__members__.values()
+
+
+class GeneralSlice(object):
+ """General slice object, supporting open/closed ranges:
+
+ ===== ==== ============================ ===============================
+ start end interval Meaning
+ ----- ---- ---------------------------- -------------------------------
+ None None any item
+ a None CLOSED_CLOSED or CLOSED_OPEN item >= a
+ a None OPEN_CLOSED or OPEN_OPEN item > a
+ None b CLOSED_CLOSED or OPEN_CLOSED item <= b
+ None b CLOSED_OPEN or OPEN_OPEN item < b
+ a b CLOSED_CLOSED item >= a and item <= b
+ a b OPEN_CLOSED item > a and item <= b
+ a b CLOSED_OPEN item >= a and item < b
+ a b OPEN_OPEN item > a and item < b
+ ===== ==== ============================ ===============================
+ """
+
+ def __init__(self, start, end, step=None, interval=CLOSED_CLOSED):
+ self.start = start
+ self.end = end
+ self.step = step
+ self.interval = interval
+
+ @property
+ def startopen(self):
+ """True if the start of the range is open (item > start),
+ False if the start of the range is closed (item >= start)."""
+ return self.interval in (OPEN_CLOSED, OPEN_OPEN)
+
+ @property
+ def endopen(self):
+ """True if the end of the range is open (item < end),
+ False if the end of the range is closed (item <= end)."""
+ return self.interval in (CLOSED_OPEN, OPEN_OPEN)
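+
+
+# Illustrative example (added commentary): GeneralSlice(2, 5, interval=OPEN_CLOSED)
+# describes items x with 2 < x <= 5, so startopen is True and endopen is False.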
diff --git a/arctic/date/_mktz.py b/arctic/date/_mktz.py
new file mode 100644
index 000000000..284f8905f
--- /dev/null
+++ b/arctic/date/_mktz.py
@@ -0,0 +1,76 @@
+import bisect
+import os
+import dateutil
+from decorator import decorator
+import time
+import tzlocal
+
+DEFAULT_TIME_ZONE_NAME = tzlocal.get_localzone().zone # 'Europe/London'
+TIME_ZONE_DATA_SOURCE = '/usr/share/zoneinfo/'
+
+
+class TimezoneError(Exception):
+ pass
+
+
+class tzfile(dateutil.tz.tzfile):
+
+ def _find_ttinfo(self, dtm, laststd=0):
+ """Faster version of parent class's _find_ttinfo() as this uses bisect rather than a linear search."""
+ if dtm is None:
+ # This will happen, for example, when a datetime.time object gets utcoffset() called.
+ raise ValueError('tzinfo object can not calculate offset for date %s' % dtm)
+ ts = ((dtm.toordinal() - dateutil.tz.EPOCHORDINAL) * 86400
+ + dtm.hour * 3600
+ + dtm.minute * 60
+ + dtm.second)
+ idx = bisect.bisect_right(self._trans_list, ts)
+ if len(self._trans_list) == 0 or idx == len(self._trans_list):
+ return self._ttinfo_std
+ if idx == 0:
+ return self._ttinfo_before
+ if laststd:
+ while idx > 0:
+ tti = self._trans_idx[idx - 1]
+ if not tti.isdst:
+ return tti
+ idx -= 1
+ else:
+ return self._ttinfo_std
+ else:
+ return self._trans_idx[idx - 1]
+
+
+def mktz(zone=None):
+ """
+ Return a new timezone based on the zone using the python-dateutil
+ package. This convenience method is useful for resolving the timezone
+ names as dateutil.tz.tzfile requires the full path.
+
+ The concise name 'mktz' is convenient when using it on the
+ console.
+
+ Parameters
+ ----------
+ zone : `String`
+ The timezone name, e.g. 'Europe/London'. Defaults to the local timezone.
+
+ Returns
+ -------
+ An instance of a timezone which implements the tzinfo interface.
+
+ Raises
+ ------
+ TimezoneError : Raised if a user inputs a bad timezone name.
+ """
+
+ if zone is None:
+ zone = DEFAULT_TIME_ZONE_NAME
+ _path = os.path.join(TIME_ZONE_DATA_SOURCE, zone)
+ try:
+ tz = tzfile(_path)
+ except (ValueError, IOError) as err:
+ raise TimezoneError('Timezone "%s" can not be read, error: "%s"' % (zone, err))
+ # Stash the zone name as an attribute (as pytz does)
+ tz.zone = zone if not zone.startswith(TIME_ZONE_DATA_SOURCE) else zone[len(TIME_ZONE_DATA_SOURCE):]
+ return tz
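+
+
+# Example usage (illustrative, added commentary):
+#     mktz('Europe/London').zone   # -> 'Europe/London'
+#     mktz()                       # local timezone, per tzlocal.get_localzone()
+#     mktz('Not/AZone')            # raises TimezoneError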
diff --git a/arctic/date/_parse.py b/arctic/date/_parse.py
new file mode 100644
index 000000000..598a09b39
--- /dev/null
+++ b/arctic/date/_parse.py
@@ -0,0 +1,10 @@
+from dateutil.parser import parse as _parse
+
+
+def parse(string, agnostic=False, **kwargs):
+ parsed = _parse(string, **kwargs)
+ if agnostic or (parsed == _parse(string, yearfirst=True, **kwargs)
+ == _parse(string, dayfirst=True, **kwargs)):
+ return parsed
+ else:
+ raise ValueError("The date was ambiguous: %s" % string)
diff --git a/arctic/date/_util.py b/arctic/date/_util.py
new file mode 100644
index 000000000..b2f25ea7d
--- /dev/null
+++ b/arctic/date/_util.py
@@ -0,0 +1,123 @@
+import calendar
+import datetime
+from datetime import timedelta
+
+from ..logging import logger
+from ._daterange import DateRange
+from ._generalslice import OPEN_OPEN, CLOSED_CLOSED, OPEN_CLOSED, CLOSED_OPEN, GeneralSlice
+from ._parse import parse
+from ._mktz import mktz
+
+
+# Support standard brackets syntax for open/closed ranges.
+Ranges = {'()': OPEN_OPEN,
+ '(]': OPEN_CLOSED,
+ '[)': CLOSED_OPEN,
+ '[]': CLOSED_CLOSED}
+
+
+def string_to_daterange(str_range, delimiter='-', as_dates=False, interval=CLOSED_CLOSED):
+ """
+ Convert a string to a DateRange. If only one date is given, the range runs from
+ that date (or datetime) until 24 hours later. You can optionally use mixtures of
+ []/() around the DateRange for OPEN/CLOSED interval behaviour.
+
+ Parameters
+ ----------
+ str_range : `String`
+ The range as a string of dates separated by one delimiter.
+
+ delimiter : `String`
+ The separator between the dates, using '-' as default.
+
+ as_dates : `Boolean`
+ True if you want the date-range to use datetime.date rather than datetime.datetime.
+
+ interval : `Intervals`
+ CLOSED_CLOSED, OPEN_CLOSED, CLOSED_OPEN or OPEN_OPEN.
+ **Default is CLOSED_CLOSED**.
+
+ Returns
+ -------
+ `arctic.date.DateRange` : the DateRange parsed from the string.
+
+ Examples
+ --------
+ >>> from arctic.date import string_to_daterange
+ >>> string_to_daterange('20111020', as_dates=True)
+ DateRange(start=datetime.date(2011, 10, 20), end=datetime.date(2011, 10, 21))
+
+ >>> string_to_daterange('201110201030')
+ DateRange(start=datetime.datetime(2011, 10, 20, 10, 30), end=datetime.datetime(2011, 10, 21, 10, 30))
+
+ >>> string_to_daterange('20111020-20120120', as_dates=True)
+ DateRange(start=datetime.date(2011, 10, 20), end=datetime.date(2012, 1, 20))
+
+ >>> string_to_daterange('[20111020-20120120)', as_dates=True)
+ DateRange(start=datetime.date(2011, 10, 20), end=datetime.date(2012, 1, 20))
+ """
+ num_dates = str_range.count(delimiter) + 1
+ if num_dates > 2:
+ raise ValueError('Too many dates in input string [%s] with delimiter (%s)' % (str_range, delimiter))
+
+ # Allow the user to use the [date-date), etc. range syntax to specify the interval.
+ range_mode = Ranges.get(str_range[0] + str_range[-1], None)
+ if range_mode:
+ return string_to_daterange(str_range[1:-1], delimiter, as_dates, interval=range_mode)
+
+ if as_dates:
+ parse_dt = lambda s: parse(s).date() if s else None
+ else:
+ parse_dt = lambda s: parse(s) if s else None
+ if num_dates == 2:
+ d = [parse_dt(x) for x in str_range.split(delimiter)]
+ oc = interval
+ else:
+ start = parse_dt(str_range)
+ d = [start, start + datetime.timedelta(1)]
+ oc = CLOSED_OPEN # Always use closed-open for a single date/datetime.
+ return DateRange(d[0], d[1], oc)
+
+
+def to_pandas_closed_closed(date_range):
+ """
+ Pandas DateRange slicing is CLOSED-CLOSED inclusive at both ends.
+
+ Returns a date_range with start-end suitable for slicing in pandas.
+ """
+ if not date_range:
+ return None
+ start = date_range.start
+ end = date_range.end
+ if start:
+ if date_range.startopen:
+ start += timedelta(milliseconds=1)
+ if end:
+ if date_range.endopen:
+ end -= timedelta(milliseconds=1)
+ return DateRange(start, end)
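+
+# Illustrative (added comment): an OPEN_OPEN range (start, end) becomes
+# DateRange(start + 1ms, end - 1ms), which pandas' inclusive slicing then
+# treats as closed at both ends.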
+
+
+def ms_to_datetime(ms, tzinfo=None):
+ """Convert a millisecond time value to an offset-aware Python datetime object."""
+ if not isinstance(ms, (int, long)):
+ raise TypeError('expected integer, not %s' % type(ms))
+
+ if tzinfo in (None, mktz()):
+ return datetime.datetime.fromtimestamp(ms * 1e-3, mktz()).replace(tzinfo=None)
+
+ return datetime.datetime.fromtimestamp(ms * 1e-3, tzinfo)
+
+
+def _add_tzone(dtm):
+ if dtm.tzinfo is None:
+ dtm = dtm.replace(tzinfo=mktz())
+ return dtm
+
+
+def datetime_to_ms(d):
+ """Convert a Python datetime object to a millisecond epoch (UTC) time value."""
+ try:
+ return long((calendar.timegm(_add_tzone(d).utctimetuple()) + d.microsecond / 1000000.0) * 1e3)
+ except AttributeError:
+ raise TypeError('expected a Python datetime object, not %s' % type(d))
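+
+
+# Round-trip example (illustrative; assumes the 'UTC' zone file is available):
+#     datetime_to_ms(datetime.datetime(1970, 1, 1, tzinfo=mktz('UTC')))   # -> 0
+#     ms_to_datetime(0, mktz('UTC'))   # -> 1970-01-01 00:00:00+00:00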
diff --git a/arctic/decorators.py b/arctic/decorators.py
new file mode 100644
index 000000000..635d0e2e8
--- /dev/null
+++ b/arctic/decorators.py
@@ -0,0 +1,87 @@
+from datetime import datetime
+from functools import wraps
+import os
+from pymongo.errors import AutoReconnect, OperationFailure, DuplicateKeyError, ServerSelectionTimeoutError
+import sys
+from time import sleep
+
+from .logging import logger
+from .hooks import log_exception as _log_exception
+
+_MAX_RETRIES = 15
+
+
+def _get_host(store):
+ ret = {}
+ if store:
+ try:
+ if isinstance(store, (list, tuple)):
+ store = store[0]
+ ret['l'] = store._arctic_lib.get_name()
+ ret['mnodes'] = ["{}:{}".format(h, p) for h, p in store._collection.database.client.nodes]
+ ret['mhost'] = "{}".format(store._arctic_lib.arctic.mongo_host)
+ except Exception:
+ # Sometimes get_name(), for example, fails if we're not connected to MongoDB.
+ pass
+ return ret
+
+_in_retry = False
+_retry_count = 0
+
+
+def mongo_retry(f):
+ """
+ Catch-all decorator that handles AutoReconnect and OperationFailure
+ errors from PyMongo
+ """
+ log_all_exceptions = 'arctic' in f.__module__ if f.__module__ else False
+
+ @wraps(f)
+ def f_retry(*args, **kwargs):
+ global _retry_count, _in_retry
+ top_level = not _in_retry
+ _in_retry = True
+ try:
+ while True:
+ try:
+ return f(*args, **kwargs)
+ except (DuplicateKeyError, ServerSelectionTimeoutError) as e:
+ # Re-raise errors that won't go away.
+ _handle_error(f, e, _retry_count, **_get_host(args))
+ raise
+ except (OperationFailure, AutoReconnect) as e:
+ _retry_count += 1
+ _handle_error(f, e, _retry_count, **_get_host(args))
+ except Exception as e:
+ if log_all_exceptions:
+ _log_exception(f.__name__, e, _retry_count, **_get_host(args))
+ raise
+ finally:
+ if top_level:
+ _in_retry = False
+ _retry_count = 0
+ return f_retry
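+
+# Typical usage (illustrative; 'load_symbol' is a hypothetical function):
+#
+#     @mongo_retry
+#     def load_symbol(library, symbol):
+#         return library.read(symbol)
+#
+# Transient AutoReconnect/OperationFailure errors are retried with backoff,
+# while DuplicateKeyError and ServerSelectionTimeoutError are re-raised at once.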
+
+
+def dump_bad_documents(*document):
+ """
+ Dump bad documents to disk
+ """
+ id = str(document[0]['_id'])
+ with open('/tmp/mongo_debug_' + str(os.getpid()) + '_' + id + '_' + str(datetime.now()), 'a') as f:
+ for d in document:
+ f.write(str(d) + '\n')
+
+
+def _handle_error(f, e, retry_count, **kwargs):
+ if retry_count > _MAX_RETRIES:
+ logger.error('Too many retries %s [%s], raising' % (f.__name__, e))
+ e.traceback = sys.exc_info()[2]
+ raise
+ log_fn = logger.warn if retry_count > 2 else logger.debug
+ log_fn('%s %s [%s], retrying %i' % (type(e), f.__name__, e, retry_count))
+ # Log operation failure errors
+ _log_exception(f.__name__, e, retry_count, **kwargs)
+# if 'unauthorized' in str(e):
+# raise
+ sleep(0.01 * min((3 ** retry_count), 50)) # backoff...
diff --git a/arctic/exceptions.py b/arctic/exceptions.py
new file mode 100644
index 000000000..d18acc4ec
--- /dev/null
+++ b/arctic/exceptions.py
@@ -0,0 +1,42 @@
+class ArcticException(Exception):
+ pass
+
+
+class NoDataFoundException(ArcticException):
+ pass
+
+
+class UnhandledDtypeException(ArcticException):
+ pass
+
+
+class LibraryNotFoundException(ArcticException):
+ pass
+
+
+class DuplicateSnapshotException(ArcticException):
+ pass
+
+
+class StoreNotInitializedException(ArcticException):
+ pass
+
+
+class OptimisticLockException(ArcticException):
+ pass
+
+
+class ConcurrentModificationException(ArcticException):
+ pass
+
+
+class QuotaExceededException(ArcticException):
+ pass
+
+
+class UnorderedAppendException(ArcticException):
+ pass
+
+
+class OverlappingDataException(ArcticException):
+ pass
diff --git a/arctic/fixtures/__init__.py b/arctic/fixtures/__init__.py
new file mode 100644
index 000000000..44e28ce84
--- /dev/null
+++ b/arctic/fixtures/__init__.py
@@ -0,0 +1,3 @@
+"""
+Common test fixtures so you don't need to define them in your own modules.
+"""
diff --git a/arctic/fixtures/arctic.py b/arctic/fixtures/arctic.py
new file mode 100644
index 000000000..63313507a
--- /dev/null
+++ b/arctic/fixtures/arctic.py
@@ -0,0 +1,100 @@
+import getpass
+import pytest as pytest
+
+from .. import arctic as m
+from ..logging import logger
+from ..decorators import mongo_retry
+from ..tickstore.tickstore import TICK_STORE_TYPE
+
+from .mongo import mongo_proc, mongodb
+
+mongo_proc2 = mongo_proc(executable="mongod", port="?",
+ params='--nojournal '
+ '--noauth '
+ '--nohttpinterface '
+ '--noprealloc '
+ '--nounixsocket '
+ '--smallfiles '
+ '--syncdelay 0 '
+ '--nssize=1 '
+ '--quiet '
+ )
+mongodb = mongodb('mongo_proc2')
+
+
+#
+# TODO: Using mongo_server_session here would be more efficient
+#
+
+@pytest.fixture(scope="function")
+def mongo_host(mongo_proc2):
+ return mongo_proc2.host + ":" + str(mongo_proc2.port)
+
+
+@pytest.fixture(scope="function")
+def arctic(mongodb):
+ logger.info('arctic.fixtures: arctic init()')
+ mongodb.drop_database('arctic')
+ mongodb.drop_database('arctic_{}'.format(getpass.getuser()))
+ arctic = m.Arctic(mongo_host=mongodb)
+ # Do not add global libraries here: use specific fixtures below.
+ # Remember, for testing it does not usually matter what your libraries are called.
+ return arctic
+
+
+# An Arctic instance which allows reads to hit the secondary
+@pytest.fixture(scope="function")
+def arctic_secondary(mongodb, arctic):
+ arctic = m.Arctic(mongo_host=mongodb, allow_secondary=True)
+ return arctic
+
+
+@pytest.fixture(scope="function")
+def library_name():
+ return 'test.TEST'
+
+
+@pytest.fixture(scope="function")
+def user_library_name():
+ return "{}.TEST".format(getpass.getuser())
+
+
+@pytest.fixture(scope="function")
+def overlay_library_name():
+ return "test.OVERLAY"
+
+
+@pytest.fixture(scope="function")
+def library(arctic, library_name):
+ # Add a single test library
+ arctic.initialize_library(library_name, m.VERSION_STORE, segment='month')
+ return arctic.get_library(library_name)
+
+
+@pytest.fixture(scope="function")
+def library_secondary(arctic_secondary, library_name):
+ arctic_secondary.initialize_library(library_name, m.VERSION_STORE, segment='month')
+ return arctic_secondary.get_library(library_name)
+
+
+@pytest.fixture(scope="function")
+def user_library(arctic, user_library_name):
+ arctic.initialize_library(user_library_name, m.VERSION_STORE, segment='month')
+ return arctic.get_library(user_library_name)
+
+
+@pytest.fixture(scope="function")
+def overlay_library(arctic, overlay_library_name):
+ """ Overlay library fixture, returns a pair of libs, read-write: ${name} and read-only: ${name}_RAW
+ """
+ rw_name = overlay_library_name
+ ro_name = '{}_RAW'.format(overlay_library_name)
+ arctic.initialize_library(rw_name, m.VERSION_STORE, segment='year')
+ arctic.initialize_library(ro_name, m.VERSION_STORE, segment='year')
+ return arctic.get_library(rw_name), arctic.get_library(ro_name)
+
+
+@pytest.fixture(scope="function")
+def tickstore_lib(arctic, library_name):
+ arctic.initialize_library(library_name, TICK_STORE_TYPE)
+ return arctic.get_library(library_name)
diff --git a/arctic/fixtures/mongo.py b/arctic/fixtures/mongo.py
new file mode 100644
index 000000000..0f059e061
--- /dev/null
+++ b/arctic/fixtures/mongo.py
@@ -0,0 +1,144 @@
+# Copyright (C) 2013 by Clearcode
+# and associates (see AUTHORS).
+
+# This file is part of pytest-dbfixtures.
+
+# pytest-dbfixtures is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# pytest-dbfixtures is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with pytest-dbfixtures. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import pytest
+
+from path import path
+from tempfile import mkdtemp
+
+from pytest_dbfixtures.executors import TCPExecutor
+from pytest_dbfixtures.port import get_port
+from pytest_dbfixtures.utils import get_config, try_import, get_process_fixture
+
+
+def mongo_proc(executable=None, params=None, host=None, port=None,
+ logs_prefix=''):
+ """
+ Mongo process factory.
+
+ :param str executable: path to mongod
+ :param str params: params
+ :param str host: hostname
+ :param str port: exact port (e.g. '8000')
+ or randomly selected port:
+ '?' - any random available port
+ '2000-3000' - random available port from a given range
+ '4002,4003' - random of 4002 or 4003 ports
+ :param str logs_prefix: prefix for log filename
+ :rtype: func
+ :returns: function which makes a mongo process
+ """
+
+ @pytest.fixture(scope='function')
+ def mongo_proc_fixture(request):
+ """
+ #. Get config.
+ #. Run a ``mongod`` process.
+ #. Stop ``mongod`` process after tests.
+
+ .. note::
+ ``mongod``
+
+ :param FixtureRequest request: fixture request object
+ :rtype: pytest_dbfixtures.executors.TCPExecutor
+ :returns: tcp executor
+ """
+ config = get_config(request)
+
+ # make a temporary directory for tests and delete it
+ # if tests have been finished
+ tmp = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'tmp')
+ if not os.path.exists(tmp):
+ os.mkdir(tmp)
+ tmpdir = path(mkdtemp(prefix='mongo_pytest_fixture', dir=tmp))
+ request.addfinalizer(lambda: tmpdir.exists() and tmpdir.rmtree())
+
+ mongo_exec = executable or config.mongo.mongo_exec
+ mongo_params = params or config.mongo.params
+
+ mongo_host = host or config.mongo.host
+ mongo_port = get_port(port or config.mongo.port)
+
+ logsdir = path(request.config.getvalue('logsdir'))
+ mongo_logpath = logsdir / '{prefix}mongo.{port}.log'.format(
+ prefix=logs_prefix,
+ port=mongo_port
+ )
+
+ mongo_executor = TCPExecutor(
+ '{mongo_exec} --bind_ip {host} --port {port} --dbpath {dbpath} --logpath {logpath} {params}'.format( # noqa
+ mongo_exec=mongo_exec,
+ params=mongo_params,
+ host=mongo_host,
+ port=mongo_port,
+ dbpath=tmpdir,
+ logpath=mongo_logpath,
+ ),
+ host=mongo_host,
+ port=mongo_port,
+ )
+ mongo_executor.start()
+
+ request.addfinalizer(mongo_executor.stop)
+
+ return mongo_executor
+
+ return mongo_proc_fixture
+
+
+def mongodb(process_fixture_name):
+ """
+ Mongo database factory.
+
+ :param str process_fixture_name: name of the process fixture
+ :rtype: func
+ :returns: function which makes a connection to mongo
+ """
+
+ @pytest.fixture
+ def mongodb_factory(request):
+ """
+ #. Get pymongo module and config.
+ #. Get connection to mongo.
+ #. Drop collections before and after tests.
+
+ :param FixtureRequest request: fixture request object
+ :rtype: pymongo.connection.Connection
+ :returns: connection to mongo database
+ """
+ proc_fixture = get_process_fixture(request, process_fixture_name)
+
+ pymongo, config = try_import('pymongo', request)
+
+ mongo_host = proc_fixture.host
+ mongo_port = proc_fixture.port
+
+ try:
+ client = pymongo.MongoClient
+ except AttributeError:
+ client = pymongo.Connection
+
+ mongo_conn = client(mongo_host, mongo_port)
+
+ return mongo_conn
+
+ return mongodb_factory
+
+
+__all__ = ['mongodb', 'mongo_proc']
diff --git a/arctic/hooks.py b/arctic/hooks.py
new file mode 100644
index 000000000..2b80c19bb
--- /dev/null
+++ b/arctic/hooks.py
@@ -0,0 +1,31 @@
+
+
+_resolve_mongodb_hook = lambda env: env
+_log_exception_hook = lambda *args, **kwargs: None
+
+
+def get_mongodb_uri(host):
+ """
+ Return the MongoDB URI for the passed in host-alias / environment.
+
+ Allows an indirection point for mapping aliases to particular
+ MongoDB instances.
+ """
+ return _resolve_mongodb_hook(host)
+
+
+def register_resolve_mongodb_hook(hook):
+ global _resolve_mongodb_hook
+ _resolve_mongodb_hook = hook
+
+
+def log_exception(fn_name, exception, retry_count, **kwargs):
+ """
+ External exception logging hook.
+ """
+ _log_exception_hook(fn_name, exception, retry_count, **kwargs)
+
+
+def register_log_exception_hook(hook):
+ global _log_exception_hook
+ _log_exception_hook = hook
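+
+
+# Example (illustrative; 'research' and the URI are made-up values): map a
+# host alias onto a concrete MongoDB URI.
+#
+#     register_resolve_mongodb_hook(
+#         lambda host: 'mongodb://research-cluster:27017' if host == 'research' else host)
+#     get_mongodb_uri('research')   # -> 'mongodb://research-cluster:27017'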
diff --git a/arctic/hosts.py b/arctic/hosts.py
new file mode 100644
index 000000000..5857aceb3
--- /dev/null
+++ b/arctic/hosts.py
@@ -0,0 +1,58 @@
+"""
+Utilities to resolve a string to a Mongo host, or an Arctic library.
+"""
+import ConfigParser
+from ConfigParser import NoOptionError, NoSectionError
+import os
+import re
+from weakref import WeakValueDictionary
+
+from .logging import logger
+
+__all__ = ['get_arctic_lib', 'get_arctic_for_library']
+
+
+
+# Application environment variables
+arctic_cache = WeakValueDictionary()
+
+
+CONNECTION_STR = re.compile(r"(^\w+\.?\w+)@([^\s:]+:?\w+)$")
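+# e.g. (illustrative) 'arctic_user.library@research.hostname:27017' matches with
+# groups ('arctic_user.library', 'research.hostname:27017').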
+
+
+def get_arctic_lib(connection_string, **kwargs):
+ """
+ Returns a mongo library for the given connection string
+
+ Parameters
+ ----------
+ connection_string: `str`
+ Format must be one of the following:
+ library@trading for known mongo servers
+ library@hostname:port
+
+ Returns
+ -------
+ Arctic library
+ """
+ from .arctic import Arctic
+ m = CONNECTION_STR.match(connection_string)
+ if not m:
+ raise ValueError("connection string incorrectly formed: %s" % connection_string)
+ library, host = m.group(1), m.group(2)
+ return _get_arctic(host, **kwargs)[library]
+
+
+def _get_arctic(instance, **kwargs):
+ # Consider any kwargs passed to the Arctic as discriminators for the cache
+ key = instance, frozenset(kwargs.iteritems())
+
+ # Don't create lots of Arctic instances
+ arctic = arctic_cache.get(key, None)
+ if not arctic:
+ # Create the instance. Note that Arctic connects lazily,
+ # so no connection is made here at creation time.
+ from .arctic import Arctic
+ arctic = Arctic(instance, **kwargs)
+ arctic_cache[key] = arctic
+ return arctic
diff --git a/arctic/logging.py b/arctic/logging.py
new file mode 100644
index 000000000..c07c0a6a9
--- /dev/null
+++ b/arctic/logging.py
@@ -0,0 +1,6 @@
+from __future__ import absolute_import
+
+import logging as logger
+
+logger.basicConfig(format='%(asctime)s %(message)s', level='INFO')
+logger = logger.getLogger('arctic')
diff --git a/arctic/scripts/__init__.py b/arctic/scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/arctic/scripts/arctic_copy_data.py b/arctic/scripts/arctic_copy_data.py
new file mode 100644
index 000000000..4bac99ffe
--- /dev/null
+++ b/arctic/scripts/arctic_copy_data.py
@@ -0,0 +1,100 @@
+import argparse
+import os
+import re
+from multiprocessing import Pool
+import pwd
+
+from arctic.decorators import _get_host
+from arctic.store.audit import ArcticTransaction
+
+from ..logging import logger
+from ..hosts import get_arctic_lib
+from ..date import DateRange, to_pandas_closed_closed, CLOSED_OPEN, OPEN_CLOSED
+
+# Use the UID rather than environment variables for auditing
+USER = pwd.getpwuid(os.getuid())[0]
+
+
+def copy_symbols_helper(src, dest, log, force, splice):
+ def _copy_symbol(symbols):
+ for symbol in symbols:
+ with ArcticTransaction(dest, symbol, USER, log) as mt:
+ existing_data = dest.has_symbol(symbol)
+ if existing_data:
+ if force:
+ logger.warn("Symbol: %s already exists in destination, OVERWRITING" % symbol)
+ elif splice:
+ logger.warn("Symbol: %s already exists in destination, splicing in new data" % symbol)
+ else:
+ logger.warn("Symbol: {} already exists in {}@{}, use --force to overwrite or --splice to join with existing data".format(symbol,
+ _get_host(dest).get('l'),
+ _get_host(dest).get('mhost')))
+ continue
+
+ version = src.read(symbol)
+ new_data = version.data
+
+ if existing_data and splice:
+ original_data = dest.read(symbol).data
+ before = original_data.ix[:to_pandas_closed_closed(DateRange(None, new_data.index[0].to_pydatetime(), interval=CLOSED_OPEN)).end]
+ after = original_data.ix[to_pandas_closed_closed(DateRange(new_data.index[-1].to_pydatetime(), None, interval=OPEN_CLOSED)).start:]
+ new_data = before.append(new_data).append(after)
+
+ mt.write(symbol, new_data, metadata=version.metadata)
+ return _copy_symbol
+
+
+def main():
+ usage = """
+ Copy data from one MongoDB instance to another.
+
+ Example:
+ arctic_copy_data --log "Copying data" --src user.library@host1 --dest user.library@host2 symbol1 symbol2
+ """
+ p = argparse.ArgumentParser(usage=usage)
+ p.add_argument("--src", required=True, help="Source MongoDB like: library@hostname:port")
+ p.add_argument("--dest", required=True, help="Destination MongoDB like: library@hostname:port")
+ p.add_argument("--log", required=True, help="Data CR")
+ p.add_argument("--force", default=False, action='store_true', help="Force overwrite of existing data for symbol.")
+ p.add_argument("--splice", default=False, action='store_true', help="Keep existing data before and after the new data.")
+ p.add_argument("--parallel", default=1, type=int, help="Number of imports to run in parallel.")
+ p.add_argument("symbols", nargs='+', type=str, help="List of symbol regexes to copy from source to dest.")
+
+ opts = p.parse_args()
+
+ src = get_arctic_lib(opts.src)
+ dest = get_arctic_lib(opts.dest)
+
+ logger.info("Copying data from %s -> %s" % (opts.src, opts.dest))
+
+ # Prune the list of symbols from the library according to the list of symbols.
+ required_symbols = set()
+ for symbol in opts.symbols:
+ required_symbols.update(src.list_symbols(regex=symbol))
+ required_symbols = sorted(required_symbols)
+
+ logger.info("Copying: {} symbols".format(len(required_symbols)))
+ if len(required_symbols) < 1:
+ logger.warn("No symbols found that matched those provided.")
+ return
+
+ # Function we'll call to do the data copying
+ copy_symbol = copy_symbols_helper(src, dest, opts.log, opts.force, opts.splice)
+
+ if opts.parallel > 1:
+ logger.info("Starting: {} jobs".format(opts.parallel))
+ pool = Pool(processes=opts.parallel)
+ # Break the jobs into chunks for multiprocessing
+ chunk_size = len(required_symbols) / opts.parallel
+ chunk_size = max(chunk_size, 1)
+ chunks = [required_symbols[offs:offs + chunk_size] for offs in
+ range(0, len(required_symbols), chunk_size)]
+ assert sum(len(x) for x in chunks) == len(required_symbols)
+ pool.map(copy_symbol, chunks)
+ else:
+ copy_symbol(required_symbols)
+
+
+
+if __name__ == '__main__':
+ main()
diff --git a/arctic/scripts/arctic_create_user.py b/arctic/scripts/arctic_create_user.py
new file mode 100644
index 000000000..de7e8d13e
--- /dev/null
+++ b/arctic/scripts/arctic_create_user.py
@@ -0,0 +1,61 @@
+import optparse
+import pymongo
+import uuid
+import base64
+import sys
+
+from ..auth import get_auth, authenticate
+from ..hooks import get_mongodb_uri
+
+
+def main():
+ usage = """usage: %prog [options] username ...
+
+ Creates the user's personal Arctic database, and adds them, read-only,
+ to the central admin database.
+ """
+
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--host", default='localhost', help="Hostname, or clustername. Default: localhost")
+ parser.add_option("--password",dest="password", default=None, help="Password. Default: random")
+ parser.add_option("--admin-write", dest="admin", action='store_false', default=True,
+ help="Give write access to the admin DB. Default: False")
+ parser.add_option("--dryrun", "-n", dest="dryrun", action="store_true", help="Don't really do anything", default=False)
+ parser.add_option("--verbose", "-v", dest="verbose", action="store_true", help="Print some commentary", default=False)
+ parser.add_option("--nodb", dest="nodb", help="Don't create a 'personal' database", action="store_true", default=False)
+
+ (opts, args) = parser.parse_args()
+
+ c = pymongo.MongoClient(get_mongodb_uri(opts.host))
+ credentials = get_auth(opts.host, 'admin', 'admin')
+ if not credentials:
+ print >>sys.stderr, "You have no admin credentials for instance '%s'" % (opts.host)
+ return
+
+ if not authenticate(c.admin, credentials.user, credentials.password):
+ print >>sys.stderr, "Failed to authenticate to '%s' as '%s'" % (opts.host, credentials.user)
+ return
+
+ for user in args:
+
+ p = opts.password
+
+ if p is None:
+ p = base64.b64encode(uuid.uuid4().bytes).replace('/', '')[:12]
+
+ if not opts.dryrun:
+ if opts.verbose:
+ print "Adding user %s to DB %s" % (user, opts.host)
+ if not opts.nodb:
+ if opts.verbose:
+ print "Adding database arctic_%s to DB %s" % (user, opts.host)
+ c['arctic_' + user].add_user(user, p)
+ c.admin.add_user(user, p, read_only=opts.admin)
+ else:
+ print "DRYRUN: add user %s readonly %s nodb %s" % (user, opts.admin, opts.nodb)
+
+ if not opts.password:
+ print "%-16s %s" % (user, p)
+
+if __name__ == '__main__':
+ main()
diff --git a/arctic/scripts/arctic_delete_library.py b/arctic/scripts/arctic_delete_library.py
new file mode 100644
index 000000000..449efc689
--- /dev/null
+++ b/arctic/scripts/arctic_delete_library.py
@@ -0,0 +1,40 @@
+import optparse
+import pymongo
+
+from ..logging import logger
+from ..hooks import get_mongodb_uri
+from ..arctic import Arctic
+from .utils import do_db_auth
+
+
+def main():
+ usage = """usage: %prog [options]
+
+ Deletes the named library from a user's database.
+
+ Example:
+ %prog --host=hostname --library=arctic_jblackburn.my_library
+ """
+
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--host", default='localhost', help="Hostname, or clustername. Default: localhost")
+ parser.add_option("--library", help="The name of the library. e.g. 'arctic_jblackburn.lib'")
+
+ (opts, _) = parser.parse_args()
+
+ if not opts.library:
+ parser.error('Must specify the full path of the library e.g. arctic_jblackburn.lib!')
+
+ print "Deleting: %s on mongo %s" % (opts.library, opts.host)
+ c = pymongo.MongoClient(get_mongodb_uri(opts.host))
+
+ db_name = opts.library[:opts.library.index('.')] if '.' in opts.library else None
+ do_db_auth(opts.host, c, db_name)
+ store = Arctic(c)
+ store.delete_library(opts.library)
+
+ logger.info("Library %s deleted" % opts.library)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/arctic/scripts/arctic_enable_sharding.py b/arctic/scripts/arctic_enable_sharding.py
new file mode 100644
index 000000000..7ad79ee67
--- /dev/null
+++ b/arctic/scripts/arctic_enable_sharding.py
@@ -0,0 +1,37 @@
+import optparse
+import pymongo
+
+from ..arctic import Arctic
+from ..auth import get_auth
+from ..hooks import get_mongodb_uri
+from .._util import enable_sharding
+from ..auth import authenticate
+
+
+def main():
+ usage = """usage: %prog [options] arg1=value, arg2=value
+
+ Enables sharding on the specified arctic library.
+ """
+
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--host", default='localhost', help="Hostname, or clustername. Default: localhost")
+ parser.add_option("--library", help="The name of the library. e.g. 'arctic_jblackburn.lib'")
+
+ (opts, _) = parser.parse_args()
+
+ if not opts.library or '.' not in opts.library:
+ parser.error('must specify the full path of the library e.g. arctic_jblackburn.lib!')
+
+ print "Enabling-sharding: %s on mongo %s" % (opts.library, opts.host)
+
+ c = pymongo.MongoClient(get_mongodb_uri(opts.host))
+ credentials = get_auth(opts.host, 'admin', 'admin')
+ if credentials:
+ authenticate(c.admin, credentials.user, credentials.password)
+ store = Arctic(c)
+ enable_sharding(store, opts.library)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/arctic/scripts/arctic_fsck.py b/arctic/scripts/arctic_fsck.py
new file mode 100644
index 000000000..016f17611
--- /dev/null
+++ b/arctic/scripts/arctic_fsck.py
@@ -0,0 +1,72 @@
+import logging
+import argparse
+
+from ..logging import logger
+from ..hooks import get_mongodb_uri
+from ..arctic import Arctic, ArcticLibraryBinding
+from .utils import do_db_auth
+
+
+def main():
+ usage = """
+ Check an Arctic library for inconsistencies.
+ """
+
+ parser = argparse.ArgumentParser(usage=usage)
+ parser.add_argument("--host", default='localhost', help="Hostname, or clustername. Default: localhost")
+ parser.add_argument("--library", nargs='+', required=True, help="The name of the library. e.g. 'arctic_jblackburn.lib'")
+ parser.add_argument("-v", action='store_true', help="Verbose mode")
+ parser.add_argument("-f", action='store_true', help="Force ; Cleanup any problems found. (Default is dry-run.)")
+ parser.add_argument("-n", action='store_true', help="No FSCK ; just print stats.)")
+
+ opts = parser.parse_args()
+
+ if opts.v:
+ logger.setLevel(logging.DEBUG)
+
+ if not opts.f:
+ logger.info("DRY-RUN: No changes will be made.")
+
+ logger.info("FSCK'ing: %s on mongo %s" % (opts.library, opts.host))
+ store = Arctic(get_mongodb_uri(opts.host))
+
+ for lib in opts.library:
+ # Auth to the DB for making changes
+ if opts.f:
+ database_name, _ = ArcticLibraryBinding._parse_db_lib(lib)
+ do_db_auth(opts.host, store._conn, database_name)
+
+ orig_stats = store[lib].stats()
+
+ logger.info('----------------------------')
+ if not opts.n:
+ store[lib]._fsck(not opts.f)
+ logger.info('----------------------------')
+
+ final_stats = store[lib].stats()
+ logger.info('Stats:')
+ logger.info('Sharded: %s' % final_stats['chunks'].get('sharded', False))
+ logger.info('Symbols: %10d' % len(store[lib].list_symbols()))
+ logger.info('Versions: %10d Change(+/-) %6d (av: %.2fMB)' %
+ (final_stats['versions']['count'],
+ final_stats['versions']['count'] - orig_stats['versions']['count'],
+ final_stats['versions'].get('avgObjSize', 0) / 1024. / 1024.))
+ logger.info("Versions: %10.2fMB Change(+/-) %.2fMB" %
+ (final_stats['versions']['size'] / 1024. / 1024.,
+ (final_stats['versions']['size'] - orig_stats['versions']['size']) / 1024. / 1024.))
+ logger.info('Chunk Count: %7d Change(+/-) %6d (av: %.2fMB)' %
+ (final_stats['chunks']['count'],
+ final_stats['chunks']['count'] - orig_stats['chunks']['count'],
+ final_stats['chunks'].get('avgObjSize', 0) / 1024. / 1024.))
+ logger.info("Chunks: %12.2fMB Change(+/-) %6.2fMB" %
+ (final_stats['chunks']['size'] / 1024. / 1024.,
+ (final_stats['chunks']['size'] - orig_stats['chunks']['size']) / 1024. / 1024.))
+ logger.info('----------------------------')
+
+ if not opts.f:
+ logger.info("Done: DRY-RUN: No changes made. (Use -f to fix any problems)")
+ else:
+ logger.info("Done.")
+
+if __name__ == '__main__':
+ main()
diff --git a/arctic/scripts/arctic_init_library.py b/arctic/scripts/arctic_init_library.py
new file mode 100644
index 000000000..a6ca3685e
--- /dev/null
+++ b/arctic/scripts/arctic_init_library.py
@@ -0,0 +1,53 @@
+import argparse
+import pymongo
+
+from ..logging import logger
+from ..hooks import get_mongodb_uri
+from ..arctic import Arctic, VERSION_STORE, LIBRARY_TYPES, \
+ ArcticLibraryBinding
+from .utils import do_db_auth
+
+
+def main():
+ usage = """Initializes a named library in a user's database. Note that it will enable sharding on the underlying
+ collection if it can. To do this you must have admin credentials in arctic:
+
+ Example:
+ arctic_init_library --host=hostname --library=arctic_jblackburn.my_library
+ """
+
+ parser = argparse.ArgumentParser(usage=usage)
+ parser.add_argument("--host", default='localhost', help="Hostname, or clustername. Default: localhost")
+ parser.add_argument("--library", help="The name of the library. e.g. 'arctic_jblackburn.lib'")
+ parser.add_argument("--type", default=VERSION_STORE, choices=sorted(LIBRARY_TYPES.keys()),
+ help="The type of the library, as defined in "
+ "arctic.py. Default: %s" % VERSION_STORE)
+ parser.add_argument("--quota", default=10, help="Quota for the library in GB. A quota of 0 is unlimited."
+ "Default: 10")
+ parser.add_argument("--hashed", action="store_true", default=False, help="Use hashed based sharding. Useful where SYMBOLs share a common prefix (e.g. Bloomberg BBGXXXX symbols)"
+ "Default: False")
+
+ opts = parser.parse_args()
+
+ if not opts.library or '.' not in opts.library \
+ or not opts.library.startswith('arctic'):
+ parser.error('Must specify the full path of the library e.g. arctic_jblackburn.library!')
+ db_name, _ = ArcticLibraryBinding._parse_db_lib(opts.library)
+
+ print "Initializing: %s on mongo %s" % (opts.library, opts.host)
+ c = pymongo.MongoClient(get_mongodb_uri(opts.host))
+
+ if not do_db_auth(opts.host, c, db_name):
+ logger.error('Authentication Failed. Exiting.')
+ return
+
+ store = Arctic(c)
+ store.initialize_library("%s" % opts.library, opts.type, hashed=opts.hashed)
+ logger.info("Library %s created" % opts.library)
+
+ logger.info("Setting quota to %sG" % opts.quota)
+ store.set_quota(opts.library, int(opts.quota) * 1024 * 1024 * 1024)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/arctic/scripts/arctic_list_libraries.py b/arctic/scripts/arctic_list_libraries.py
new file mode 100644
index 000000000..bb9eca69a
--- /dev/null
+++ b/arctic/scripts/arctic_list_libraries.py
@@ -0,0 +1,31 @@
+from __future__ import print_function
+import optparse
+
+from ..arctic import Arctic
+
+print = print
+
+
+def main():
+ usage = """usage: %prog [options] [prefix ...]
+
+ Lists the libraries available in a user's database. If any prefix parameters
+ are given, list only libraries with names that start with one of the prefixes.
+
+ Example:
+ %prog --host=hostname rgautier
+ """
+
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--host", default='localhost', help="Hostname, or clustername. Default: localhost")
+
+ (opts, args) = parser.parse_args()
+
+ store = Arctic(opts.host)
+ for name in sorted(store.list_libraries()):
+ if (not args) or [n for n in args if name.startswith(n)]:
+ print(name)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/arctic/scripts/arctic_prune_versions.py b/arctic/scripts/arctic_prune_versions.py
new file mode 100644
index 000000000..8ef01e3e2
--- /dev/null
+++ b/arctic/scripts/arctic_prune_versions.py
@@ -0,0 +1,59 @@
+import optparse
+import pymongo
+
+from ..logging import logger
+from ..hooks import get_mongodb_uri
+from ..arctic import Arctic, ArcticLibraryBinding
+from .utils import do_db_auth
+
+
+def prune_versions(lib, symbol, keep_mins):
+ lib._prune_previous_versions(symbol, keep_mins=keep_mins)
+
+
+def main():
+ usage = """usage: %prog [options]
+
+ Prunes (i.e. deletes) versions of data that are not the most recent, and are older than 10 minutes,
+ and are not in use by snapshots. Must be used on an Arctic VersionStore library instance.
+
+ Example:
+ arctic_prune_versions --host=hostname --library=arctic_jblackburn.my_library
+ """
+
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--host", default='localhost', help="Hostname, or clustername. Default: localhost")
+ parser.add_option("--library", help="The name of the library. e.g. 'arctic_jblackburn.library'")
+ parser.add_option("--symbols", help="The symbols to prune - comma separated (default all)")
+ parser.add_option("--keep-mins", default=10, help="Ensure there's a version at least keep-mins old. Default:10")
+
+
+ (opts, _) = parser.parse_args()
+
+ if not opts.library:
+ parser.error('Must specify the Arctic library e.g. arctic_jblackburn.library!')
+ db_name, _ = ArcticLibraryBinding._parse_db_lib(opts.library)
+
+ print "Pruning (old) versions in : %s on mongo %s" % (opts.library, opts.host)
+ print "Keeping all versions <= %s mins old" % (opts.keep_mins)
+ c = pymongo.MongoClient(get_mongodb_uri(opts.host))
+
+ if not do_db_auth(opts.host, c, db_name):
+ logger.error('Authentication Failed. Exiting.')
+ return
+ lib = Arctic(c)[opts.library]
+
+ if opts.symbols:
+ symbols = opts.symbols.split(',')
+ else:
+ symbols = lib.list_symbols(all_symbols=True)
+ logger.info("Found %s symbols" % len(symbols))
+
+ for s in symbols:
+ logger.info("Pruning %s" % s)
+ prune_versions(lib, s, opts.keep_mins)
+ logger.info("Done")
+
+
+if __name__ == '__main__':
+ main()
diff --git a/arctic/scripts/utils.py b/arctic/scripts/utils.py
new file mode 100644
index 000000000..9b34c8451
--- /dev/null
+++ b/arctic/scripts/utils.py
@@ -0,0 +1,34 @@
+from ..logging import logger
+from ..auth import get_auth, authenticate
+
+
+def do_db_auth(host, connection, db_name):
+ """
+ Attempts to authenticate against the mongo instance.
+
+ Tries:
+ - Auth'ing against admin as 'admin' ; credentials: /arctic/admin/admin
+ - Auth'ing against db_name (which may be None if auth'ing against admin above)
+
+ returns True if authentication succeeded.
+ """
+ admin_creds = get_auth(host, 'admin', 'admin')
+ user_creds = get_auth(host, 'arctic', db_name)
+
+ # Attempt to authenticate the connection
+ # Try at 'admin level' first as this allows us to enableSharding, which we want
+ if admin_creds is None:
+ # Get ordinary credentials for authenticating against the DB
+ if user_creds is None:
+ logger.error("You need credentials for db '%s' on '%s', or admin credentials" % (db_name, host))
+ return False
+ if not authenticate(connection[db_name], user_creds.user, user_creds.password):
+ logger.error("Failed to authenticate to db '%s' on '%s', using user credentials" % (db_name, host))
+ return False
+ return True
+ elif not authenticate(connection.admin, admin_creds.user, admin_creds.password):
+ logger.error("Failed to authenticate to '%s' as Admin. Giving up." % (host))
+ return False
+ # Ensure we attempt to auth against the user DB, for non-privileged users to get access
+ authenticate(connection[db_name], user_creds.user, user_creds.password)
+ return True
diff --git a/arctic/store/__init__.py b/arctic/store/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/arctic/store/_ndarray_store.py b/arctic/store/_ndarray_store.py
new file mode 100644
index 000000000..8642ec9ad
--- /dev/null
+++ b/arctic/store/_ndarray_store.py
@@ -0,0 +1,393 @@
+from bson.binary import Binary
+import hashlib
+import numpy as np
+import pprint
+from pymongo import ReadPreference
+import pymongo
+from pymongo.errors import OperationFailure, DuplicateKeyError
+
+from ..logging import logger
+from ..decorators import mongo_retry, dump_bad_documents
+from ..exceptions import UnhandledDtypeException
+from ._version_store_utils import checksum
+
+from .._compression import compress_array, decompress
+from ..exceptions import ConcurrentModificationException
+
+_CHUNK_SIZE = 2 * 1024 * 1024 - 2048 # ~2 MB (a bit less for usePowerOf2Sizes)
+_APPEND_SIZE = 1 * 1024 * 1024 # 1MB
+_APPEND_COUNT = 60 # 1 hour of 1 min data
+
+
+def _promote_struct_dtypes(dtype1, dtype2):
+ if not set(dtype1.names).issuperset(set(dtype2.names)):
+ raise Exception("Removing columns from dtype not handled")
+
+ def _promote(type1, type2):
+ if type2 is None:
+ return type1
+ if type1.shape is not None:
+ if not type1.shape == type2.shape:
+ raise Exception("We do not handle changes to dtypes that have shape")
+ return np.promote_types(type1.base, type2.base), type1.shape
+ return np.promote_types(type1, type2)
+ return np.dtype([(n, _promote(dtype1.fields[n][0], dtype2.fields.get(n, (None,))[0])) for n in dtype1.names])
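+# Illustrative (added comment): promoting a new dtype [('a', '<i4'), ('b', '<f4')]
+# against a previous dtype [('a', '<i8')] yields [('a', '<i8'), ('b', '<f4')]:
+# existing column 'a' keeps the wider type and new column 'b' is preserved.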
+
+
+class NdarrayStore(object):
+ """Chunked store for arbitrary ndarrays, supporting append."""
+ TYPE = 'ndarray'
+
+ @classmethod
+ def initialize_library(cls, *args, **kwargs):
+ pass
+
+ @staticmethod
+ def _ensure_index(collection):
+ try:
+ collection.create_index([('symbol', pymongo.HASHED)], background=True)
+ collection.create_index([('symbol', pymongo.ASCENDING),
+ ('sha', pymongo.ASCENDING)], unique=True, background=True)
+ collection.create_index([('symbol', pymongo.ASCENDING),
+ ('parent', pymongo.ASCENDING),
+ ('segment', pymongo.ASCENDING)], unique=True, background=True)
+ except OperationFailure as e:
+ if "can't use unique indexes" in str(e):
+ return
+ raise
+
+
+ @mongo_retry
+ def can_delete(self, version, symbol):
+ return self.can_read(version, symbol)
+
+ def can_read(self, version, symbol):
+ return version['type'] == self.TYPE
+
+ def can_write(self, version, symbol, data):
+ return isinstance(data, np.ndarray) and not data.dtype.hasobject
+
+ def _dtype(self, string, metadata=None):
+ if metadata is None:
+ metadata = {}
+ if string.startswith('['):
+ return np.dtype(eval(string), metadata=metadata)
+ return np.dtype(string, metadata=metadata)
+
+
+ def _index_range(self, version, symbol, from_version=None, **kwargs):
+ """
+ Tuple describing range to read from the ndarray - closed:open
+ """
+ from_index = None
+ if from_version:
+ if version['base_sha'] != from_version['base_sha']:
+ #give up - the data has been overwritten, so we can't tail this
+ raise ConcurrentModificationException("Concurrent modification - data has been overwritten")
+ from_index = from_version['up_to']
+ return from_index, None
+
+ def get_info(self, arctic_lib, version, symbol, **kwargs):
+ collection = arctic_lib.get_top_level_collection()
+ dtype = self._dtype(version['dtype'], version.get('dtype_metadata', {}))
+ length = int(version['up_to'])
+
+ spec = {'symbol': symbol,
+ 'parent': version.get('base_version_id', version['_id']),
+ 'segment': {'$lt': length}}
+
+ n_segments = collection.find(spec).count()
+
+ est_size = dtype.itemsize * length
+ return """Handler: %s
+
+dtype: %s
+
+%d rows in %d segments
+Data size: %s bytes
+
+Version document:
+%s""" % (self.__class__.__name__, dtype, length, n_segments, est_size, pprint.pformat(version))
+
+
+ def read(self, arctic_lib, version, symbol, read_preference=None, **kwargs):
+ index_range = self._index_range(version, symbol, **kwargs)
+ collection = arctic_lib.get_top_level_collection()
+ if read_preference:
+ collection = collection.with_options(read_preference=read_preference)
+ return self._do_read(collection, version, symbol, index_range=index_range)
+
+ def _do_read(self, collection, version, symbol, index_range=None):
+ from_index = index_range[0] if index_range else None
+ to_index = index_range[1] if index_range and index_range[1] is not None \
+ and index_range[1] < version['up_to'] else version['up_to']
+ segment_count = None
+
+ spec = {'symbol': symbol,
+ 'parent': version.get('base_version_id', version['_id']),
+ 'segment': {'$lt': to_index}}
+ if from_index:
+ spec['segment'] = {'$lt': version['up_to'], '$gte': from_index}
+ else:
+ segment_count = version.get('segment_count', None)
+
+ segments = []
+ i = -1
+ for i, x in enumerate(collection.find(spec, sort=[('segment', pymongo.ASCENDING)],)):
+ try:
+ segments.append(decompress(x['data']) if x['compressed'] else x['data'])
+ except Exception:
+ dump_bad_documents(x, collection.find_one({'_id': x['_id']}),
+ collection.find_one({'_id': x['_id']}),
+ collection.find_one({'_id': x['_id']}))
+ raise
+ data = ''.join(segments)
+
+ # Check that the correct number of segments has been returned
+ if segment_count is not None and i + 1 != segment_count:
+ raise OperationFailure("Incorrect number of segments returned for {}:{}. Expected: {}, but got {}. {}".format(
+ symbol, version['version'], segment_count, i + 1, collection.database.name + '.' + collection.name))
+
+ dtype = self._dtype(version['dtype'], version.get('dtype_metadata', {}))
+ rtn = np.fromstring(data, dtype=dtype).reshape(version.get('shape', (-1)))
+ return rtn
+
+ def _promote_types(self, item, dtype_str):
+ if dtype_str == str(item.dtype):
+ return item.dtype
+ prev_dtype = self._dtype(dtype_str)
+ if item.dtype.names is None:
+ rtn = np.promote_types(item.dtype, prev_dtype)
+ else:
+ rtn = _promote_struct_dtypes(item.dtype, prev_dtype)
+ rtn = np.dtype(rtn, metadata=dict(item.dtype.metadata or {}))
+ return rtn
+
+ def append(self, arctic_lib, version, symbol, item, previous_version):
+ collection = arctic_lib.get_top_level_collection()
+ if previous_version.get('shape', [-1]) != [-1, ] + list(item.shape)[1:]:
+ raise UnhandledDtypeException()
+
+ if previous_version['up_to'] == 0:
+ dtype = item.dtype
+ elif len(item) == 0:
+ dtype = self._dtype(previous_version['dtype'])
+ else:
+ dtype = self._promote_types(item, previous_version['dtype'])
+ item = item.astype(dtype)
+ if str(dtype) != previous_version['dtype']:
+ logger.debug('Converting %s from %s to %s' % (symbol, previous_version['dtype'], str(dtype)))
+ if item.dtype.hasobject:
+ raise UnhandledDtypeException()
+ version['dtype'] = str(item.dtype)
+ version['dtype_metadata'] = dict(item.dtype.metadata or {})
+ version['type'] = self.TYPE
+
+ old_arr = self._do_read(collection, previous_version, symbol).astype(dtype)
+ # missing float columns should default to nan rather than zero
+ old_dtype = self._dtype(previous_version['dtype'])
+ if dtype.names is not None and old_dtype.names is not None:
+ new_columns = set(dtype.names) - set(old_dtype.names)
+ _is_float_type = lambda _dtype: _dtype.type in (np.float32, np.float64)
+ _is_void_float_type = lambda _dtype: _dtype.type == np.void and _is_float_type(_dtype.subdtype[0])
+ _is_float_or_void_float_type = lambda _dtype: _is_float_type(_dtype) or _is_void_float_type(_dtype)
+ _is_float = lambda column: _is_float_or_void_float_type(dtype.fields[column][0])
+ for new_column in filter(_is_float, new_columns):
+ old_arr[new_column] = np.nan
+
+ item = np.concatenate([old_arr, item])
+ version['up_to'] = len(item)
+ version['sha'] = self.checksum(item)
+ version['base_sha'] = version['sha']
+ self._do_write(collection, version, symbol, item, previous_version)
+ else:
+ version['dtype'] = previous_version['dtype']
+ version['dtype_metadata'] = previous_version['dtype_metadata']
+ version['type'] = self.TYPE
+ self._do_append(collection, version, symbol, item, previous_version)
+
+ def _do_append(self, collection, version, symbol, item, previous_version):
+
+ data = item.tostring()
+ version['base_sha'] = previous_version['base_sha']
+ version['up_to'] = previous_version['up_to'] + len(item)
+ if len(item) > 0:
+ version['segment_count'] = previous_version['segment_count'] + 1
+ version['append_count'] = previous_version['append_count'] + 1
+ version['append_size'] = previous_version['append_size'] + len(data)
+ else:
+ version['segment_count'] = previous_version['segment_count']
+ version['append_count'] = previous_version['append_count']
+ version['append_size'] = previous_version['append_size']
+
+ #_CHUNK_SIZE is probably too big if we're only appending single rows of data - perhaps something smaller,
+ #or also look at number of appended segments?
+ if version['append_count'] < _APPEND_COUNT and version['append_size'] < _APPEND_SIZE:
+ version['base_version_id'] = previous_version.get('base_version_id', previous_version['_id'])
+
+ if len(item) > 0:
+
+ segment = {'data': Binary(data), 'compressed': False}
+ segment['segment'] = version['up_to'] - 1
+ try:
+ collection.update_one({'symbol': symbol,
+ 'sha': checksum(symbol, segment)},
+ {'$set': segment,
+ '$addToSet': {'parent': version['base_version_id']}},
+ upsert=True)
+ except DuplicateKeyError:
+ '''If we get a duplicate key error here, this segment has the same symbol/parent/segment
+ as another chunk, but a different sha. This means that we have 'forked' history.
+ If we concat_and_rewrite here, new chunks will have a different parent id (the _id of this version doc)
+ ...so we can safely write them.
+ '''
+ self._concat_and_rewrite(collection, version, symbol, item, previous_version)
+ return
+
+ if 'segment_index' in previous_version:
+ segment_index = self._segment_index(item,
+ existing_index=previous_version.get('segment_index'),
+ start=previous_version['up_to'],
+ new_segments=[segment['segment'], ])
+ if segment_index:
+ version['segment_index'] = segment_index
+ logger.debug("Appended segment %d for parent %s" % (segment['segment'], version['_id']))
+ else:
+ if 'segment_index' in previous_version:
+ version['segment_index'] = previous_version['segment_index']
+
+ else: # Too much data has been appended now, so rewrite (and compress/chunk).
+ self._concat_and_rewrite(collection, version, symbol, item, previous_version)
+
+ def _concat_and_rewrite(self, collection, version, symbol, item, previous_version):
+
+ version.pop('base_version_id', None)
+
+ # Figure out which is the last 'full' chunk
+ spec = {'symbol': symbol,
+ 'parent': previous_version.get('base_version_id', previous_version['_id']),
+ 'segment': {'$lt': version['up_to']}}
+
+ read_index_range = [0, None]
+ unchanged_segment_ids = list(collection.find(spec, projection={'_id':1, 'segment':1},
+ sort=[('segment', pymongo.ASCENDING)],))\
+ [:-1 * (previous_version['append_count'] + 1)]
+ if unchanged_segment_ids:
+ read_index_range[0] = unchanged_segment_ids[-1]['segment'] + 1
+
+ old_arr = self._do_read(collection, previous_version, symbol, index_range=read_index_range)
+ if len(item) == 0:
+ logger.debug('Rewrite and compress/chunk item %s, rewrote old_arr' % symbol)
+ self._do_write(collection, version, symbol, old_arr, previous_version, segment_offset=read_index_range[0])
+ elif len(old_arr) == 0:
+ logger.debug('Rewrite and compress/chunk item %s, wrote item' % symbol)
+ self._do_write(collection, version, symbol, item, previous_version, segment_offset=read_index_range[0])
+ else:
+ logger.debug("Rewrite and compress/chunk %s, np.concatenate %s to %s" % (symbol,
+ item.dtype, old_arr.dtype))
+ self._do_write(collection, version, symbol, np.concatenate([old_arr, item]), previous_version, segment_offset=read_index_range[0])
+ if unchanged_segment_ids:
+ collection.update_many({'symbol': symbol, '_id': {'$in': [x['_id'] for x in unchanged_segment_ids]}},
+ {'$addToSet': {'parent': version['_id']}})
+ version['segment_count'] = version['segment_count'] + len(unchanged_segment_ids)
+
+ def check_written(self, collection, symbol, version):
+ # Check all the chunks are in place
+ seen_chunks = collection.find({'symbol': symbol, 'parent': version['_id']},
+ ).count()
+
+ if seen_chunks != version['segment_count']:
+ segments = [x['segment'] for x in collection.find({'symbol': symbol, 'parent': version['_id']},
+ projection={'segment': 1},
+ )]
+ raise pymongo.errors.OperationFailure("Failed to write all the Chunks. Saw %s expecting %s"
+ "Parent: %s \n segments: %s" %
+ (seen_chunks, version['segment_count'], version['_id'], segments))
+
+ def checksum(self, item):
+ sha = hashlib.sha1()
+ sha.update(item.tostring())
+ return Binary(sha.digest())
+
+ def write(self, arctic_lib, version, symbol, item, previous_version):
+ collection = arctic_lib.get_top_level_collection()
+ if item.dtype.hasobject:
+ raise UnhandledDtypeException()
+
+ version['dtype'] = str(item.dtype)
+ version['shape'] = (-1,) + item.shape[1:]
+ version['dtype_metadata'] = dict(item.dtype.metadata or {})
+ version['type'] = self.TYPE
+ version['up_to'] = len(item)
+ version['sha'] = self.checksum(item)
+
+ if previous_version:
+ if version['dtype'] == str(item.dtype) \
+ and 'sha' in previous_version \
+ and self.checksum(item[:previous_version['up_to']]) == previous_version['sha']:
+ #The first n rows are identical to the previous version, so just append.
+ self._do_append(collection, version, symbol, item[previous_version['up_to']:], previous_version)
+ return
+
+ version['base_sha'] = version['sha']
+ self._do_write(collection, version, symbol, item, previous_version)
+
+ def _do_write(self, collection, version, symbol, item, previous_version, segment_offset=0):
+
+ sze = int(item.dtype.itemsize * np.prod(item.shape[1:]))
+
+ # chunk and store the data by (uncompressed) size
+ chunk_size = _CHUNK_SIZE / sze
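+ # e.g. (illustrative) for plain 8-byte rows this is roughly 262,000 rows per
+ # ~2MB uncompressed chunk.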
+
+ previous_shas = []
+ if previous_version:
+ previous_shas = set([x['sha'] for x in
+ collection.find({'symbol': symbol},
+ projection={'sha': 1, '_id': 0},
+ )
+ ])
+
+ length = len(item)
+
+ if segment_offset > 0 and 'segment_index' in previous_version:
+ existing_index = previous_version['segment_index']
+ else:
+ existing_index = None
+
+ segment_index = []
+ i = -1
+
+ # Compress
+ idxs = xrange(int(np.ceil(float(length) / chunk_size)))
+ chunks = [(item[i * chunk_size: (i + 1) * chunk_size]).tostring() for i in idxs]
+ compressed_chunks = compress_array(chunks)
+
+ # Write
+ bulk = collection.initialize_unordered_bulk_op()
+ for i, chunk in zip(idxs, compressed_chunks):
+ segment = {'data': Binary(chunk), 'compressed':True}
+ segment['segment'] = min((i + 1) * chunk_size - 1, length - 1) + segment_offset
+ segment_index.append(segment['segment'])
+ sha = checksum(symbol, segment)
+ if sha not in previous_shas:
+ segment['sha'] = sha
+ bulk.find({'symbol': symbol, 'sha': sha, 'segment': segment['segment']}
+ ).upsert().update_one({'$set': segment, '$addToSet': {'parent': version['_id']}})
+ else:
+ bulk.find({'symbol': symbol, 'sha': sha, 'segment': segment['segment']}
+ ).update_one({'$addToSet': {'parent': version['_id']}})
+ if i != -1:
+ bulk.execute()
+
+ segment_index = self._segment_index(item, existing_index=existing_index, start=segment_offset, new_segments=segment_index)
+ if segment_index:
+ version['segment_index'] = segment_index
+ version['segment_count'] = i + 1
+ version['append_size'] = 0
+ version['append_count'] = 0
+
+ self.check_written(collection, symbol, version)
+
+ def _segment_index(self, item, existing_index, start, new_segments):
+ pass
+
diff --git a/arctic/store/_pandas_ndarray_store.py b/arctic/store/_pandas_ndarray_store.py
new file mode 100644
index 000000000..50296ec17
--- /dev/null
+++ b/arctic/store/_pandas_ndarray_store.py
@@ -0,0 +1,209 @@
+from _ndarray_store import NdarrayStore
+from pandas import DataFrame, MultiIndex, Series, DatetimeIndex, Panel
+from pandas.tslib import Timestamp, get_timezone
+import numpy as np
+
+from ..logging import logger as log
+
+
+def _to_primitive(arr):
+ if arr.dtype.hasobject:
+ if len(arr) > 0:
+ if isinstance(arr[0], Timestamp):
+ return arr.astype('datetime64[ns]')
+ return np.array(list(arr))
+ return arr
+
+
+class PandasStore(NdarrayStore):
+
+ def _index_to_records(self, df):
+ metadata = {}
+ index = df.index
+
+ if isinstance(index, MultiIndex):
+ # array of tuples to numpy cols. copy copy copy
+ if len(df) > 0:
+ ix_vals = map(np.array, zip(*index.values))
+ else:
+                # empty multi index has no size, create empty arrays for the recarray.
+ ix_vals = [np.array([]) for n in index.names]
+ else:
+ ix_vals = [index.values]
+
+ count = 0
+ index_names = list(index.names)
+ if isinstance(index, MultiIndex):
+ for i, n in enumerate(index_names):
+ if n is None:
+ index_names[i] = 'level_%d' % count
+ count += 1
+ elif index_names[0] is None:
+ index_names = ['index']
+
+ metadata['index'] = index_names
+
+ if isinstance(index, DatetimeIndex) and index.tz is not None:
+ metadata['index_tz'] = get_timezone(index.tz)
+
+ return index_names, ix_vals, metadata
+
+ def _index_from_records(self, recarr):
+ index = recarr.dtype.metadata['index']
+ rtn = MultiIndex.from_arrays([recarr[str(i)] for i in index], names=index)
+
+ if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
+ rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
+
+ return rtn
+
+ def to_records(self, df):
+ """
+ Similar to DataFrame.to_records()
+ Differences:
+ Attempt type conversion for pandas columns stored as objects (e.g. strings),
+ as we can only store primitives in the ndarray.
+ Use dtype metadata to store column and index names.
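+
+        Illustrative sketch (column, index and variable names are placeholders)::
+
+            df = DataFrame({'price': [1.0, 2.0]},
+                           index=DatetimeIndex(['2015-01-01', '2015-01-02'], name='index'))
+            rec = store.to_records(df)
+            rec.dtype.names           # ('index', 'price')
+            dict(rec.dtype.metadata)  # {'index': ['index'], 'columns': ['price']}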
+ """
+
+ index_names, ix_vals, metadata = self._index_to_records(df)
+ columns, column_vals = self._column_data(df)
+
+ metadata['columns'] = columns
+ names = index_names + columns
+ arrays = ix_vals + column_vals
+ arrays = map(_to_primitive, arrays)
+ dtype = np.dtype([(str(x), v.dtype) if len(v.shape) == 1 else (str(x), v.dtype, v.shape[1]) for x, v in zip(names, arrays)],
+ metadata=metadata)
+ rtn = np.rec.fromarrays(arrays, dtype=dtype, names=names)
+ #For some reason the dtype metadata is lost in the line above.
+ rtn.dtype = dtype
+ return rtn
+
+ def can_convert_to_records_without_objects(self, df, symbol):
+ # We can't easily distinguish string columns from objects
+ try:
+ arr = self.to_records(df)
+ except Exception as e:
+ # This exception will also occur when we try to write the object so we fall-back to saving using Pickle
+ log.info('Pandas dataframe %s caused exception "%s" when attempting to convert to records. Saving as Blob.'
+ % (symbol, repr(e)))
+ return False
+ else:
+ if arr.dtype.hasobject:
+ log.info('Pandas dataframe %s contains Objects, saving as Blob' % symbol)
+ # Will fall-back to saving using Pickle
+ return False
+ elif any([len(x[0].shape) for x in arr.dtype.fields.values()]):
+ log.info('Pandas dataframe %s contains >1 dimensional arrays, saving as Blob' % symbol)
+ return False
+ else:
+ return True
+
+
+class PandasSeriesStore(PandasStore):
+ TYPE = 'pandasseries'
+
+ def _column_data(self, s):
+ columns = [s.name if s.name else 'values']
+ column_vals = [s.values]
+ return columns, column_vals
+
+ def from_records(self, recarr):
+ index = self._index_from_records(recarr)
+ name = recarr.dtype.names[-1]
+ return Series.from_array(recarr[name], index=index, name=name)
+
+ def can_write(self, version, symbol, data):
+ if isinstance(data, Series):
+ if data.dtype == np.object_ or data.index.dtype == np.object_:
+ return self.can_convert_to_records_without_objects(data, symbol)
+ return True
+ return False
+
+ def write(self, arctic_lib, version, symbol, item, previous_version):
+ item = self.to_records(item)
+ super(PandasSeriesStore, self).write(arctic_lib, version, symbol, item, previous_version)
+
+ def append(self, arctic_lib, version, symbol, item, previous_version):
+ item = self.to_records(item)
+ super(PandasSeriesStore, self).append(arctic_lib, version, symbol, item, previous_version)
+
+ def read(self, arctic_lib, version, symbol, **kwargs):
+ item = super(PandasSeriesStore, self).read(arctic_lib, version, symbol, **kwargs)
+ return self.from_records(item)
+
+
+class PandasDataFrameStore(PandasStore):
+ TYPE = 'pandasdf'
+
+ def _column_data(self, df):
+ columns = list(map(str, df.columns))
+ column_vals = [df[c].values for c in df.columns]
+ return columns, column_vals
+
+
+ def from_records(self, recarr):
+ index = self._index_from_records(recarr)
+ column_fields = [x for x in recarr.dtype.names if x not in recarr.dtype.metadata['index']]
+ if len(recarr) == 0:
+ rdata = recarr[column_fields] if len(column_fields) > 0 else None
+ return DataFrame(rdata, index=index)
+
+ columns = recarr.dtype.metadata['columns']
+ return DataFrame(data=recarr[column_fields], index=index, columns=columns)
+
+ def can_write(self, version, symbol, data):
+ if isinstance(data, DataFrame):
+ if np.any(data.dtypes.values == 'object'):
+ return self.can_convert_to_records_without_objects(data, symbol)
+ return True
+ return False
+
+ def write(self, arctic_lib, version, symbol, item, previous_version):
+ item = self.to_records(item)
+ super(PandasDataFrameStore, self).write(arctic_lib, version, symbol, item, previous_version)
+
+ def append(self, arctic_lib, version, symbol, item, previous_version):
+ item = self.to_records(item)
+ super(PandasDataFrameStore, self).append(arctic_lib, version, symbol, item, previous_version)
+
+ def read(self, arctic_lib, version, symbol, **kwargs):
+ item = super(PandasDataFrameStore, self).read(arctic_lib, version, symbol, **kwargs)
+ return self.from_records(item)
+
+class PandasPanelStore(PandasDataFrameStore):
+ TYPE = 'pandaspan'
+
+ def can_write(self, version, symbol, data):
+ if isinstance(data, Panel):
+ frame = data.to_frame()
+ if np.any(frame.dtypes.values == 'object'):
+ return self.can_convert_to_records_without_objects(frame, symbol)
+ return True
+ return False
+
+ def write(self, arctic_lib, version, symbol, item, previous_version):
+ if np.product(item.shape) == 0:
+ # Currently not supporting zero size panels as they drop indices when converting to dataframes
+ # Plan is to find a better solution in due course.
+ raise ValueError('Cannot insert a zero size panel into mongo.')
+        if not all(len(i.names) == 1 for i in item.axes):
+ raise ValueError('Cannot insert panels with multiindexes')
+ item = item.to_frame()
+ if len(set(item.dtypes)) == 1:
+ # If all columns have the same dtype, we support non-string column names.
+ # We know from above check that columns is not a multiindex.
+ item = DataFrame(item.stack())
+ elif item.columns.dtype != np.dtype('object'):
+ raise ValueError('Cannot support non-object dtypes for columns')
+ super(PandasPanelStore, self).write(arctic_lib, version, symbol, item, previous_version)
+
+ def read(self, arctic_lib, version, symbol, **kwargs):
+ item = super(PandasPanelStore, self).read(arctic_lib, version, symbol, **kwargs)
+ if len(item.index.names) == 3:
+ return item.iloc[:, 0].unstack().to_panel()
+ return item.to_panel()
+
+ def append(self, arctic_lib, version, symbol, item, previous_version):
+ raise ValueError('Appending not supported for pandas.Panel')
diff --git a/arctic/store/_pickle_store.py b/arctic/store/_pickle_store.py
new file mode 100644
index 000000000..80e0717b5
--- /dev/null
+++ b/arctic/store/_pickle_store.py
@@ -0,0 +1,62 @@
+import bson
+from bson.binary import Binary
+from bson.errors import InvalidDocument
+import cPickle
+import lz4
+import pymongo
+import pprint
+
+from ._version_store_utils import checksum
+
+_MAGIC_CHUNKED = '__chunked__'
+_CHUNK_SIZE = 15 * 1024 * 1024 # 15MB
+
+
+class PickleStore(object):
+
+ @classmethod
+ def initialize_library(cls, *args, **kwargs):
+ pass
+
+ def get_info(self, arctic_lib, version, symbol, **kwargs):
+ if 'blob' in version:
+ if version['blob'] != _MAGIC_CHUNKED:
+ version['blob'] = ""
+
+ return """Handler: %s\n\nVersion document:\n%s""" % (self.__class__.__name__, pprint.pformat(version))
+
+ def read(self, arctic_lib, version, symbol, **kwargs):
+ if 'blob' in version:
+ if version['blob'] == _MAGIC_CHUNKED:
+ collection = arctic_lib.get_top_level_collection()
+ data = ''.join([x['data'] for x in collection.find({'symbol': symbol,
+ 'parent': version['_id']},
+ sort=[('segment', pymongo.ASCENDING)])])
+ else:
+ data = version['blob']
+ # Backwards compatibility
+ return cPickle.loads(lz4.decompress(data))
+ return version['data']
+
+ def write(self, arctic_lib, version, symbol, item, previous_version):
+ try:
+ # If it's encodeable, then ship it
+ bson.BSON.encode({'data': item})
+ version['data'] = item
+ return
+ except InvalidDocument:
+ pass
+
+ # Pickle, chunk and store the data
+ collection = arctic_lib.get_top_level_collection()
+ # Try to pickle it. This is best effort
+ version['blob'] = _MAGIC_CHUNKED
+ pickled = lz4.compressHC(cPickle.dumps(item, protocol=cPickle.HIGHEST_PROTOCOL))
+
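+        # Illustrative arithmetic: a ~40MB pickled blob with the 15MB _CHUNK_SIZE above
+        # is stored as three segments, numbered 0, 1 and 2.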
+ for i in xrange(len(pickled) / _CHUNK_SIZE + 1):
+ segment = {'data': Binary(pickled[i * _CHUNK_SIZE : (i + 1) * _CHUNK_SIZE])}
+ sha = checksum(symbol, segment)
+ segment['segment'] = i
+ collection.update_one({'symbol': symbol, 'sha': sha}, {'$set': segment,
+ '$addToSet': {'parent': version['_id']}},
+ upsert=True)
diff --git a/arctic/store/_version_store_utils.py b/arctic/store/_version_store_utils.py
new file mode 100644
index 000000000..5ea572958
--- /dev/null
+++ b/arctic/store/_version_store_utils.py
@@ -0,0 +1,55 @@
+from bson import Binary
+import hashlib
+import numpy as np
+
+def _split_arrs(array_2d, slices):
+ """
+ Equivalent to numpy.split(array_2d, slices),
+ but avoids fancy indexing
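+
+    Illustrative example::
+
+        _split_arrs(np.arange(6).reshape(3, 2), [1])
+        # -> object array holding rows [0:1] and rows [1:3]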
+ """
+ if len(array_2d) == 0:
+ return np.empty(0, dtype=np.object)
+
+ rtn = np.empty(len(slices) + 1, dtype=np.object)
+ start = 0
+ for i, s in enumerate(slices):
+ rtn[i] = array_2d[start:s]
+ start = s
+ rtn[-1] = array_2d[start:]
+ return rtn
+
+
+def checksum(symbol, doc):
+ """
+ Checksum the passed in dictionary
+ """
+ sha = hashlib.sha1()
+ sha.update(symbol)
+ for k in sorted(doc.iterkeys(), reverse=True):
+ sha.update(str(doc[k]))
+ return Binary(sha.digest())
+
+
+def cleanup(arctic_lib, symbol, version_ids):
+ """
+ Helper method for cleaning up chunks from a version store
+ """
+ collection = arctic_lib.get_top_level_collection()
+
+    # Remove any chunks which contain just the parent, at the outset.
+    # We do this here because $pull would leave an empty parent array: []
+    # and the index which contains the parents field would fail the unique constraint.
+ for v in version_ids:
+ # Remove all documents which only contain the parent
+ collection.delete_many({'symbol': symbol,
+ 'parent': {'$all': [v],
+ '$size': 1},
+ })
+ # Pull the parent from the parents field
+ collection.update_many({'symbol': symbol,
+ 'parent': v},
+ {'$pull': {'parent': v}})
+
+ # Now remove all chunks which aren't parented - this is unlikely, as they will
+ # have been removed by the above
+ collection.delete_one({'symbol': symbol, 'parent': {'$size': 0}})
diff --git a/arctic/store/audit.py b/arctic/store/audit.py
new file mode 100644
index 000000000..e233a5a4a
--- /dev/null
+++ b/arctic/store/audit.py
@@ -0,0 +1,137 @@
+"""
+Handle audited data changes.
+"""
+from functools import partial
+
+from pymongo.errors import OperationFailure
+
+from .._util import are_equals
+from ..decorators import _get_host
+from ..exceptions import NoDataFoundException, ConcurrentModificationException
+from ..logging import logger
+from .versioned_item import VersionedItem, ChangedItem
+
+
+class DataChange(object):
+ """
+ Object representing incoming data change
+ """
+ def __init__(self, date_range, new_data):
+ self.date_range = date_range
+ self.new_data = new_data
+
+
+class ArcticTransaction(object):
+ '''Use this context manager if you want to modify data in a version store while ensuring that no other writes
+ interfere with your own.
+
+    To use, base your modifications on the `base_ts` context manager field, then call the `write` method of the
+    context manager with your newly created timeseries to output the changes. The changes will only be written when
+    the block exits.
+
+ NB changes are audited.
+
+ Example:
+ -------
+ with ArcticTransaction(Arctic('hostname')['some_library'], 'symbol') as mt:
+ ts_version_info = mt.base_ts
+ # do some processing, come up with a new ts for 'symbol' called new_symbol_ts, presumably based on ts_version_info.data
+ mt.write('symbol', new_symbol_ts, metadata=new_symbol_metadata)
+
+    The block will raise a ConcurrentModificationException if an inconsistency has been detected. You will have to
+    retry the whole block should that happen, as the assumption is that you need to base your changes on a different
+    starting timeseries.
+ '''
+ def __init__(self, version_store, symbol, user, log, modify_timeseries=None, *args, **kwargs):
+ '''
+ Parameters
+ ----------
+ version_store: `VersionStore` Arctic Library
+            Needs to support write, read, list_versions and _delete_version; this is the underlying store that we'll
+            be securing for writes
+
+ symbol: `str`
+ symbol name for the item that's being modified
+
+ user: `str`
+ user making the change
+
+ log: `str`
+ Log message for the change
+
+ modify_timeseries:
+            if given, the context manager will check the assumption that this is the latest data available for symbol
+            in version_store. Should that not be the case, a ConcurrentModificationException will be raised. Use this
+            if you're interacting with code that has already read the data and for some reason you cannot refactor the
+            read-write operation to be contained within this context manager
+
+ all other args:
+ Will be passed into the initial read
+ '''
+ self._version_store = version_store
+ self._symbol = symbol
+ self._user = user
+ self._log = log
+ logger.info("MT: {}@{}: [{}] {}: {}".format(_get_host(version_store).get('l'),
+ _get_host(version_store).get('mhost'),
+ user, log, symbol)
+ )
+ try:
+ self.base_ts = self._version_store.read(self._symbol, *args, **kwargs)
+ except NoDataFoundException:
+ versions = [x['version'] for x in self._version_store.list_versions(self._symbol, latest_only=True)]
+ versions.append(0)
+ self.base_ts = VersionedItem(symbol=self._symbol, library=None,
+ version=versions[0], metadata=None, data=None)
+ except OperationFailure:
+ #TODO: Current errors in mongo "Incorrect Number of Segments Returned"
+ # This workaround should be removed once underlying problem is resolved.
+ self.base_ts = self._version_store.read_metadata(symbol=self._symbol)
+
+ if modify_timeseries is not None and not are_equals(modify_timeseries, self.base_ts.data):
+ raise ConcurrentModificationException()
+ self._do_write = False
+
+ def change(self, symbol, data_changes, **kwargs):
+ """
+ Change, and audit 'data' under the specified 'symbol' name to this library.
+
+ Parameters
+ ----------
+ symbol: `str`
+ symbol name for the item
+
+ data_changes: `list DataChange`
+ list of DataChange objects
+ """
+ pass
+
+ def write(self, symbol, data, prune_previous_version=True, metadata=None, **kwargs):
+ '''Records a write request to be actioned on context exit. Takes exactly the same parameters as the regular
+ library write call.
+ '''
+ if data is not None:
+ # We only write data if existing data is None or the Timeseries data has changed or metadata has changed
+ if self.base_ts.data is None or not are_equals(data, self.base_ts.data) or metadata != self.base_ts.metadata:
+ self._do_write = True
+ self._write = partial(self._version_store.write, symbol, data, prune_previous_version=prune_previous_version,
+ metadata=metadata, **kwargs)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args, **kwargs):
+ if self._do_write:
+ written_ver = self._write()
+ versions = [x['version'] for x in self._version_store.list_versions(self._symbol)]
+ versions.append(0)
+ versions.reverse()
+ base_offset = versions.index(self.base_ts.version)
+ new_offset = versions.index(written_ver.version)
+ if len(versions[base_offset: new_offset + 1]) != 2:
+ self._version_store._delete_version(self._symbol, written_ver.version)
+ raise ConcurrentModificationException("Inconsistent Versions: {}: {}->{}".format(
+ self._symbol, self.base_ts.version, written_ver.version))
+
+ changed = ChangedItem(self._symbol, self.base_ts, written_ver, None)
+ self._version_store._write_audit(self._user, self._log, changed)
diff --git a/arctic/store/version_store.py b/arctic/store/version_store.py
new file mode 100644
index 000000000..76a41bf92
--- /dev/null
+++ b/arctic/store/version_store.py
@@ -0,0 +1,867 @@
+from datetime import datetime as dt, timedelta
+import pprint
+
+import bson
+from pymongo import ReadPreference
+import pymongo
+from pymongo.errors import OperationFailure, AutoReconnect
+
+from .._util import indent, enable_powerof2sizes, \
+ enable_sharding
+from ..date import mktz, datetime_to_ms, ms_to_datetime
+from ..decorators import mongo_retry
+from ..exceptions import NoDataFoundException, DuplicateSnapshotException, \
+ OptimisticLockException, ArcticException
+from ..hooks import log_exception
+from ..logging import logger
+from ._pickle_store import PickleStore
+from ._version_store_utils import cleanup
+from .versioned_item import VersionedItem
+
+
+VERSION_STORE_TYPE = 'VersionStore'
+_TYPE_HANDLERS = []
+
+
+def register_versioned_storage(storageClass):
+ existing_instances = [i for i, v in enumerate(_TYPE_HANDLERS) if str(v.__class__) == str(storageClass)]
+ if existing_instances:
+ for i in existing_instances:
+ _TYPE_HANDLERS[i] = storageClass()
+ else:
+ _TYPE_HANDLERS.append(storageClass())
+ return storageClass
+
+
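+# Illustrative registration of a concrete handler (a sketch; the actual wiring of handlers
+# happens elsewhere in the package):
+#     register_versioned_storage(PandasDataFrameStore)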
+
+class VersionStore(object):
+
+ _bson_handler = PickleStore()
+
+ @classmethod
+ def initialize_library(cls, arctic_lib, hashed=False, **kwargs):
+ c = arctic_lib.get_top_level_collection()
+
+ if '%s.changes' % c.name not in mongo_retry(c.database.collection_names)():
+ # 32MB buffer for change notifications
+ mongo_retry(c.database.create_collection)('%s.changes' % c.name, capped=True, size=32 * 1024 * 1024)
+
+ for th in _TYPE_HANDLERS:
+ th.initialize_library(arctic_lib, **kwargs)
+ VersionStore._bson_handler.initialize_library(arctic_lib, **kwargs)
+ VersionStore(arctic_lib)._ensure_index()
+
+ logger.info("Trying to enable usePowerOf2Sizes...")
+ try:
+ enable_powerof2sizes(arctic_lib.arctic, arctic_lib.get_name())
+ except OperationFailure, e:
+ logger.error("Library created, but couldn't enable usePowerOf2Sizes: %s" % str(e))
+
+ logger.info("Trying to enable sharding...")
+ try:
+ enable_sharding(arctic_lib.arctic, arctic_lib.get_name(), hashed=hashed)
+ except OperationFailure, e:
+ logger.warn("Library created, but couldn't enable sharding: %s. This is OK if you're not 'admin'" % str(e))
+
+ @mongo_retry
+ def _ensure_index(self):
+ collection = self._collection
+ collection.snapshots.create_index([('name', pymongo.ASCENDING)], unique=True,
+ background=True)
+ collection.versions.create_index([('symbol', pymongo.ASCENDING), ('_id', pymongo.DESCENDING)],
+ background=True)
+ collection.versions.create_index([('symbol', pymongo.ASCENDING), ('version', pymongo.DESCENDING)], unique=True,
+ background=True)
+ collection.version_nums.create_index('symbol', unique=True, background=True)
+ for th in _TYPE_HANDLERS:
+ th._ensure_index(collection)
+
+ @mongo_retry
+ def __init__(self, arctic_lib):
+ self._arctic_lib = arctic_lib
+
+ # Do we allow reading from secondaries
+ self._allow_secondary = self._arctic_lib.arctic._allow_secondary
+
+ # The default collections
+ self._collection = arctic_lib.get_top_level_collection()
+ self._audit = self._collection.audit
+ self._snapshots = self._collection.snapshots
+ self._versions = self._collection.versions
+ self._version_nums = self._collection.version_nums
+ self._publish_changes = '%s.changes' % self._collection.name in self._collection.database.collection_names()
+ if self._publish_changes:
+ self._changes = self._collection.changes
+
+ def __getstate__(self):
+ return {'arctic_lib': self._arctic_lib}
+
+ def __setstate__(self, state):
+ return VersionStore.__init__(self, state['arctic_lib'])
+
+ def __str__(self):
+ return """<%s at %s>
+%s""" % (self.__class__.__name__, hex(id(self)), indent(str(self._arctic_lib), 4))
+
+ def __repr__(self):
+ return str(self)
+
+ @mongo_retry
+ def list_symbols(self, all_symbols=False, snapshot=None, regex=None, **kwargs):
+ """
+ Return the symbols in this library.
+
+ Parameters
+ ----------
+ all_symbols : `bool`
+            If True returns all symbols under all snapshots, even if the symbol has been deleted
+            in the current version (i.e. it still exists under a snapshot). Default: False
+ snapshot : `str`
+ Return the symbols available under the snapshot.
+ regex : `str`
+ filter symbols by the passed in regular expression
+ kwargs :
+ kwarg keys are used as fields to query for symbols with metadata matching
+ the kwargs query
+
+ Returns
+ -------
+ String list of symbols in the library
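+
+        Illustrative usage (the regex and metadata field name are placeholders)::
+
+            library.list_symbols(regex='^EUR', category='fx')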
+ """
+ query = {}
+ if regex is not None:
+            query['symbol'] = {'$regex': regex}
+ if kwargs:
+ for k, v in kwargs.iteritems():
+ query['metadata.' + k] = v
+ if snapshot is not None:
+ try:
+ query['parent'] = self._snapshots.find_one({'name': snapshot})['_id']
+ except TypeError:
+ raise NoDataFoundException('No snapshot %s in library %s' % (snapshot, self._arctic_lib.get_name()))
+ elif all_symbols:
+ return self._versions.find(query).distinct('symbol')
+
+ # Return just the symbols which aren't deleted in the 'trunk' of this library
+ pipeline = []
+ if query:
+ # Match based on user criteria first
+ pipeline.append({'$match': query})
+ pipeline.extend([
+ # Id is by insert time which matches version order
+ {'$sort': {'_id':-1}},
+ # Group by 'symbol'
+ {'$group': {'_id': '$symbol',
+ 'deleted': {'$first': '$metadata.deleted'},
+ },
+ },
+ # Don't include symbols which are part of some snapshot, but really deleted...
+ {'$match': {'deleted': {'$ne': True}}},
+ {'$project': {'_id': 0,
+ 'symbol': '$_id',
+ }
+ }])
+
+ results = self._versions.aggregate(pipeline)
+ return sorted([x['symbol'] for x in results])
+
+ @mongo_retry
+ def has_symbol(self, symbol, as_of=None):
+ """
+ Return True if the 'symbol' exists in this library AND the symbol
+ isn't deleted in the specified as_of.
+
+ It's possible for a deleted symbol to exist in older snapshots.
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ """
+ try:
+ self._read_metadata(symbol, as_of=as_of)
+ return True
+ except NoDataFoundException:
+ return False
+
+ def read_audit_log(self, symbol):
+ """
+ Return the audit log associated with a given symbol
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ """
+ query = {'symbol': symbol}
+ return list(self._audit.find(query, sort=[('_id', -1)],
+ projection={'_id': False}))
+
+ def list_versions(self, symbol=None, snapshot=None, latest_only=False):
+ """
+ Return a list of versions filtered by the passed in parameters.
+
+ Parameters
+ ----------
+ symbol : `str`
+ Symbol to return versions for. If None returns versions across all
+ symbols in the library.
+ snapshot : `str`
+ Return the versions contained in the named snapshot
+ latest_only : `bool`
+ Only include the latest version for a specific symbol
+
+ Returns
+ -------
+ List of dictionaries describing the discovered versions in the library
+ """
+ if symbol is None:
+ symbols = self.list_symbols()
+ else:
+ symbols = [symbol]
+
+ query = {}
+
+ if snapshot is not None:
+ try:
+ query['parent'] = self._snapshots.find_one({'name': snapshot})['_id']
+ except TypeError:
+ raise NoDataFoundException('No snapshot %s in library %s' % (snapshot, self._arctic_lib.get_name()))
+
+ versions = []
+ for symbol in symbols:
+ query['symbol'] = symbol
+ seen_symbols = set()
+ for version in self._versions.find(query, projection=['symbol', 'version', 'parent'], sort=[('version', -1)]):
+ if latest_only and version['symbol'] in seen_symbols:
+ continue
+ seen_symbols.add(version['symbol'])
+ versions.append({'symbol': version['symbol'], 'version': version['version'],
+ # We return naive datetimes in London Time.
+ 'date': ms_to_datetime(datetime_to_ms(version['_id'].generation_time)),
+ 'snapshots': self._find_snapshots(version.get('parent', []))})
+ return versions
+
+ def _find_snapshots(self, parent_ids):
+ snapshots = []
+ for p in parent_ids:
+ snap = self._snapshots.find_one({'_id': p})
+ if snap:
+ snapshots.append(snap['name'])
+ else:
+ snapshots.append(str(p))
+ return snapshots
+
+ def _read_handler(self, version, symbol):
+ handler = None
+ for h in _TYPE_HANDLERS:
+ if h.can_read(version, symbol):
+ handler = h
+ break
+ if handler is None:
+ handler = self._bson_handler
+ return handler
+
+ def _write_handler(self, version, symbol, data, **kwargs):
+ handler = None
+ for h in _TYPE_HANDLERS:
+ if h.can_write(version, symbol, data, **kwargs):
+ handler = h
+ break
+ if handler is None:
+ version['type'] = 'default'
+ handler = self._bson_handler
+ return handler
+
+ def read(self, symbol, as_of=None, from_version=None, **kwargs):
+ """
+ Read data for the named symbol. Returns a VersionedItem object with
+        a data and metadata element (as passed into write).
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ as_of : `str` or int or `datetime.datetime`
+ Return the data as it was as_of the point in time.
+ `int` : specific version number
+ `str` : snapshot name which contains the version
+ `datetime.datetime` : the version of the data that existed as_of the requested point in time
+
+ Returns
+ -------
+ VersionedItem namedtuple which contains a .data and .metadata element
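+
+        Illustrative usage (symbol and snapshot names are placeholders)::
+
+            item = library.read('EURUSD', as_of='snap_2015-06-30')
+            data, metadata = item.data, item.metadata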
+ """
+ try:
+ _version = self._read_metadata(symbol, as_of=as_of)
+ read_preference = ReadPreference.NEAREST if self._allow_secondary else None
+ return self._do_read(symbol, _version, from_version, read_preference=read_preference, **kwargs)
+ except (OperationFailure, AutoReconnect) as e:
+ # Log the exception so we know how often this is happening
+ log_exception('read', e, 1)
+ # If we've failed to read from the secondary, then it's possible the
+ # secondary has lagged. In this case direct the query to the primary.
+ _version = mongo_retry(self._read_metadata)(symbol, as_of=as_of,
+ read_preference=ReadPreference.PRIMARY)
+ return self._do_read_retry(symbol, _version, from_version,
+ read_preference=ReadPreference.PRIMARY,
+ **kwargs)
+ except Exception, e:
+ log_exception('read', e, 1)
+ raise
+
+ @mongo_retry
+ def _show_info(self, symbol, as_of=None):
+ """
+ Print details on the stored symbol: the underlying storage handler
+ and the version_document corresponding to the specified version.
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ as_of : `str` or int or `datetime.datetime`
+ Return the data as it was as_of the point in time.
+ `int` : specific version number
+ `str` : snapshot name which contains the version
+ `datetime.datetime` : the version of the data that existed as_of the requested point in time
+ """
+ print self._get_info(symbol, as_of)
+
+ def _get_info(self, symbol, as_of=None):
+ _version = self._read_metadata(symbol, as_of=as_of)
+ handler = self._read_handler(_version, symbol)
+ if hasattr(handler, "get_info"):
+ return handler.get_info(self._arctic_lib, _version, symbol)
+ else:
+ return """Handler: %s\n\nVersion document:\n%s""" % (handler.__class__.__name__, pprint.pformat(_version))
+
+ def _do_read(self, symbol, version, from_version=None, **kwargs):
+ handler = self._read_handler(version, symbol)
+ data = handler.read(self._arctic_lib, version, symbol, from_version=from_version, **kwargs)
+ if data is None:
+ raise NoDataFoundException("No data found for %s in library %s" % (symbol, self._arctic_lib.get_name()))
+ return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=version['version'],
+ metadata=version.pop('metadata', None), data=data)
+ _do_read_retry = mongo_retry(_do_read)
+
+ @mongo_retry
+ def read_metadata(self, symbol, as_of=None):
+ """
+ Return the metadata saved for a symbol. This method is fast as it doesn't
+ actually load the data.
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ as_of : `str` or int or `datetime.datetime`
+ Return the data as it was as_of the point in time.
+ `int` : specific version number
+ `str` : snapshot name which contains the version
+ `datetime.datetime` : the version of the data that existed as_of the requested point in time
+ """
+ _version = self._read_metadata(symbol, as_of=as_of)
+ return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=_version['version'],
+ metadata=_version.pop('metadata', None), data=None)
+
+ def _read_metadata(self, symbol, as_of=None, read_preference=None):
+ if read_preference is None:
+ # We want to hit the PRIMARY if querying secondaries is disabled. If we're allowed to query secondaries,
+ # then we want to hit the secondary for metadata. We maintain ordering of chunks vs. metadata, such that
+ # if metadata is available, we guarantee that chunks will be available. (Within a 10 minute window.)
+ read_preference = ReadPreference.PRIMARY_PREFERRED if not self._allow_secondary else ReadPreference.SECONDARY_PREFERRED
+
+ versions_coll = self._versions.with_options(read_preference=read_preference)
+
+ _version = None
+ if as_of is None:
+ _version = versions_coll.find_one({'symbol': symbol}, sort=[('version', pymongo.DESCENDING)])
+ elif isinstance(as_of, basestring):
+ # as_of is a snapshot
+ snapshot = self._snapshots.find_one({'name': as_of})
+ if snapshot:
+ _version = versions_coll.find_one({'symbol': symbol, 'parent': snapshot['_id']})
+ elif isinstance(as_of, dt):
+ # as_of refers to a datetime
+ if not as_of.tzinfo:
+ as_of = as_of.replace(tzinfo=mktz())
+ _version = versions_coll.find_one({'symbol': symbol,
+ '_id': {'$lt': bson.ObjectId.from_datetime(as_of + timedelta(seconds=1))}},
+ sort=[('_id', pymongo.DESCENDING)])
+ else:
+ # Backward compatibility - as of is a version number
+ _version = versions_coll.find_one({'symbol': symbol, 'version': as_of})
+
+ if not _version:
+ raise NoDataFoundException("No data found for %s in library %s" % (symbol, self._arctic_lib.get_name()))
+
+ # if the item has been deleted, don't return any metadata
+ metadata = _version.get('metadata', None)
+ if metadata is not None and metadata.get('deleted', False) is True:
+ raise NoDataFoundException("No data found for %s in library %s" % (symbol, self._arctic_lib.get_name()))
+
+ return _version
+
+ @mongo_retry
+ def append(self, symbol, data, metadata=None, prune_previous_version=True, upsert=True, **kwargs):
+ """
+ Append 'data' under the specified 'symbol' name to this library.
+ The exact meaning of 'append' is left up to the underlying store implementation.
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ data :
+ to be persisted
+ metadata : `dict`
+ an optional dictionary of metadata to persist along with the symbol.
+ prune_previous_version : `bool`
+ Removes previous (non-snapshotted) versions from the database.
+ Default: True
+ upsert : `bool`
+ Write 'data' if no previous version exists.
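+
+        Illustrative usage (symbol and variable names are placeholders)::
+
+            library.append('EURUSD', new_rows_dataframe)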
+ """
+ self._ensure_index()
+ self._arctic_lib.check_quota()
+ version = {'_id': bson.ObjectId()}
+ version['symbol'] = symbol
+ spec = {'symbol': symbol}
+ previous_version = self._versions.find_one(spec,
+ sort=[('version', pymongo.DESCENDING)])
+
+ if len(data) == 0 and previous_version is not None:
+            return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=previous_version['version'],
+ metadata=version.pop('metadata', None), data=None)
+
+ if upsert and previous_version is None:
+ return self.write(symbol=symbol, data=data, prune_previous_version=prune_previous_version, metadata=metadata)
+
+ assert previous_version is not None
+
+ next_ver = self._version_nums.find_one({'symbol': symbol, 'version': previous_version['version']})
+
+ if next_ver is None:
+ raise ArcticException('''version_nums is out of sync with previous version document.
+ This probably means that either a version document write has previously failed, or the previous version has been deleted.
+ Append not possible - please call write() to get versions back in sync''')
+
+ # if the symbol has previously been deleted then overwrite
+ previous_metadata = previous_version.get('metadata', None)
+ if upsert and previous_metadata is not None and previous_metadata.get('deleted', False) is True:
+ return self.write(symbol=symbol, data=data, prune_previous_version=prune_previous_version,
+ metadata=metadata)
+
+ handler = self._read_handler(previous_version, symbol)
+
+ if metadata is not None:
+ version['metadata'] = metadata
+ elif 'metadata' in previous_version:
+ version['metadata'] = previous_version['metadata']
+
+ if handler and hasattr(handler, 'append'):
+ mongo_retry(handler.append)(self._arctic_lib, version, symbol, data, previous_version, **kwargs)
+ else:
+ raise Exception("Append not implemented for handler %s" % handler)
+
+ next_ver = self._version_nums.find_one_and_update({'symbol': symbol, 'version': previous_version['version']},
+ {'$inc': {'version': 1}},
+ upsert=False, new=True)
+
+ if next_ver is None:
+ #Latest version has changed during this operation
+ raise OptimisticLockException()
+
+ version['version'] = next_ver['version']
+
+ # Insert the new version into the version DB
+ mongo_retry(self._versions.insert_one)(version)
+
+ self._publish_change(symbol, version)
+
+ if prune_previous_version and previous_version:
+ self._prune_previous_versions(symbol)
+
+ return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=version['version'],
+ metadata=version.pop('metadata', None), data=None)
+
+ def _publish_change(self, symbol, version):
+ if self._publish_changes:
+ mongo_retry(self._changes.insert_one)(version)
+
+ @mongo_retry
+ def write(self, symbol, data, metadata=None, prune_previous_version=True, **kwargs):
+ """
+ Write 'data' under the specified 'symbol' name to this library.
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ data :
+ to be persisted
+ metadata : `dict`
+ an optional dictionary of metadata to persist along with the symbol.
+ Default: None
+ prune_previous_version : `bool`
+ Removes previous (non-snapshotted) versions from the database.
+ Default: True
+ kwargs :
+ passed through to the write handler
+
+ Returns
+ -------
+        VersionedItem named tuple containing the metadata and version number
+ of the written symbol in the store.
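+
+        Illustrative usage (symbol, variable and metadata values are placeholders)::
+
+            library.write('EURUSD', dataframe, metadata={'source': 'illustrative'})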
+ """
+ self._ensure_index()
+ self._arctic_lib.check_quota()
+ version = {'_id': bson.ObjectId()}
+ version['symbol'] = symbol
+ version['version'] = self._version_nums.find_one_and_update({'symbol': symbol},
+ {'$inc': {'version': 1}},
+ upsert=True, new=True)['version']
+ version['metadata'] = metadata
+
+ previous_version = self._versions.find_one({'symbol': symbol, 'version': {'$lt': version['version']}},
+ sort=[('version', pymongo.DESCENDING)],
+ )
+
+ handler = self._write_handler(version, symbol, data, **kwargs)
+ mongo_retry(handler.write)(self._arctic_lib, version, symbol, data, previous_version, **kwargs)
+
+ # Insert the new version into the version DB
+ mongo_retry(self._versions.insert_one)(version)
+
+ if prune_previous_version and previous_version:
+ self._prune_previous_versions(symbol)
+
+ logger.debug('Finished writing versions for %s', symbol)
+
+ self._publish_change(symbol, version)
+
+ return VersionedItem(symbol=symbol, library=self._arctic_lib.get_name(), version=version['version'],
+ metadata=version.pop('metadata', None), data=None)
+
+ def _prune_previous_versions(self, symbol, keep_mins=120):
+ """
+ Prune versions, not pointed at by snapshots which are at least keep_mins old.
+ """
+ # Find all non-snapshotted versions older than a version that's at least keep_mins minutes old
+ # Based on documents available on the secondary
+ versions_find = mongo_retry(self._versions.with_options(read_preference=ReadPreference.SECONDARY_PREFERRED if keep_mins > 0 else
+ ReadPreference.PRIMARY)
+ .find)
+ versions = list(versions_find({ # Find versions of this symbol
+ 'symbol': symbol,
+ # Not snapshotted
+ '$or': [{'parent': {'$exists': False}}, {'parent': {'$size': 0}}],
+ # At least 'keep_mins' old
+ '_id': {'$lt': bson.ObjectId.from_datetime(
+ dt.utcnow()
+ # Add one second as the ObjectId str has random fuzz
+ + timedelta(seconds=1)
+ - timedelta(minutes=keep_mins))
+ }
+ },
+ # Using version number here instead of _id as there's a very unlikely case
+ # where the versions are created on different hosts or processes at exactly
+ # the same time.
+ sort=[('version', pymongo.DESCENDING)],
+ # Keep one, that's at least 10 mins old, around
+ # (cope with replication delay)
+ skip=1,
+ projection=['_id', 'type'],
+ ))
+ if not versions:
+ return
+ version_ids = [v['_id'] for v in versions]
+
+ #Find any version_ids that are the basis of other, 'current' versions - don't prune these.
+ base_versions = set([x['base_version_id'] for x in mongo_retry(self._versions.find)({
+ 'symbol': symbol,
+ '_id': {'$nin': version_ids},
+ 'base_version_id':{'$exists':True},
+ },
+ projection=['base_version_id'],
+ )])
+
+ version_ids = list(set(version_ids) - base_versions)
+
+ if not version_ids:
+ return
+
+ # Delete the version documents
+ mongo_retry(self._versions.delete_many)({'_id': {'$in': version_ids}})
+ # Cleanup any chunks
+ cleanup(self._arctic_lib, symbol, version_ids)
+
+ @mongo_retry
+ def _delete_version(self, symbol, version_num, do_cleanup=True):
+ """
+ Delete the n'th version of this symbol from the historical collection.
+ """
+ version = self._versions.find_one({'symbol': symbol, 'version': version_num})
+ if not version:
+ logger.error("Can't delete %s:%s as not found in DB" % (symbol, version_num))
+ return
+ # If the version is pointed to by a snapshot, then can't delete
+ if version.get('parent', None):
+ for parent in version['parent']:
+ snap_name = self._snapshots.find_one({'_id': parent})
+ if snap_name:
+ snap_name = snap_name['name']
+ logger.error("Can't delete: %s:%s as pointed to by snapshot: %s" % (symbol, version['version'],
+ snap_name))
+ return
+ self._versions.delete_one({'_id': version['_id']})
+ if do_cleanup:
+ cleanup(self._arctic_lib, symbol, [version['_id']])
+
+ @mongo_retry
+ def delete(self, symbol):
+ """
+ Delete all versions of the item from the current library which aren't
+ currently part of some snapshot.
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name to delete
+ """
+ logger.warn("Deleting data item: %r from %r" % (symbol, self._arctic_lib.get_name()))
+ # None is the magic sentinel value that indicates an item has been deleted.
+ sentinel = self.write(symbol, None, prune_previous_version=False, metadata={'deleted': True})
+ self._prune_previous_versions(symbol, 0)
+
+ # If there aren't any other versions, then we don't need the sentinel empty value
+ # so delete the sentinel version altogether
+ snapped_version = self._versions.find_one({'symbol': symbol,
+ 'metadata.deleted': {'$ne': True}})
+ if not snapped_version:
+ self._delete_version(symbol, sentinel.version)
+ assert not self.has_symbol(symbol)
+
+ def _write_audit(self, user, message, changed_version):
+ """
+ Creates an audit entry, which is much like a snapshot in that
+ it references versions and provides some history of the changes made.
+ """
+ audit = {'_id': bson.ObjectId(),
+ 'user': user,
+ 'message': message,
+ 'symbol': changed_version.symbol
+ }
+ orig_version = changed_version.orig_version.version
+ new_version = changed_version.new_version.version
+ audit['orig_v'] = orig_version
+ audit['new_v'] = new_version
+ # Update the versions to contain the audit
+ mongo_retry(self._versions.update_many)({'symbol': changed_version.symbol,
+ 'version': {'$in': [orig_version, new_version]}
+ },
+ {'$addToSet': {'parent': audit['_id']}})
+ # Create the audit entry
+ mongo_retry(self._audit.insert_one)(audit)
+
+ def snapshot(self, snap_name, metadata=None, skip_symbols=None):
+ """
+ Snapshot the current versions of symbols in the library. Can be used like:
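+
+            library.snapshot('snap_2015-06-30')   # snapshot name is illustrative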
+
+ Parameters
+ ----------
+ snap_name : `str`
+ name of the snapshot
+ metadata : `dict`
+ an optional dictionary of metadata to persist along with the symbol.
+ skip_symbols : `collections.Iterable`
+ optional symbols to be excluded from the snapshot
+ """
+ # Ensure the user doesn't insert duplicates
+ snapshot = self._snapshots.find_one({'name': snap_name})
+ if snapshot:
+ raise DuplicateSnapshotException("Snapshot '%s' already exists." % snap_name)
+
+ # Create a snapshot version document
+ snapshot = {'_id': bson.ObjectId()}
+ snapshot['name'] = snap_name
+ snapshot['metadata'] = metadata
+
+ if skip_symbols is None:
+ skip_symbols = set()
+ else:
+ skip_symbols = set(skip_symbols)
+
+ # Loop over, and snapshot all versions except those we've been asked to skip
+ for sym in set(self.list_symbols()) - skip_symbols:
+ try:
+ sym = self._read_metadata(sym, read_preference=ReadPreference.PRIMARY)
+ # Update the parents field of the version document
+ mongo_retry(self._versions.update_one)({'_id': sym['_id']},
+ {'$addToSet': {'parent': snapshot['_id']}})
+ except NoDataFoundException:
+ # Version has been deleted, not included in the snapshot
+ pass
+ mongo_retry(self._snapshots.insert_one)(snapshot)
+
+ def delete_snapshot(self, snap_name):
+ """
+ Delete a named snapshot
+
+ Parameters
+ ----------
+        snap_name : `str`
+ The snapshot name to delete
+ """
+ snapshot = self._snapshots.find_one({'name': snap_name})
+ if not snapshot:
+ raise NoDataFoundException("Snapshot %s not found!" % snap_name)
+
+ # Find all the versions pointed at by the snapshot
+ versions = list(self._versions
+ .find({'parent': snapshot['_id']}, projection=['symbol', 'version']))
+ # Remove the snapshot Id as a parent of versions
+ self._versions.update_many({'parent': snapshot['_id']},
+ {'$pull': {'parent': snapshot['_id']}})
+
+ self._snapshots.delete_one({'name': snap_name})
+
+ def list_snapshots(self):
+ """
+ List the snapshots in the library
+
+ Returns
+ -------
+        dict of snapshot name -> snapshot metadata
+ """
+ return dict((i['name'], i['metadata']) for i in self._snapshots.find())
+
+ def stats(self):
+ """
+ Return storage statistics about the library
+
+ Returns
+ -------
+ dictionary of storage stats
+ """
+
+ res = {}
+ db = self._collection.database
+ conn = db.connection
+ res['sharding'] = {}
+ try:
+ sharding = conn.config.databases.find_one({'_id': db.name})
+ if sharding:
+ res['sharding'].update(sharding)
+ res['sharding']['collections'] = list(conn.config.collections.find({'_id': {'$regex': '^' + db.name + "\..*"}}))
+ except OperationFailure:
+ # Access denied
+ pass
+ res['dbstats'] = db.command('dbstats')
+ res['chunks'] = db.command('collstats', self._collection.name)
+ res['versions'] = db.command('collstats', self._versions.name)
+ res['snapshots'] = db.command('collstats', self._snapshots.name)
+ res['totals'] = {'count': res['chunks']['count'],
+ 'size': res['chunks']['size'] + res['versions']['size'] + res['snapshots']['size'],
+ }
+ return res
+
+ def _fsck(self, dry_run):
+ """
+ Run a consistency check on this VersionStore library.
+ """
+ # Cleanup Orphaned Chunks
+ self._cleanup_orphaned_chunks(dry_run)
+ # Cleanup Orphaned Snapshots
+ self._cleanup_orphaned_versions(dry_run)
+
+ def _cleanup_orphaned_chunks(self, dry_run):
+ """
+ Fixes any chunks who have parent pointers to missing versions.
+ Removes the broken parent pointer and, if there are no other parent pointers for the chunk,
+ removes the chunk.
+ """
+ lib = self
+ chunks_coll = lib._collection
+ versions_coll = chunks_coll.versions
+
+ logger.info("ORPHANED CHUNK CHECK: %s" % self._arctic_lib.get_name())
+ for symbol in chunks_coll.distinct('symbol'):
+ logger.debug('Checking %s' % symbol)
+ # Be liberal with the generation time.
+ gen_time = dt.now() - timedelta(days=1)
+ parent_id_constraint = {'$lt': bson.ObjectId.from_datetime(gen_time)}
+
+ # For each symbol, grab all 'real' versions
+ versions = set(versions_coll.find({'symbol': symbol,
+ '_id': parent_id_constraint}).distinct('_id'))
+ # Using aggregate so we can unwind, and pull out 'parent', where 'parent' is older than a day.
+ parents = chunks_coll.aggregate([{'$match': {'symbol': symbol}},
+ {'$project': {'parent': True}},
+ {'$unwind': '$parent'},
+ {'$match': {'parent': parent_id_constraint}},
+ {'$group': {'_id': '$parent'}},
+ ])
+ parent_ids = set([x['_id'] for x in parents])
+
+ leaked_versions = sorted(parent_ids - versions)
+ if len(leaked_versions):
+ logger.info("%s leaked %d versions" % (symbol, len(leaked_versions)))
+ for x in leaked_versions:
+ chunk_count = chunks_coll.find({'symbol': symbol, 'parent': x}).count()
+ logger.info("%s: Missing Version %s (%s) ; %s chunks ref'd" % (symbol,
+ x.generation_time,
+ x,
+ chunk_count
+ ))
+ if versions_coll.find_one({'symbol': symbol, '_id': x}) is not None:
+ raise Exception("Error: version (%s) is found for (%s), but shouldn't be!" %
+ (x, symbol))
+ # Now cleanup the leaked versions
+ if not dry_run:
+ cleanup(lib._arctic_lib, symbol, leaked_versions)
+
+ def _cleanup_orphaned_versions(self, dry_run):
+ """
+ Fixes any versions who have parent pointers to missing snapshots.
+ Note, doesn't delete the versions, just removes the parent pointer if it no longer
+ exists in snapshots.
+ """
+ lib = self
+ versions_coll = lib._collection.versions
+ snapshots_coll = lib._collection.snapshots
+
+ logger.info("ORPHANED SNAPSHOT CHECK: %s" % self._arctic_lib.get_name())
+
+ # Be liberal with the generation time.
+ gen_time = dt.now() - timedelta(days=1)
+ parent_id_constraint = {'$lt': bson.ObjectId.from_datetime(gen_time)}
+
+ # For each symbol, grab all 'real' snapshots and audit entries
+ snapshots = set(snapshots_coll.distinct('_id'))
+ snapshots |= set(lib._audit.distinct('_id'))
+ # Using aggregate so we can unwind, and pull out 'parent', where 'parent' is older than a day.
+ parents = versions_coll.aggregate([{'$project': {'parent': True}},
+ {'$unwind': '$parent'},
+ {'$match': {'parent': parent_id_constraint}},
+ {'$group': {'_id': '$parent'}},
+ ])
+ parent_ids = set([x['_id'] for x in parents])
+
+ leaked_snaps = sorted(parent_ids - snapshots)
+ if len(leaked_snaps):
+ logger.info("leaked %d snapshots" % (len(leaked_snaps)))
+ for x in leaked_snaps:
+ ver_count = versions_coll.find({'parent': x}).count()
+ logger.info("Missing Snapshot %s (%s) ; %s versions ref'd" % (x.generation_time,
+ x,
+ ver_count
+ ))
+ if snapshots_coll.find_one({'_id': x}) is not None:
+ raise Exception("Error: snapshot (%s) is found, but shouldn't be!" %
+ (x))
+ # Now cleanup the leaked snapshots
+ if not dry_run:
+ versions_coll.update_many({'parent': x},
+ {'$pull': {'parent': x}})
diff --git a/arctic/store/versioned_item.py b/arctic/store/versioned_item.py
new file mode 100644
index 000000000..25c7594ba
--- /dev/null
+++ b/arctic/store/versioned_item.py
@@ -0,0 +1,19 @@
+from collections import namedtuple
+
+
+class VersionedItem(namedtuple('VersionedItem', ['symbol', 'library', 'data', 'version', 'metadata'])):
+ """
+ Class representing a Versioned object in VersionStore.
+ """
+ def metadata_dict(self):
+ return {'symbol': self.symbol, 'library': self.library, 'version': self.version}
+
+ def __repr__(self):
+ return str(self)
+
+ def __str__(self):
+ return "VersionedItem(symbol=%s,library=%s,data=%s,version=%s,metadata=%s" % \
+ (self.symbol, self.library, type(self.data), self.version, self.metadata)
+
+
+ChangedItem = namedtuple('ChangedItem', ['symbol', 'orig_version', 'new_version', 'changes'])
diff --git a/arctic/tickstore/__init__.py b/arctic/tickstore/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/arctic/tickstore/tickstore.py b/arctic/tickstore/tickstore.py
new file mode 100644
index 000000000..997914632
--- /dev/null
+++ b/arctic/tickstore/tickstore.py
@@ -0,0 +1,604 @@
+from bson.binary import Binary
+from datetime import datetime as dt, timedelta
+import lz4
+import numpy as np
+import pandas as pd
+from pandas.core.frame import _arrays_to_mgr
+import pymongo
+from pymongo.errors import OperationFailure
+import pytz
+
+from ..date import DateRange, to_pandas_closed_closed, mktz, datetime_to_ms, ms_to_datetime
+from ..decorators import mongo_retry
+from ..exceptions import OverlappingDataException, \
+ NoDataFoundException, UnhandledDtypeException, ArcticException
+from ..logging import logger
+from .._util import indent
+
+
+# Example-Schema:
+# --------------
+# {ID: ObjectId('52b1d39eed5066ab5e87a56d'),
+# SYMBOL: u'symbol'
+# INDEX: Binary('...', 0),
+# IMAGE_DOC: { IMAGE: {
+# 'ASK': 10.
+# ...
+# }
+# 's':
+# 't': DateTime(...)
+# }
+# COLUMNS: {
+# 'ACT_FLAG1': {
+# DATA: Binary('...', 0),
+# DTYPE: u'U1',
+# ROWMASK: Binary('...', 0)},
+# 'ACVOL_1': {
+# DATA: Binary('...', 0),
+# DTYPE: u'float64',
+# ROWMASK: Binary('...', 0)},
+# ...
+# }
+# START: DateTime(...),
+# END: DateTime(...),
+# END_SEQ: 31553879L,
+# SEGMENT: 1386933906826L,
+# SHA: 1386933906826L,
+# VERSION: 3,
+# }
+
+TICK_STORE_TYPE = 'TickStoreV3'
+
+ID = '_id'
+SYMBOL = 'sy'
+INDEX = 'i'
+START = 's'
+END = 'e'
+START_SEQ = 'sS'
+END_SEQ = 'eS'
+SEGMENT = 'se'
+SHA = 'sh'
+IMAGE_DOC = 'im'
+IMAGE = 'i'
+
+COLUMNS = 'cs'
+DATA = 'd'
+DTYPE = 't'
+ROWMASK = 'm'
+
+COUNT = 'c'
+VERSION = 'v'
+
+CHUNK_VERSION_NUMBER = 3
+
+
+class TickStore(object):
+
+ chunk_size = 100000
+
+ @classmethod
+ def initialize_library(cls, arctic_lib, **kwargs):
+ TickStore(arctic_lib)._ensure_index()
+
+ @mongo_retry
+ def _ensure_index(self):
+ collection = self._collection
+ collection.create_index([(SYMBOL, pymongo.ASCENDING),
+ (START, pymongo.ASCENDING)], background=True)
+ collection.create_index([(START, pymongo.ASCENDING)], background=True)
+
+ def __init__(self, arctic_lib):
+ self._arctic_lib = arctic_lib
+
+ # Do we allow reading from secondaries
+ self._allow_secondary = self._arctic_lib.arctic._allow_secondary
+
+ # The default collections
+ self._collection = arctic_lib.get_top_level_collection()
+
+ def __getstate__(self):
+ return {'arctic_lib': self._arctic_lib}
+
+ def __setstate__(self, state):
+ return TickStore.__init__(self, state['arctic_lib'])
+
+ def __str__(self):
+ return """<%s at %s>
+%s""" % (self.__class__.__name__, hex(id(self)), indent(str(self._arctic_lib), 4))
+
+ def __repr__(self):
+ return str(self)
+
+ def delete(self, symbol, date_range=None):
+ """
+ Delete all chunks for a symbol.
+
+ Which are, for the moment, fully contained in the passed in
+ date_range.
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ date_range : `date.DateRange`
+ DateRange to delete ticks in
+ """
+ query = {SYMBOL: symbol}
+ date_range = to_pandas_closed_closed(date_range)
+ if date_range is not None:
+ assert date_range.start and date_range.end
+ if date_range.start:
+ start = self._to_dt(date_range.start)
+ if date_range.end:
+ end = self._to_dt(date_range.end)
+ query[START] = {'$gte': start}
+ query[END] = {'$lte': end}
+ self._collection.delete_many(query)
+
+ def list_symbols(self, date_range=None):
+ return self._collection.distinct(SYMBOL)
+
+ def _mongo_date_range_query(self, symbol, date_range):
+ # Handle date_range
+ if not date_range:
+ date_range = DateRange()
+
+ # Find the start bound
+ start_range = {}
+ first = last = None
+ if date_range.start:
+ start = date_range.start
+ startq = self._symbol_query(symbol)
+ startq.update({START: {'$lte': start}})
+ first = self._collection.find_one(startq,
+ # Service entirely from the index
+ projection={START: 1, ID: 0},
+ sort=[(START, pymongo.DESCENDING)])
+ if first:
+ start_range['$gte'] = first[START]
+
+ # Find the end bound
+ if date_range.end:
+ end = date_range.end
+ endq = self._symbol_query(symbol)
+ endq.update({START: {'$gt': end}})
+ last = self._collection.find_one(endq,
+ # Service entirely from the index
+ projection={START: 1, ID: 0},
+ sort=[(START, pymongo.ASCENDING)])
+ else:
+ logger.info("No end provided. Loading a month for: {}:{}".format(symbol, first))
+ if not first:
+ first = self._collection.find_one(self._symbol_query(symbol),
+ projection={START: 1, ID: 0},
+ sort=[(START, pymongo.ASCENDING)])
+ if not first:
+ raise NoDataFoundException()
+ last = first[START]
+ last = {START: last + timedelta(days=30)}
+ if last:
+ start_range['$lt'] = last[START]
+
+ # Return chunks in the specified range
+ if not start_range:
+ return {}
+ return {START: start_range}
+
+ def _symbol_query(self, symbol):
+ if isinstance(symbol, basestring):
+ query = {SYMBOL: symbol}
+ elif symbol is not None:
+ query = {SYMBOL: {'$in': symbol}}
+ else:
+ query = {}
+ return query
+
+ def read(self, symbol, date_range=None, columns=None, include_images=False, _target_tick_count=0):
+ """
+        Read data for the named symbol. Returns a pandas DataFrame of the
+        ticks for the requested date range and columns.
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ date_range : `date.DateRange`
+ Returns ticks in the specified DateRange
+ columns : `list` of `str`
+ Columns (fields) to return from the tickstore
+ include_images : `bool`
+            Should images (/snapshots) be included in the read
+
+        Returns
+ -------
+ pandas.DataFrame of data
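+
+        Illustrative usage (symbol and column names are placeholders)::
+
+            df = tick_library.read('EURUSD', date_range=DateRange(dt(2015, 1, 1), dt(2015, 2, 1)),
+                                   columns=['BID', 'ASK'])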
+ """
+ perf_start = dt.now()
+ rtn = {}
+ column_set = set()
+
+ multiple_symbols = not isinstance(symbol, basestring)
+
+ date_range = to_pandas_closed_closed(date_range)
+ query = self._symbol_query(symbol)
+ query.update(self._mongo_date_range_query(symbol, date_range))
+
+ if columns:
+ projection = dict([(SYMBOL, 1),
+ (INDEX, 1),
+ (START, 1),
+ (VERSION, 1),
+ (IMAGE_DOC, 1)] +
+ [(COLUMNS + '.%s' % c, 1) for c in columns])
+ column_set.update([c for c in columns if c != 'SYMBOL'])
+ else:
+ projection = dict([(SYMBOL, 1),
+ (INDEX, 1),
+ (START, 1),
+ (VERSION, 1),
+ (COLUMNS, 1),
+ (IMAGE_DOC, 1)])
+
+ column_dtypes = {}
+ ticks_read = 0
+ for b in self._collection.find(query, projection=projection).sort([(START, pymongo.ASCENDING)],):
+ data = self._read_bucket(b, column_set, column_dtypes,
+ multiple_symbols or (columns is not None and 'SYMBOL' in columns),
+ include_images)
+ for k, v in data.iteritems():
+ try:
+ rtn[k].append(v)
+ except KeyError:
+ rtn[k] = [v]
+ # For testing
+ ticks_read += len(data[INDEX])
+ if _target_tick_count and ticks_read > _target_tick_count:
+ break
+
+ if not rtn:
+ raise NoDataFoundException("No Data found for {} in range: {}".format(symbol, date_range))
+ rtn = self._pad_and_fix_dtypes(rtn, column_dtypes)
+
+ index = pd.to_datetime(np.concatenate(rtn[INDEX]), unit='ms')
+ if columns is None:
+ columns = [x for x in rtn.keys() if x not in (INDEX, 'SYMBOL')]
+ if multiple_symbols and 'SYMBOL' not in columns:
+ columns = ['SYMBOL', ] + columns
+
+ if len(index) > 0:
+ arrays = [np.concatenate(rtn[k]) for k in columns]
+ else:
+ arrays = [[] for k in columns]
+
+ if multiple_symbols:
+ sort = np.argsort(index)
+ index = index[sort]
+ arrays = [a[sort] for a in arrays]
+
+ t = (dt.now() - perf_start).total_seconds()
+ logger.info("Got data in %s secs, creating DataFrame..." % t)
+ mgr = _arrays_to_mgr(arrays, columns, index, columns, dtype=None)
+ rtn = pd.DataFrame(mgr)
+
+ t = (dt.now() - perf_start).total_seconds()
+ ticks = len(rtn)
+ logger.info("%d rows in %s secs: %s ticks/sec" % (ticks, t, int(ticks / t)))
+ if not rtn.index.is_monotonic:
+ logger.error("TimeSeries data is out of order, sorting!")
+ rtn = rtn.sort_index()
+ if date_range:
+ # FIXME: support DateRange.interval...
+ rtn = rtn.ix[date_range.start:date_range.end]
+ return rtn
+
+ def _pad_and_fix_dtypes(self, cols, column_dtypes):
+ # Pad out Nones with empty arrays of appropriate dtypes
+ rtn = {}
+ index = cols[INDEX]
+ full_length = len(index)
+ for k, v in cols.iteritems():
+ if k != INDEX and k != 'SYMBOL':
+ col_len = len(v)
+ if col_len < full_length:
+ v = ([None, ] * (full_length - col_len)) + v
+ assert len(v) == full_length
+ for i, arr in enumerate(v):
+ if arr is None:
+ # Replace Nones with appropriate-length empty arrays
+ v[i] = self._empty(len(index[i]), column_dtypes.get(k))
+ else:
+ # Promote to appropriate dtype only if we can safely cast all the values
+ # This avoids the case with strings where None is cast as 'None'.
+ # Casting the object to a string is not worthwhile anyway as Pandas changes the
+ # dtype back to object
+ if (i == 0 or v[i].dtype != v[i - 1].dtype) and np.can_cast(v[i].dtype, column_dtypes[k],
+ casting='safe'):
+ v[i] = v[i].astype(column_dtypes[k], casting='safe')
+
+ rtn[k] = v
+ return rtn
+
+ def _set_or_promote_dtype(self, column_dtypes, c, dtype):
+ existing_dtype = column_dtypes.get(c)
+ if existing_dtype is None or existing_dtype != dtype:
+ # Promote ints to floats - as we can't easily represent NaNs
+ if np.issubdtype(dtype, int):
+ dtype = np.dtype('f8')
+ column_dtypes[c] = np.promote_types(column_dtypes.get(c, dtype), dtype)
+
+ def _prepend_image(self, document, im):
+ image = im[IMAGE]
+ first_dt = im['t']
+ if not first_dt.tzinfo:
+ first_dt = first_dt.replace(tzinfo=mktz('UTC'))
+ document[INDEX] = np.insert(document[INDEX], 0, np.uint64(datetime_to_ms(first_dt)))
+ for field in document:
+ if field == INDEX or document[field] is None:
+ continue
+ if field in image:
+ val = image[field]
+ else:
+ logger.debug("Field %s is missing from image!", field)
+ val = np.nan
+ document[field] = np.insert(document[field], 0, document[field].dtype.type(val))
+ return document
+
+ def _read_bucket(self, doc, columns, column_dtypes, include_symbol, include_images):
+ rtn = {}
+ if doc[VERSION] != 3:
+ raise ArcticException("Unhandled document version: %s" % doc[VERSION])
+ rtn[INDEX] = np.cumsum(np.fromstring(lz4.decompress(doc[INDEX]), dtype='uint64'))
+ doc_length = len(rtn[INDEX])
+ rtn_length = len(rtn[INDEX])
+ if include_symbol:
+ rtn['SYMBOL'] = [doc[SYMBOL], ] * rtn_length
+ columns.update(doc[COLUMNS].keys())
+ for c in columns:
+ try:
+ coldata = doc[COLUMNS][c]
+ dtype = np.dtype(coldata[DTYPE])
+ values = np.fromstring(lz4.decompress(str(coldata[DATA])), dtype=dtype)
+ self._set_or_promote_dtype(column_dtypes, c, dtype)
+ rtn[c] = self._empty(rtn_length, dtype=column_dtypes[c])
+ rowmask = np.unpackbits(np.fromstring(lz4.decompress(str(coldata[ROWMASK])),
+ dtype='uint8'))[:doc_length].astype('bool')
+ rtn[c][rowmask] = values
+ except KeyError:
+ rtn[c] = None
+
+ if include_images and doc.get(IMAGE_DOC, {}).get(IMAGE, {}):
+ rtn = self._prepend_image(rtn, doc[IMAGE_DOC])
+ return rtn
+
+ def _empty(self, length, dtype):
+ if dtype is not None and dtype == np.float64:
+ rtn = np.empty(length, dtype)
+ rtn[:] = np.nan
+ return rtn
+ else:
+ return np.empty(length, dtype=np.object_)
+
+ def stats(self):
+ """
+ Return storage statistics about the library
+
+ Returns
+ -------
+ dictionary of storage stats
+ """
+ res = {}
+ db = self._collection.database
+ conn = db.connection
+ res['sharding'] = {}
+ try:
+ sharding = conn.config.databases.find_one({'_id': db.name})
+ if sharding:
+ res['sharding'].update(sharding)
+ res['sharding']['collections'] = list(conn.config.collections.find(
+ {'_id': {'$regex': '^' + db.name + "\..*"}}))
+ except OperationFailure:
+ # Access denied
+ pass
+ res['dbstats'] = db.command('dbstats')
+ res['chunks'] = db.command('collstats', self._collection.name)
+ res['totals'] = {'count': res['chunks']['count'],
+ 'size': res['chunks']['size'],
+ }
+ return res
+
+ def _assert_nonoverlapping_data(self, symbol, start, end):
+ #
+ # Imagine we're trying to insert a tick bucket like:
+ # |S------ New-B -------------- E|
+ # |---- 1 ----| |----- 2 -----| |----- 3 -----|
+ #
+ # S = New-B Start
+ # E = New-B End
+ # New-B overlaps with existing buckets 1,2,3
+ #
+ # All we need to do is find the bucket whose start is immediately before (E)
+ # If that document's end is > S, then we know it overlaps
+ # with this bucket.
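+ #
+ # Worked example: an existing bucket covers 09:30-09:35 and the new bucket
+ # runs 09:33-09:40. The query below returns the 09:30 bucket (the latest
+ # start before E=09:40); its end 09:35 > our start S=09:33, so we raise.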
+ doc = self._collection.find_one({SYMBOL: symbol,
+ START: {'$lt': end}
+ },
+ projection={START: 1,
+ END: 1,
+ '_id': 0},
+ sort=[(START, pymongo.DESCENDING)])
+ if doc:
+ if not doc[END].tzinfo:
+ doc[END] = doc[END].replace(tzinfo=mktz('UTC'))
+ if doc[END] > start:
+ raise OverlappingDataException("Document already exists with start:{} end:{} in the range of our start:{} end:{}".format(
+ doc[START], doc[END], start, end))
+
+ def write(self, symbol, data):
+ """
+ Writes a list of market data events.
+
+ Parameters
+ ----------
+ symbol : `str`
+ symbol name for the item
+ data : list of dicts or a pandas.DataFrame
+ The ticks to store to the tick-store.
+ """
+ pandas = False
+ # Check for overlapping data
+ if isinstance(data, list):
+ start = data[0]['index']
+ end = data[-1]['index']
+ elif isinstance(data, pd.DataFrame):
+ start = data.index[0].to_datetime()
+ end = data.index[-1].to_datetime()
+ pandas = True
+ else:
+ raise UnhandledDtypeException("Can't persist type %s to tickstore" % type(data))
+ self._assert_nonoverlapping_data(symbol, self._to_dt(start), self._to_dt(end))
+
+ if pandas:
+ buckets = self._pandas_to_buckets(data, symbol)
+ else:
+ buckets = self._to_buckets(data, symbol)
+ self._write(buckets)
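+ # Example ('tick_library', the symbol, fields and prices are illustrative):
+ # tick_library.write('EURUSD', [
+ # {'index': dt(2015, 1, 5, 9, 30, tzinfo=mktz('UTC')), 'BID': 1.182, 'ASK': 1.184},
+ # {'index': dt(2015, 1, 5, 9, 31, tzinfo=mktz('UTC')), 'BID': 1.183, 'ASK': 1.185},
+ # ])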
+
+ def _write(self, buckets):
+ start = dt.now()
+ mongo_retry(self._collection.insert_many)(buckets)
+ t = (dt.now() - start).total_seconds()
+ ticks = len(buckets) * self.chunk_size
+ print "%d buckets in %s: approx %s ticks/sec" % (len(buckets), t, int(ticks / t))
+
+ def _pandas_to_buckets(self, x, symbol):
+ rtn = []
+ for i in range(0, len(x), self.chunk_size):
+ rtn.append(self._pandas_to_bucket(x[i:i + self.chunk_size], symbol))
+ return rtn
+
+ def _to_buckets(self, x, symbol):
+ rtn = []
+ for i in range(0, len(x), self.chunk_size):
+ rtn.append(self._to_bucket(x[i:i + self.chunk_size], symbol))
+ return rtn
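+ # e.g. with chunk_size=100,000 (the actual default is set in __init__ and is
+ # not shown here), 250,000 ticks become buckets of 100,000 + 100,000 + 50,000.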
+
+ def _to_ms(self, date):
+ if isinstance(date, dt):
+ logger.warn('WARNING: treating naive datetime as London in write path')
+ return datetime_to_ms(date)
+ return date
+
+ def _to_dt(self, date, default_tz=None):
+ if isinstance(date, (int, long)):
+ return ms_to_datetime(date, mktz('UTC'))
+ elif date.tzinfo is None:
+ if default_tz is None:
+ raise ValueError("Must specify a TimeZone on incoming data")
+ # Treat naive datetimes as London
+ return date.replace(tzinfo=mktz())
+ return date
+
+ def _str_dtype(self, dtype):
+ """
+ Represent dtypes without byte order, as earlier Java tickstore code doesn't support explicit byte order.
+ """
+ assert dtype.byteorder != '>'
+ if (dtype.kind) == 'i':
+ assert dtype.itemsize == 8
+ return 'int64'
+ elif (dtype.kind) == 'f':
+ assert dtype.itemsize == 8
+ return 'float64'
+ elif (dtype.kind) == 'U':
+ return 'U%d' % (dtype.itemsize / 4)
+ else:
+ raise UnhandledDtypeException("Bad dtype '%s'" % dtype)
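+ # e.g. np.dtype('int64') -> 'int64', np.dtype('float64') -> 'float64',
+ # np.dtype('U4') (itemsize 16) -> 'U4'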
+
+
+ def _ensure_supported_dtypes(self, array):
+ # We only support these types for now, as we need to read them in Java
+ if (array.dtype.kind) == 'i':
+ array = array.astype('<i8')
+ if len(library_metadata) > 1 or (len(library_metadata) == 1 and library_metadata[0] != library_name):
+ raise OverlappingDataException("""There are libraries that overlap with the date range:
+library: {}
+overlapping libraries: {}""".format(library_name, [l.library for l in library_metadata]))
+ self._collection.update_one({'library_name': library_name},
+ {'$set': {'start': start, 'end': end}}, upsert=True)
+
+ def read(self, symbol, date_range, columns=['BID', 'ASK', 'TRDPRC_1', 'BIDSIZE', 'ASKSIZE', 'TRDVOL_1'], **kwargs):
+ libraries = self._get_libraries(date_range)
+ dfs = [l.library.read(symbol, l.date_range.intersection(date_range), columns) for l in libraries]
+ return pd.concat(dfs)
+
+ def write(self, symbol, data):
+ # get the full set of date ranges that we have
+ cursor = self._collection.find()
+ for res in cursor:
+ library = self._arctic_lib.arctic[res['library_name']]
+ dslice = self._slice(data, res['start'], res['end'])
+ if len(dslice) != 0:
+ library.write(symbol, dslice)
+
+ def list_symbols(self, date_range):
+ libraries = self._get_libraries(date_range)
+ return sorted(list(set(itertools.chain(*[l.library.list_symbols() for l in libraries]))))
+
+ def get_name(self):
+ name = self._arctic_lib.get_name()
+ if name.startswith(self._arctic_lib.DB_PREFIX + '_'):
+ name = name[len(self._arctic_lib.DB_PREFIX) + 1:]
+ return name
+
+ def _get_libraries(self, date_range):
+ libraries = self._get_library_metadata(date_range)
+
+ rtn = [TickStoreLibrary(self._arctic_lib.arctic[library.library], library.date_range)
+ for library in libraries]
+ current_start = rtn[-1].date_range.end if rtn else dt(1970, 1, 1, 0, 0) # epoch
+ if date_range.end is None or current_start < date_range.end:
+ name = self.get_name()
+ db_name, tick_type = name.split('.', 1)
+ current_lib = "{}_current.{}".format(db_name, tick_type)
+ try:
+ rtn.append(TickStoreLibrary(self._arctic_lib.arctic[current_lib],
+ DateRange(current_start, None, OPEN_OPEN)))
+ except LibraryNotFoundException:
+ pass # No '_current', move on.
+
+ if not rtn:
+ raise NoDataFoundException("No underlying libraries exist for the given date range")
+ return rtn
+
+ def _slice(self, data, start, end):
+ if isinstance(data, list):
+ dictlist = DictList(data, 'index')
+ slice_start = bisect.bisect_left(dictlist, start)
+ slice_end = bisect.bisect_right(dictlist, end)
+ return data[slice_start:slice_end]
+ elif isinstance(data, pd.DataFrame):
+ return data[start:end]
+ else:
+ raise UnhandledDtypeException("Can't persist type %s to tickstore" % type(data))
+
+ def _get_library_metadata(self, date_range):
+ """
+ Retrieve the libraries for the given date range. The assumption is that the
+ date ranges do not overlap and that they are CLOSED_CLOSED.
+
+ At the moment the date range is mandatory
+ """
+ if date_range is None:
+ raise Exception("A date range must be provided")
+ if not (date_range.start and date_range.end):
+ raise Exception("The date range {0} must contain a start and end date".format(date_range))
+
+ start = date_range.start if date_range.start.tzinfo is not None else date_range.start.replace(tzinfo=mktz())
+ end = date_range.end if date_range.end.tzinfo is not None else date_range.end.replace(tzinfo=mktz())
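+ # The three clauses below match libraries whose range contains the query
+ # start, lies entirely within the query range, or contains the query end -
+ # i.e. any library overlapping the requested CLOSED_CLOSED range.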
+ query = {'$or': [{'start': {'$lte': start}, 'end': {'$gte': start}},
+ {'start': {'$gte': start}, 'end': {'$lte': end}},
+ {'start': {'$lte': end}, 'end': {'$gte': end}}]}
+ return [TickStoreLibrary(res['library_name'], DateRange(res['start'], res['end'], CLOSED_CLOSED))
+ for res in self._collection.find(query,
+ projection={'library_name': 1,
+ 'start': 1, 'end': 1},
+ sort=[('start', pymongo.ASCENDING)])]
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 000000000..a15128ec8
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1 @@
+from ahl.pkgutils.sphinx.conf import *
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 000000000..33404c6ff
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,19 @@
+.. arctic documentation master file
+
+arctic
+===============================
+
+.. toctree::
+ :maxdepth: 4
+
+ autodoc/arctic
+
+.. automodule:: arctic
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/howtos/how_to_custom_arctic_library.py b/howtos/how_to_custom_arctic_library.py
new file mode 100644
index 000000000..c237c7296
--- /dev/null
+++ b/howtos/how_to_custom_arctic_library.py
@@ -0,0 +1,163 @@
+from datetime import datetime as dt
+from bson.binary import Binary
+import cPickle
+
+from arctic import Arctic, register_library_type
+from arctic.decorators import mongo_retry
+
+
+#
+# Arctic maps a library, e.g. 'jblackburn.stuff' to a class instance
+# which implements whatever API you like.
+#
+# Arctic provides a standard switching layer for:
+# - Registering custom storage types (e.g. CustomArcticLibType)
+# - Mapping data libraries to a storage type (e.g. 'jblackburn.stuff' -> CustomArcticLibType)
+# - Handling Authentication
+# - Maintaining per-library metadata
+# - Quota
+#
+
+
+class Stuff(object):
+ """
+ Some custom class persisted by our CustomArcticLibType Library Type
+ """
+ def __init__(self, field1, date_field, stuff):
+ # Some string field
+ self.field1 = field1
+ # Some date field
+ self.date_field = date_field
+ # Arbitrary other stuff
+ self.stuff = stuff
+
+
+class CustomArcticLibType(object):
+ """
+ Custom Arctic Library for storing 'Stuff' items
+ """
+
+ # Choose a library type name that's unique; e.g. .DataType
+ _LIBRARY_TYPE = 'test.CustomArcticLibType'
+
+ def __init__(self, arctic_lib):
+ self._arctic_lib = arctic_lib
+
+ # Arctic_lib gives you a root pymongo.Collection just-for-you:
+ # You may store all your data in here ...
+ self._collection = arctic_lib.get_top_level_collection()
+ # ... or you can create 'sub-collections', e.g.
+ self._sub_collection = self._collection.sub_collection
+
+ # The name of this library
+ print "My name is %s" % arctic_lib.get_name()
+
+ # Fetch some per-library metadata for this library
+ self.some_metadata = arctic_lib.get_library_metadata('some_metadata')
+
+ @classmethod
+ def initialize_library(cls, arctic_lib, **kwargs):
+ # Persist some per-library metadata in this arctic_lib
+ arctic_lib.set_library_metadata('some_metadata', 'some_value')
+ CustomArcticLibType(arctic_lib)._ensure_index()
+
+ def _ensure_index(self):
+ """
+ Index any fields used by your queries.
+ """
+ collection = self._collection
+ # collection.add_indexes
+ collection.create_index('field1')
+
+ ###########################################
+ # Create your own API below!
+ ###########################################
+
+ @mongo_retry
+ def query(self, *args, **kwargs):
+ """
+ Generic query method.
+
+ In reality, your storage class would have its own query methods.
+
+ Performs a Mongo find on this library's collection.
+ See:
+ http://api.mongodb.org/python/current/api/pymongo/collection.html
+ """
+ for x in self._collection.find(*args, **kwargs):
+ x['stuff'] = cPickle.loads(x['stuff'])
+ del x['_id'] # Remove default unique '_id' field from doc
+ yield Stuff(**x)
+
+ @mongo_retry
+ def stats(self):
+ """
+ Database usage statistics. Used by quota.
+ """
+ res = {}
+ db = self._collection.database
+ res['dbstats'] = db.command('dbstats')
+ res['data'] = db.command('collstats', self._collection.name)
+ res['totals'] = {'count': res['data']['count'],
+ 'size': res['data']['size']
+ }
+ return res
+
+ @mongo_retry
+ def store(self, thing):
+ """
+ Simple persistence method
+ """
+ to_store = {'field1': thing.field1,
+ 'date_field': thing.date_field,
+ }
+ to_store['stuff'] = Binary(cPickle.dumps(thing.stuff))
+ # Respect any soft-quota on write - raises if stats().totals.size > quota
+ self._arctic_lib.check_quota()
+ self._collection.insert_one(to_store)
+
+ @mongo_retry
+ def delete(self, query):
+ """
+ Simple delete method
+ """
+ self._collection.delete_one(query)
+
+
+# Hook the class in for the type string 'CustomArcticLibType'
+register_library_type(CustomArcticLibType._LIBRARY_TYPE, CustomArcticLibType)
+
+# Create an Arctic instance pointed at a mongo host
+store = Arctic(mongo_host)
+
+### Initialize the library
+# Map username.custom_lib -> CustomArcticLibType
+store.initialize_library('username.custom_lib', CustomArcticLibType._LIBRARY_TYPE)
+
+# Now pull our username.custom_lib ; note that it has the:
+# - query(...)
+# - store(...)
+# - delete(...)
+# API we defined above
+lib = store['username.custom_lib']
+
+
+# Store some items in the custom library type
+lib.store(Stuff('thing', dt(2012, 1, 1), object()))
+lib.store(Stuff('thing2', dt(2013, 1, 1), object()))
+lib.store(Stuff('thing3', dt(2014, 1, 1), object()))
+lib.store(Stuff(['a', 'b', 'c'], dt(2014, 1, 1), object()))
+
+
+# Do some querying via our library's query method.
+# You would have your own methods for querying here... (which use your index(es), of course)
+list(lib.query()) # Get everything
+list(lib.query({'field1': 'thing'})) # just get by name
+list(lib.query({'field1': 'a'})) # Can query lists
+list(lib.query({'field1': 'b'}))
+list(lib.query({'date_field': {'$lt': dt(2013, 2, 2)}}))
+list(lib.query({'field1':'thing',
+ 'date_field': {'$lt': dt(2013, 2, 2)} }))
+
+# Remove everything
+lib.delete({})
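+
+
+# A soft quota can be set on the library so that check_quota() (used in
+# store() above) has something to enforce. Illustrative only - set_quota()
+# is assumed here rather than shown in this changeset:
+# store.set_quota('username.custom_lib', 10 * 1024 * 1024) # 10MB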
diff --git a/howtos/how_to_use_arctic.py b/howtos/how_to_use_arctic.py
new file mode 100644
index 000000000..3ee2d65c5
--- /dev/null
+++ b/howtos/how_to_use_arctic.py
@@ -0,0 +1,65 @@
+#
+# Arctic Key-Value store
+#
+
+from arctic import Arctic
+from datetime import datetime as dt
+import pandas as pd
+
+
+# Connect to the mongo-host / cluster
+store = Arctic(mongo_host)
+
+# Data is grouped into 'libraries'.
+# Users may have one or more named libraries:
+store.list_libraries()
+
+# Create a library
+store.initialize_library('username.scratch')
+
+# Get a library
+# library = store['username.']
+library = store['username.scratch']
+
+# Store some data in the library
+df = pd.DataFrame({'prices': [1, 2, 3]},
+ [dt(2014, 1, 1), dt(2014, 1, 2), dt(2014, 1, 3)])
+library.write('SYMBOL', df)
+
+# Read some data from the library
+# (Note the returned object has an associated version number and metadata.)
+library.read('SYMBOL')
+
+# Store some data into the library
+library.write('MY_DATA', library.read('SYMBOL').data)
+
+# What symbols (keys) are stored in the library
+library.list_symbols()
+
+# Delete the data item
+library.delete('MY_DATA')
+
+
+# Other library functionality
+
+# Store 'metadata' alongside a data item
+library.write('MY_DATA', library.read('SYMBOL').data, metadata={'some_key': 'some_value'})
+
+# Query available symbols based on metadata
+library.list_symbols(some_key='some_value')
+
+# Find available versions of a symbol
+list(library.list_versions('SYMBOL'))
+
+# Snapshot a library
+# (Point-in-time named reference for all symbols in a library.)
+library.snapshot('snapshot_name')
+library.list_snapshots()
+
+# Get an old version of a symbol
+library.read('SYMBOL', as_of=1)
+# Get a version given a snapshot name
+library.read('SYMBOL', as_of='snapshot_name')
+
+# Delete a snapshot
+library.delete_snapshot('snapshot_name')
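+
+
+# Tidy up the scratch library when finished. Illustrative only -
+# delete_library() is assumed here (mirroring the arctic_delete_library
+# console script) rather than shown in this changeset:
+# store.delete_library('username.scratch')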
diff --git a/setup.py b/setup.py
new file mode 100644
index 000000000..f6f7961d4
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,123 @@
+#
+# Copyright (C) 2015 Man AHL
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
+# USA
+
+import os
+from setuptools import setup, Extension
+from setuptools.command.test import test as TestCommand
+
+
+# Utility function to read the README file.
+def read(fname):
+ return open(os.path.join(os.path.dirname(__file__), fname)).read()
+
+
+class PyTest(TestCommand):
+ user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")]
+
+ def initialize_options(self):
+ TestCommand.initialize_options(self)
+ self.pytest_args = []
+
+ def finalize_options(self):
+ TestCommand.finalize_options(self)
+ self.test_args = []
+ self.test_suite = True
+
+ def run_tests(self):
+ # import here, because outside the eggs aren't loaded
+ import pytest
+ args = [self.pytest_args] if isinstance(self.pytest_args, basestring) else list(self.pytest_args)
+ args.extend(['--cov', 'arctic',
+ '--cov-report', 'xml',
+ '--cov-report', 'html',
+ '--junitxml', 'junit.xml'
+ ])
+ errno = pytest.main(args)
+ sys.exit(errno)
+
+
+# setuptools_cython: setuptools DWIM monkey-patch madness
+# http://mail.python.org/pipermail/distutils-sig/2007-September/thread.html#8204
+import sys
+if 'setuptools.extension' in sys.modules:
+ m = sys.modules['setuptools.extension']
+ m.Extension.__dict__ = m._Extension.__dict__
+
+# Cython lz4
+compress = Extension('arctic._compress',
+ sources=["src/_compress.pyx", "src/lz4.c", "src/lz4hc.c"],
+ extra_compile_args=['-fopenmp'],
+ extra_link_args=['-fopenmp'])
+
+setup(
+ name="arctic",
+ version="1.0.0",
+ author="Man AHL Technology",
+ author_email="ManAHLTech@ahl.com",
+ description=("AHL Research Versioned TimeSeries and Tick store"),
+ license="GPL",
+ keywords=["ahl", "keyvalue", "tickstore", "mongo", "timeseries", ],
+ url="https://github.com/ahlmss/arctic",
+ packages=['arctic', 'tests'],
+ long_description="", # read('README'),
+ cmdclass={'test': PyTest},
+ ext_modules=[compress],
+ setup_requires=["setuptools_cython",
+ "Cython",
+ "numpy",
+ ],
+ install_requires=["decorator",
+ "enum34",
+ "lz4",
+ "mockextras",
+ "pandas",
+ "pymongo>=3.0",
+ "python-dateutil",
+ "pytz",
+ "tzlocal",
+ ],
+ tests_require=["mock",
+ "mockextras",
+ "pytest",
+ "pytest-cov",
+ "pytest-dbfixtures",
+ "pytest-timeout",
+ "pytest-xdist",
+ ],
+ entry_points={'console_scripts': [
+ 'arctic_init_library = arctic.scripts.arctic_init_library:main',
+ 'arctic_list_libraries = arctic.scripts.arctic_list_libraries:main',
+ 'arctic_delete_library = arctic.scripts.arctic_delete_library:main',
+ 'arctic_enable_sharding = arctic.scripts.arctic_enable_sharding:main',
+ 'arctic_copy_data = arctic.scripts.arctic_copy_data:main',
+ 'arctic_create_user = arctic.scripts.arctic_create_user:main',
+ 'arctic_prune_versions = arctic.scripts.arctic_prune_versions:main',
+ 'arctic_fsck = arctic.scripts.arctic_fsck:main',
+ ]
+ },
+ classifiers=[
+ "Development Status :: 4 - Beta",
+ "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Cython",
+ "Topic :: Database",
+ "Topic :: Database :: Front-Ends",
+ "Topic :: Software Development :: Libraries",
+ ],
+)
diff --git a/src/_compress.pyx b/src/_compress.pyx
new file mode 100644
index 000000000..c7e517c0a
--- /dev/null
+++ b/src/_compress.pyx
@@ -0,0 +1,246 @@
+# cython: profile=True
+
+#
+# LZ4 code was copied from: https://github.com/steeve/python-lz4/ r8ac9cf9df8fb8d51f40a3065fa538f8df1c8a62a 22/4/2015 [tt]
+#
+
+cdef extern from "lz4.h":
+ cdef int LZ4_compress(char* source, char* dest, int inputSize) nogil
+ cdef int LZ4_compressBound(int isize) nogil
+ cdef int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxOutputSize) nogil
+
+cdef extern from "lz4hc.h":
+ cdef int LZ4_compressHC(char* source, char* dest, int inputSize) nogil
+
+cimport cython
+cimport cpython
+cimport libc.stdio
+cimport openmp
+
+from libc.stdlib cimport malloc, free, realloc
+from libc.stdint cimport uint8_t, uint32_t
+from libc.stdio cimport printf
+from cpython.string cimport PyString_AsString
+from cython.view cimport array as cvarray
+from cython.parallel import prange
+from cython.parallel import threadid
+from cython.parallel cimport parallel
+
+cdef void store_le32(char *c, uint32_t x) nogil:
+ c[0] = x & 0xff
+ c[1] = (x >> 8) & 0xff
+ c[2] = (x >> 16) & 0xff
+ c[3] = (x >> 24) & 0xff
+
+cdef uint32_t load_le32(char *c) nogil:
+ cdef uint8_t *d = <uint8_t *>c
+ return d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24)
+
+
+cdef int hdr_size = sizeof(uint32_t)
+
+cdef char ** to_cstring_array(list_str):
+ """ Convert a python string list to a **char
+ Note: Performs a malloc. You must free the array once created.
+ """
+ cdef char **ret = <char **>malloc(len(list_str) * sizeof(char *))
+ for i in xrange(len(list_str)):
+ ret[i] = PyString_AsString(list_str[i])
+ return ret
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.nonecheck(False)
+@cython.cdivision(True)
+def compress(pString):
+ return _compress(pString, LZ4_compress)
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.nonecheck(False)
+@cython.cdivision(True)
+def compressHC(pString):
+ return _compress(pString, LZ4_compressHC)
+
+
+cdef _compress(pString, int (*Fnptr_LZ4_compress)(char *, char *, int)):
+ # sizes
+ cdef uint32_t compressed_size
+ cdef uint32_t original_size = len(pString)
+
+ # buffers
+ cdef char *cString = pString
+ cdef char *result # destination buffer
+ cdef bytes pyResult # python wrapped result
+
+ # calc. estimated compressed size
+ compressed_size = LZ4_compressBound(original_size)
+ # alloc memory
+ result = <char *>malloc(compressed_size + hdr_size)
+ # store original size
+ store_le32(result, original_size);
+ # compress & update size
+ compressed_size = Fnptr_LZ4_compress(cString, result + hdr_size, original_size)
+ # cast back into a python string
+ pyResult = result[:compressed_size + hdr_size]
+
+ free(result)
+
+ return pyResult
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.nonecheck(False)
+@cython.cdivision(True)
+def decompress(pString):
+
+ # sizes
+ cdef uint32_t compressed_size = len(pString)
+ cdef uint32_t original_size
+
+ # buffers
+ cdef char *cString # *char pStr
+ cdef char *result # destination buffer
+ cdef bytes pyResult # python wrapped result
+
+ # convert to char*
+ cString = pString
+ # find original size
+ original_size = load_le32(cString)
+ # malloc
+ result = <char *>malloc(original_size)
+ # decompress
+ LZ4_decompress_safe(cString + hdr_size, result, compressed_size - hdr_size, original_size)
+ # cast back into python string
+ pyResult = result[:original_size]
+
+ free(result)
+ return pyResult
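+
+# Note on the framing used above: each compressed payload is a 4-byte
+# little-endian header holding the original (uncompressed) length, followed
+# by the raw LZ4 block. For example, the first four bytes of
+# compress('hello world') decode to 11, and
+# decompress(compress('hello world')) == 'hello world'.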
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.nonecheck(False)
+@cython.cdivision(True)
+def compressarr(pStrList):
+ return _compressarr(pStrList, LZ4_compress)
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.nonecheck(False)
+@cython.cdivision(True)
+def compressarrHC(pStrList):
+ return _compressarr(pStrList, LZ4_compressHC)
+
+
+cdef _compressarr(pStrList, int (*Fnptr_LZ4_compress)(char *, char *, int) nogil):
+
+ if len(pStrList) == 0:
+ return []
+
+ cdef char **cStrList = to_cstring_array(pStrList)
+ cdef Py_ssize_t n = len(pStrList)
+
+ # loop parameters
+ cdef char *cString
+ cdef int original_size
+ cdef uint32_t compressed_size
+ cdef char *result
+ cdef Py_ssize_t i
+
+ # output parameters
+ cdef char **cResult = <char **>malloc(n * sizeof(char *))
+ cdef int[:] lengths = cvarray(shape=(n,), itemsize=sizeof(int), format="i")
+ cdef int[:] orilengths = cvarray(shape=(n,), itemsize=sizeof(int), format="i")
+ cdef bytes pyResult
+
+ # store original string lengths
+ for i in range(n):
+ orilengths[i] = len(pStrList[i])
+
+ with nogil, parallel():
+ for i in prange(n, schedule='static'):
+ cString = cStrList[i]
+ original_size = orilengths[i]
+ # calc. estimated compressed size
+ compressed_size = LZ4_compressBound(original_size)
+ # alloc memory
+ result = <char *>malloc(compressed_size + hdr_size)
+ # store original size
+ store_le32(result, original_size)
+ # compress & update size
+ compressed_size = Fnptr_LZ4_compress(cString, result + hdr_size, original_size)
+ # assign to result
+ lengths[i] = compressed_size + hdr_size
+ cResult[i] = result
+
+ # cast back to python
+ result_list = []
+ for i in range(n):
+ pyResult = cResult[i][:lengths[i]]
+ free(cResult[i])
+ result_list.append(pyResult)
+
+ free(cResult)
+ free(cStrList)
+
+ return result_list
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.nonecheck(False)
+@cython.cdivision(True)
+def decompressarr(pStrList):
+
+ if len(pStrList) == 0:
+ return []
+
+ cdef char **cStrList = to_cstring_array(pStrList)
+ cdef Py_ssize_t n = len(pStrList)
+
+ # loop parameters
+ cdef char *cString
+ cdef uint32_t original_size
+ cdef uint32_t compressed_size
+ cdef char *result
+ cdef Py_ssize_t i
+
+ # output parameters
+ cdef char **cResult = <char **>malloc(n * sizeof(char *))
+ cdef int[:] clengths = cvarray(shape=(n,), itemsize=sizeof(int), format="i")
+ cdef int[:] lengths = cvarray(shape=(n,), itemsize=sizeof(int), format="i")
+ cdef bytes pyResult
+
+ for i in range(n):
+ clengths[i] = len(pStrList[i])
+
+ with nogil, parallel():
+ for i in prange(n, schedule='static'):
+ cString = cStrList[i]
+ # get compressed size
+ compressed_size = clengths[i]
+ # find original size
+ original_size = load_le32(cString)
+ # malloc
+ result = <char *>malloc(original_size)
+ # decompress
+ LZ4_decompress_safe(cString + hdr_size, result, compressed_size - hdr_size, original_size)
+ # assign to result
+ cResult[i] = result
+ lengths[i] = original_size
+
+ # cast back to python
+ result_list = []
+ for i in range(n):
+ pyResult = cResult[i][:lengths[i]]
+ free(cResult[i])
+ result_list.append(pyResult)
+
+ free(cResult)
+ free(cStrList)
+
+ return result_list
diff --git a/src/lz4.c b/src/lz4.c
new file mode 100644
index 000000000..b900f7a09
--- /dev/null
+++ b/src/lz4.c
@@ -0,0 +1,1247 @@
+/*
+ LZ4 - Fast LZ compression algorithm
+ Copyright (C) 2011-2014, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : http://code.google.com/p/lz4/
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+ Tuning parameters
+**************************************/
+/*
+ * HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)).
+ */
+#define HEAPMODE 0
+
+
+/**************************************
+ CPU Feature Detection
+**************************************/
+/* 32 or 64 bits ? */
+#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
+ || defined(__powerpc64__) || defined(__powerpc64le__) \
+ || defined(__ppc64__) || defined(__ppc64le__) \
+ || defined(__PPC64__) || defined(__PPC64LE__) \
+ || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) /* Detects 64 bits mode */
+# define LZ4_ARCH64 1
+#else
+# define LZ4_ARCH64 0
+#endif
+
+/*
+ * Little Endian or Big Endian ?
+ * Overwrite the #define below if you know your architecture endianess
+ */
+#include <stdlib.h> /* Apparently required to detect endianess */
+#if defined (__GLIBC__)
+# include <endian.h>
+# if (__BYTE_ORDER == __BIG_ENDIAN)
+# define LZ4_BIG_ENDIAN 1
+# endif
+#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
+# define LZ4_BIG_ENDIAN 1
+#elif defined(__sparc) || defined(__sparc__) \
+ || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
+ || defined(__hpux) || defined(__hppa) \
+ || defined(_MIPSEB) || defined(__s390__)
+# define LZ4_BIG_ENDIAN 1
+#else
+/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */
+#endif
+
+/*
+ * Unaligned memory access is automatically enabled for "common" CPU, such as x86.
+ * For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property
+ * If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
+ */
+#if defined(__ARM_FEATURE_UNALIGNED)
+# define LZ4_FORCE_UNALIGNED_ACCESS 1
+#endif
+
+/* Define this parameter if your target system or compiler does not support hardware bit count */
+#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */
+# define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+/*
+ * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
+ * This option may provide a small boost to performance for some big endian cpu, although probably modest.
+ * You may set this option to 1 if data will remain within closed environment.
+ * This option is useless on Little_Endian CPU (such as x86)
+ */
+
+/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
+
+
+/**************************************
+ Compiler Options
+**************************************/
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
+/* "restrict" is a known keyword */
+#else
+# define restrict /* Disable restrict */
+#endif
+
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h> /* For Visual 2005 */
+# if LZ4_ARCH64 /* 64-bits */
+# pragma intrinsic(_BitScanForward64) /* For Visual 2005 */
+# pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */
+# else /* 32-bits */
+# pragma intrinsic(_BitScanForward) /* For Visual 2005 */
+# pragma intrinsic(_BitScanReverse) /* For Visual 2005 */
+# endif
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#else
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+#ifdef _MSC_VER /* Visual Studio */
+# define lz4_bswap16(x) _byteswap_ushort(x)
+#else
+# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
+#endif
+
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
+# define expect(expr,value) (__builtin_expect ((expr),(value)) )
+#else
+# define expect(expr,value) (expr)
+#endif
+
+#define likely(expr) expect((expr) != 0, 1)
+#define unlikely(expr) expect((expr) != 0, 0)
+
+
+/**************************************
+ Memory routines
+**************************************/
+#include <stdlib.h> /* malloc, calloc, free */
+#define ALLOCATOR(n,s) calloc(n,s)
+#define FREEMEM free
+#include <string.h> /* memset, memcpy */
+#define MEM_INIT memset
+
+
+/**************************************
+ Includes
+**************************************/
+#include "lz4.h"
+
+
+/**************************************
+ Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+#endif
+
+#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
+# define _PACKED __attribute__ ((packed))
+#else
+# define _PACKED
+#endif
+
+#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+# if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+# pragma pack(1)
+# else
+# pragma pack(push, 1)
+# endif
+#endif
+
+typedef struct { U16 v; } _PACKED U16_S;
+typedef struct { U32 v; } _PACKED U32_S;
+typedef struct { U64 v; } _PACKED U64_S;
+typedef struct {size_t v;} _PACKED size_t_S;
+
+#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+# if defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+# pragma pack(0)
+# else
+# pragma pack(pop)
+# endif
+#endif
+
+#define A16(x) (((U16_S *)(x))->v)
+#define A32(x) (((U32_S *)(x))->v)
+#define A64(x) (((U64_S *)(x))->v)
+#define AARCH(x) (((size_t_S *)(x))->v)
+
+
+/**************************************
+ Constants
+**************************************/
+#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
+#define HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define MINMATCH 4
+
+#define COPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (COPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1U<<10)
+#define MB *(1U<<20)
+#define GB *(1U<<30)
+
+#define LZ4_64KLIMIT ((64 KB) + (MFLIMIT-1))
+#define SKIPSTRENGTH 6 /* Increasing this value will make the compression run slower on incompressible data */
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+
+#define ML_BITS 4
+#define ML_MASK ((1U<=e; */
+#else
+# define LZ4_WILDCOPY(d,s,e) { if (likely(e-d <= 8)) LZ4_COPY8(d,s) else do { LZ4_COPY8(d,s) } while (d>3);
+# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clzll(val) >> 3);
+# else
+ int r;
+ if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+# endif
+# else
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanForward64( &r, val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctzll(val) >> 3);
+# else
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+ return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+# endif
+}
+
+#else
+
+int LZ4_NbCommonBytes (register U32 val)
+{
+# if defined(LZ4_BIG_ENDIAN)
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanReverse( &r, val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clz(val) >> 3);
+# else
+ int r;
+ if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+ r += (!val);
+ return r;
+# endif
+# else
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanForward( &r, val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctz(val) >> 3);
+# else
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+# endif
+}
+
+#endif
+
+
+/********************************
+ Compression functions
+********************************/
+int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
+
+static int LZ4_hashSequence(U32 sequence, tableType_t tableType)
+{
+ if (tableType == byU16)
+ return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
+ else
+ return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+}
+
+static int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); }
+
+static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+ switch (tableType)
+ {
+ case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; }
+ case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; }
+ case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; }
+ }
+}
+
+static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+ U32 h = LZ4_hashPosition(p, tableType);
+ LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+ if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
+ if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
+ { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */
+}
+
+static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+ U32 h = LZ4_hashPosition(p, tableType);
+ return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+static unsigned LZ4_count(const BYTE* pIn, const BYTE* pRef, const BYTE* pInLimit)
+{
+ const BYTE* const pStart = pIn;
+
+ while (likely(pIndictSize;
+ const BYTE* const dictionary = dictPtr->dictionary;
+ const BYTE* const dictEnd = dictionary + dictPtr->dictSize;
+ const size_t dictDelta = dictEnd - (const BYTE*)source;
+ const BYTE* anchor = (const BYTE*) source;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimit = iend - MFLIMIT;
+ const BYTE* const matchlimit = iend - LASTLITERALS;
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* const olimit = op + maxOutputSize;
+
+ const int skipStrength = SKIPSTRENGTH;
+ U32 forwardH;
+ size_t refDelta=0;
+
+ /* Init conditions */
+ if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */
+ switch(dict)
+ {
+ case noDict:
+ default:
+ base = (const BYTE*)source;
+ lowLimit = (const BYTE*)source;
+ break;
+ case withPrefix64k:
+ base = (const BYTE*)source - dictPtr->currentOffset;
+ lowLimit = (const BYTE*)source - dictPtr->dictSize;
+ break;
+ case usingExtDict:
+ base = (const BYTE*)source - dictPtr->currentOffset;
+ lowLimit = (const BYTE*)source;
+ break;
+ }
+ if ((tableType == byU16) && (inputSize>=(int)LZ4_64KLIMIT)) return 0; /* Size too large (not within 64K limit) */
+ if (inputSize> skipStrength;
+ //if (step>8) step=8; // required for valid forwardIp ; slows down uncompressible data a bit
+
+ if (unlikely(forwardIp > mflimit)) goto _last_literals;
+
+ ref = LZ4_getPositionOnHash(h, ctx, tableType, base);
+ if (dict==usingExtDict)
+ {
+ if (ref<(const BYTE*)source)
+ {
+ refDelta = dictDelta;
+ lowLimit = dictionary;
+ }
+ else
+ {
+ refDelta = 0;
+ lowLimit = (const BYTE*)source;
+ }
+ }
+ forwardH = LZ4_hashPosition(forwardIp, tableType);
+ LZ4_putPositionOnHash(ip, h, ctx, tableType, base);
+
+ } while ( ((dictIssue==dictSmall) ? (ref < lowRefLimit) : 0)
+ || ((tableType==byU16) ? 0 : (ref + MAX_DISTANCE < ip))
+ || (A32(ref+refDelta) != A32(ip)) );
+ }
+
+ /* Catch up */
+ while ((ip>anchor) && (ref+refDelta > lowLimit) && (unlikely(ip[-1]==ref[refDelta-1]))) { ip--; ref--; }
+
+ {
+ /* Encode Literal length */
+ unsigned litLength = (unsigned)(ip - anchor);
+ token = op++;
+ if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
+ return 0; /* Check output limit */
+ if (litLength>=RUN_MASK)
+ {
+ int len = (int)litLength-RUN_MASK;
+ *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255;
+ *op++ = (BYTE)len;
+ }
+ else *token = (BYTE)(litLength< matchlimit) limit = matchlimit;
+ matchLength = LZ4_count(ip+MINMATCH, ref+MINMATCH, limit);
+ ip += MINMATCH + matchLength;
+ if (ip==limit)
+ {
+ unsigned more = LZ4_count(ip, (const BYTE*)source, matchlimit);
+ matchLength += more;
+ ip += more;
+ }
+ }
+ else
+ {
+ matchLength = LZ4_count(ip+MINMATCH, ref+MINMATCH, matchlimit);
+ ip += MINMATCH + matchLength;
+ }
+
+ if (matchLength>=ML_MASK)
+ {
+ if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit)))
+ return 0; /* Check output limit */
+ *token += ML_MASK;
+ matchLength -= ML_MASK;
+ for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; }
+ if (matchLength >= 255) { matchLength-=255; *op++ = 255; }
+ *op++ = (BYTE)matchLength;
+ }
+ else *token += (BYTE)(matchLength);
+ }
+
+ anchor = ip;
+
+ /* Test end of chunk */
+ if (ip > mflimit) break;
+
+ /* Fill table */
+ LZ4_putPosition(ip-2, ctx, tableType, base);
+
+ /* Test next position */
+ ref = LZ4_getPosition(ip, ctx, tableType, base);
+ if (dict==usingExtDict)
+ {
+ if (ref<(const BYTE*)source)
+ {
+ refDelta = dictDelta;
+ lowLimit = dictionary;
+ }
+ else
+ {
+ refDelta = 0;
+ lowLimit = (const BYTE*)source;
+ }
+ }
+ LZ4_putPosition(ip, ctx, tableType, base);
+ if ( ((dictIssue==dictSmall) ? (ref>=lowRefLimit) : 1)
+ && (ref+MAX_DISTANCE>=ip)
+ && (A32(ref+refDelta)==A32(ip)) )
+ { token=op++; *token=0; goto _next_match; }
+
+ /* Prepare next loop */
+ forwardH = LZ4_hashPosition(++ip, tableType);
+ }
+
+_last_literals:
+ /* Encode Last Literals */
+ {
+ int lastRun = (int)(iend - anchor);
+ if ((outputLimited) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize))
+ return 0; /* Check output limit */
+ if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+ else *op++ = (BYTE)(lastRun<= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+ if (dict->initCheck) MEM_INIT(dict, 0, sizeof(LZ4_stream_t_internal)); /* Uninitialized structure detected */
+
+ if (dictSize < MINMATCH)
+ {
+ dict->dictionary = NULL;
+ dict->dictSize = 0;
+ return 1;
+ }
+
+ if (p <= dictEnd - 64 KB) p = dictEnd - 64 KB;
+ base = p - dict->currentOffset;
+ dict->dictionary = p;
+ dict->dictSize = (U32)(dictEnd - p);
+ dict->currentOffset += dict->dictSize;
+
+ while (p <= dictEnd-MINMATCH)
+ {
+ LZ4_putPosition(p, dict, byU32, base);
+ p+=3;
+ }
+
+ return 1;
+}
+
+
+void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src)
+{
+ if ((LZ4_dict->currentOffset > 0x80000000) ||
+ ((size_t)LZ4_dict->currentOffset > (size_t)src)) /* address space overflow */
+ {
+ /* rescale hash table */
+ U32 delta = LZ4_dict->currentOffset - 64 KB;
+ const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+ int i;
+ for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0;
+ else LZ4_dict->hashTable[i] -= delta;
+ }
+ LZ4_dict->currentOffset = 64 KB;
+ if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+ LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+ }
+}
+
+
+FORCE_INLINE int LZ4_compress_continue_generic (void* LZ4_stream, const char* source, char* dest, int inputSize,
+ int maxOutputSize, limitedOutput_directive limit)
+{
+ LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream;
+ const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+ const BYTE* smallest = (const BYTE*) source;
+ if (streamPtr->initCheck) return 0; /* Uninitialized structure detected */
+ if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
+ LZ4_renormDictT(streamPtr, smallest);
+
+ /* Check overlapping input/dictionary space */
+ {
+ const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+ if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd))
+ {
+ streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+ if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
+ if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
+ streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+ }
+ }
+
+ /* prefix mode : source data follows dictionary */
+ if (dictEnd == (const BYTE*)source)
+ {
+ int result;
+ if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+ result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, dictSmall);
+ else
+ result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, withPrefix64k, noDictIssue);
+ streamPtr->dictSize += (U32)inputSize;
+ streamPtr->currentOffset += (U32)inputSize;
+ return result;
+ }
+
+ /* external dictionary mode */
+ {
+ int result;
+ if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+ result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, dictSmall);
+ else
+ result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limit, byU32, usingExtDict, noDictIssue);
+ streamPtr->dictionary = (const BYTE*)source;
+ streamPtr->dictSize = (U32)inputSize;
+ streamPtr->currentOffset += (U32)inputSize;
+ return result;
+ }
+}
+
+
+int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int inputSize)
+{
+ return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, 0, notLimited);
+}
+
+int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+ return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput);
+}
+
+
+// Hidden debug function, to force separate dictionary mode
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize)
+{
+ LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict;
+ int result;
+ const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+ const BYTE* smallest = dictEnd;
+ if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
+ LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest);
+
+ result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue);
+
+ streamPtr->dictionary = (const BYTE*)source;
+ streamPtr->dictSize = (U32)inputSize;
+ streamPtr->currentOffset += (U32)inputSize;
+
+ return result;
+}
+
+
+int LZ4_saveDict (void* LZ4_dict, char* safeBuffer, int dictSize)
+{
+ LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict;
+ const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;
+
+ if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */
+ if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
+
+ memcpy(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+ dict->dictionary = (const BYTE*)safeBuffer;
+ dict->dictSize = (U32)dictSize;
+
+ return 1;
+}
+
+
+
+/****************************
+ Decompression functions
+****************************/
+/*
+ * This generic decompression function cover all use cases.
+ * It shall be instanciated several times, using different sets of directives
+ * Note that it is essential this generic function is really inlined,
+ * in order to remove useless branches during compilation optimisation.
+ */
+FORCE_INLINE int LZ4_decompress_generic(
+ const char* source,
+ char* dest,
+ int inputSize,
+ int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
+
+ int endOnInput, /* endOnOutputSize, endOnInputSize */
+ int partialDecoding, /* full, partial */
+ int targetOutputSize, /* only used if partialDecoding==partial */
+ int dict, /* noDict, withPrefix64k, usingExtDict */
+ const char* dictStart, /* only if dict==usingExtDict */
+ int dictSize /* note : = 0 if noDict */
+ )
+{
+ /* Local Variables */
+ const BYTE* restrict ip = (const BYTE*) source;
+ const BYTE* ref;
+ const BYTE* const iend = ip + inputSize;
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* const oend = op + outputSize;
+ BYTE* cpy;
+ BYTE* oexit = op + targetOutputSize;
+ const BYTE* const lowLimit = (const BYTE*)dest - dictSize;
+
+ const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
+//#define OLD
+#ifdef OLD
+ const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */
+#else
+ const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */
+#endif
+ static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
+
+ const int checkOffset = (endOnInput) && (dictSize < (int)(64 KB));
+
+
+ /* Special cases */
+ if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */
+ if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */
+ if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
+
+
+ /* Main Loop */
+ while (1)
+ {
+ unsigned token;
+ size_t length;
+
+ /* get runlength */
+ token = *ip++;
+ if ((length=(token>>ML_BITS)) == RUN_MASK)
+ {
+ unsigned s;
+ do
+ {
+ s = *ip++;
+ length += s;
+ }
+ while (likely((endOnInput)?ipLZ4_MAX_INPUT_SIZE)) goto _output_error; /* overflow detection */
+ if ((sizeof(void*)==4) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error; /* quickfix issue 134 */
+ if ((endOnInput) && (sizeof(void*)==4) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error; /* quickfix issue 134 */
+ }
+
+ /* copy literals */
+ cpy = op+length;
+ if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
+ || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
+ {
+ if (partialDecoding)
+ {
+ if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */
+ if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */
+ }
+ else
+ {
+ if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */
+ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */
+ }
+ memcpy(op, ip, length);
+ ip += length;
+ op += length;
+ break; /* Necessarily EOF, due to parsing restrictions */
+ }
+ LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy;
+
+ /* get offset */
+ LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
+ if ((checkOffset) && (unlikely(ref < lowLimit))) goto _output_error; /* Error : offset outside destination buffer */
+
+ /* get matchlength */
+ if ((length=(token&ML_MASK)) == ML_MASK)
+ {
+ unsigned s;
+ do
+ {
+ if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
+ s = *ip++;
+ length += s;
+ } while (s==255);
+ //if ((sizeof(void*)==4) && unlikely(length>LZ4_MAX_INPUT_SIZE)) goto _output_error; /* overflow detection */
+ if ((sizeof(void*)==4) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* quickfix issue 134 */
+ }
+
+ /* check external dictionary */
+ if ((dict==usingExtDict) && (ref < (BYTE* const)dest))
+ {
+ if (unlikely(op+length+MINMATCH > oend-LASTLITERALS)) goto _output_error;
+
+ if (length+MINMATCH <= (size_t)(dest-(char*)ref))
+ {
+ ref = dictEnd - (dest-(char*)ref);
+ memcpy(op, ref, length+MINMATCH);
+ op += length+MINMATCH;
+ }
+ else
+ {
+ size_t copySize = (size_t)(dest-(char*)ref);
+ memcpy(op, dictEnd - copySize, copySize);
+ op += copySize;
+ copySize = length+MINMATCH - copySize;
+ if (copySize > (size_t)((char*)op-dest)) /* overlap */
+ {
+ BYTE* const cpy = op + copySize;
+ const BYTE* ref = (BYTE*)dest;
+ while (op < cpy) *op++ = *ref++;
+ }
+ else
+ {
+ memcpy(op, dest, copySize);
+ op += copySize;
+ }
+ }
+ continue;
+ }
+
+ /* copy repeated sequence */
+ if (unlikely((op-ref)<(int)STEPSIZE))
+ {
+ const size_t dec64 = dec64table[(sizeof(void*)==4) ? 0 : op-ref];
+ op[0] = ref[0];
+ op[1] = ref[1];
+ op[2] = ref[2];
+ op[3] = ref[3];
+#ifdef OLD
+ op += 4, ref += 4; ref -= dec32table[op-ref];
+ A32(op) = A32(ref);
+ op += STEPSIZE-4; ref -= dec64;
+#else
+ ref += dec32table[op-ref];
+ A32(op+4) = A32(ref);
+ op += STEPSIZE; ref -= dec64;
+#endif
+ } else { LZ4_COPYSTEP(op,ref); }
+ cpy = op + length - (STEPSIZE-4);
+
+ if (unlikely(cpy>oend-COPYLENGTH-(STEPSIZE-4)))
+ {
+ if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last 5 bytes must be literals */
+ if (opdictionary = dictionary;
+ lz4sd->dictSize = dictSize;
+ return 1;
+}
+
+/*
+*_continue() :
+ These decoding functions allow decompression of multiple blocks in "streaming" mode.
+ Previously decoded blocks must still be available at the memory position where they were decoded.
+ If it's not possible, save the relevant part of decoded data into a safe buffer,
+ and indicate where it stands using LZ4_setDictDecode()
+*/
+int LZ4_decompress_safe_continue (void* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+ LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+ int result;
+
+ result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, lz4sd->dictionary, lz4sd->dictSize);
+ if (result <= 0) return result;
+ if (lz4sd->dictionary + lz4sd->dictSize == dest)
+ {
+ lz4sd->dictSize += result;
+ }
+ else
+ {
+ lz4sd->dictionary = dest;
+ lz4sd->dictSize = result;
+ }
+
+ return result;
+}
+
+int LZ4_decompress_fast_continue (void* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+ LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode;
+ int result;
+
+ result = LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, lz4sd->dictionary, lz4sd->dictSize);
+ if (result <= 0) return result;
+ if (lz4sd->dictionary + lz4sd->dictSize == dest)
+ {
+ lz4sd->dictSize += result;
+ }
+ else
+ {
+ lz4sd->dictionary = dest;
+ lz4sd->dictSize = result;
+ }
+
+ return result;
+}
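
As a rough illustration of the streaming contract documented above, a caller could drive the *_continue() decoder block by block along these lines. The helper name, the external framing that supplies each block's compressed size, and the buffer handling are assumptions of this sketch, not part of the API shown here.

#include <string.h>
#include "lz4.h"

/* Sketch: decode consecutive blocks with LZ4_decompress_safe_continue().
   Assumes csizes[i] holds the compressed size of block i (framing not shown)
   and that decoded data stays in place so it can serve as the dictionary. */
int decode_blocks(const char* src, const int* csizes, int nbBlocks,
                  char* dst, int dstCapacity)
{
    LZ4_streamDecode_t ctx;
    int i, total = 0;
    memset(&ctx, 0, sizeof(ctx));              /* must be zeroed before first use */
    for (i = 0; i < nbBlocks; i++)
    {
        int decoded = LZ4_decompress_safe_continue(&ctx, src, dst + total,
                                                   csizes[i], dstCapacity - total);
        if (decoded < 0) return -1;            /* malformed block */
        src   += csizes[i];
        total += decoded;
    }
    return total;                              /* total decoded bytes */
}
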
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+ These decoding functions work the same as "_continue" ones,
+ the dictionary must be explicitly provided within parameters
+*/
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+ return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, dictStart, dictSize);
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{
+ return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict, dictStart, dictSize);
+}
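
A minimal sketch of the one-shot dictionary variant, assuming the caller still holds the exact dictionary bytes the compressor used; the wrapper name is illustrative only.

#include "lz4.h"

/* Sketch: decode one block that was compressed against a known dictionary. */
int decode_with_dict(const char* src, int srcSize, char* dst, int dstCapacity,
                     const char* dict, int dictSize)
{
    /* Returns the decoded byte count, or a negative value on malformed input. */
    return LZ4_decompress_safe_usingDict(src, dst, srcSize, dstCapacity, dict, dictSize);
}
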
+
+
+/***************************************************
+ Obsolete Functions
+***************************************************/
+/*
+These function names are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); }
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); }
+
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
+
+void LZ4_init(LZ4_stream_t_internal* lz4ds, const BYTE* base)
+{
+ MEM_INIT(lz4ds, 0, LZ4_STREAMSIZE);
+ lz4ds->bufferStart = base;
+}
+
+int LZ4_resetStreamState(void* state, const char* inputBuffer)
+{
+ if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */
+ LZ4_init((LZ4_stream_t_internal*)state, (const BYTE*)inputBuffer);
+ return 0;
+}
+
+void* LZ4_create (const char* inputBuffer)
+{
+ void* lz4ds = ALLOCATOR(4, LZ4_STREAMSIZE_U32);
+ LZ4_init ((LZ4_stream_t_internal*)lz4ds, (const BYTE*)inputBuffer);
+ return lz4ds;
+}
+
+char* LZ4_slideInputBuffer (void* LZ4_Data)
+{
+ LZ4_stream_t_internal* lz4ds = (LZ4_stream_t_internal*)LZ4_Data;
+
+ LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)lz4ds->bufferStart, 64 KB);
+
+ return (char*)(lz4ds->bufferStart + 64 KB);
+}
+
+/* Obsolete compression functions using User-allocated state */
+
+int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
+
+int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize)
+{
+ if (((size_t)(state)&3) != 0) return 0; /* Error : state is not aligned on 4-bytes boundary */
+ MEM_INIT(state, 0, LZ4_STREAMSIZE);
+
+ if (inputSize < (int)LZ4_64KLIMIT)
+ return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue);
+ else
+ return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue);
+}
+
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+ if (((size_t)(state)&3) != 0) return 0; /* Error : state is not aligned on 4-bytes boundary */
+ MEM_INIT(state, 0, LZ4_STREAMSIZE);
+
+ if (inputSize < (int)LZ4_64KLIMIT)
+ return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue);
+ else
+ return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue);
+}
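
For orientation, one way the user-allocated-state entry points above might be called. The malloc-based wrapper is the sketch's own, and it assumes 'dest' was sized with LZ4_compressBound(inputSize).

#include <stdlib.h>
#include "lz4.h"

/* Sketch: compress one buffer with caller-owned state, so the library
   performs no allocation of its own. */
int compress_with_own_state(const char* source, char* dest, int inputSize)
{
    int written = 0;
    void* state = malloc(LZ4_sizeofState());   /* malloc() returns suitably aligned memory */
    if (state == NULL) return 0;
    written = LZ4_compress_withState(state, source, dest, inputSize);
    free(state);
    return written;                            /* 0 means the compression failed */
}
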
+
+/* Obsolete streaming decompression functions */
+
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+ return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, NULL, 64 KB);
+}
+
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+ return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, NULL, 64 KB);
+}
\ No newline at end of file
diff --git a/src/lz4.h b/src/lz4.h
new file mode 100644
index 000000000..1064fa115
--- /dev/null
+++ b/src/lz4.h
@@ -0,0 +1,306 @@
+/*
+ LZ4 - Fast LZ compression algorithm
+ Header File
+ Copyright (C) 2011-2014, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : http://code.google.com/p/lz4/
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#pragma once
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/**************************************
+ Version
+**************************************/
+#define LZ4_VERSION_MAJOR 1 /* for major interface/format changes */
+#define LZ4_VERSION_MINOR 2 /* for minor interface/format changes */
+#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */
+
+
+/**************************************
+ Tuning parameter
+**************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 14
+
+
+/**************************************
+ Simple Functions
+**************************************/
+
+int LZ4_compress (const char* source, char* dest, int inputSize);
+int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxOutputSize);
+
+/*
+LZ4_compress() :
+ Compresses 'inputSize' bytes from 'source' into 'dest'.
+ Destination buffer must be already allocated,
+ and must be sized to handle worst cases situations (input data not compressible)
+ Worst case size evaluation is provided by function LZ4_compressBound()
+ inputSize : Max supported value is LZ4_MAX_INPUT_SIZE
+ return : the number of bytes written in buffer dest
+ or 0 if the compression fails
+
+LZ4_decompress_safe() :
+ compressedSize : is obviously the source size
+ maxOutputSize : is the size of the destination buffer, which must be already allocated.
+ return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
+ If the destination buffer is not large enough, decoding will stop and output an error code (<0).
+ If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ This function is protected against buffer overflow exploits :
+ it never writes outside of output buffer, and never reads outside of input buffer.
+ Therefore, it is protected against malicious data packets.
+*/
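
To make the buffer-sizing rules above concrete, a small round-trip sketch; the helper name and allocation strategy are the example's own.

#include <stdlib.h>
#include <string.h>
#include "lz4.h"

/* Sketch: compress 'text' (including its terminating NUL), then decode it back. */
int round_trip(const char* text)
{
    int srcSize = (int)strlen(text) + 1;
    int bound = LZ4_compressBound(srcSize);            /* worst-case compressed size */
    char* compressed = (char*)malloc(bound);
    char* restored = (char*)malloc(srcSize);
    int ok = 0;

    if (compressed && restored)
    {
        int csize = LZ4_compress(text, compressed, srcSize);
        int dsize = LZ4_decompress_safe(compressed, restored, csize, srcSize);
        ok = (csize > 0) && (dsize == srcSize) && (memcmp(text, restored, srcSize) == 0);
    }
    free(compressed);
    free(restored);
    return ok;                                         /* 1 on a successful round trip */
}
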
+
+
+/*
+Note :
+ Should you prefer to explicitly allocate compression-table memory using your own allocation method,
+ use the streaming functions provided below, simply reset the memory area between each call to LZ4_compress_continue()
+*/
+
+
+/**************************************
+ Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize) ((unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*
+LZ4_compressBound() :
+ Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible)
+ primarily useful for memory allocation of output buffer.
+ macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation).
+
+ isize : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE
+ return : maximum output size in a "worst case" scenario
+ or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+*/
+int LZ4_compressBound(int isize);
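
Because LZ4_COMPRESSBOUND() is a macro, the worst case can also be evaluated at compile time, for example to size a static or stack buffer. A hedged sketch; EXAMPLE_SRC_SIZE is the example's own constant, not part of the API.

#include "lz4.h"

#define EXAMPLE_SRC_SIZE 4096                          /* example's own block size */

/* Sketch: 'source' is assumed to hold EXAMPLE_SRC_SIZE bytes. */
int compress_fixed_block(const char* source)
{
    static char compressed[LZ4_COMPRESSBOUND(EXAMPLE_SRC_SIZE)];
    return LZ4_compress(source, compressed, EXAMPLE_SRC_SIZE);
}
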
+
+
+/*
+LZ4_compress_limitedOutput() :
+ Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+ If it cannot achieve it, compression will stop, and result of the function will be zero.
+ This function never writes outside of provided output buffer.
+
+ inputSize : Max supported value is LZ4_MAX_INPUT_SIZE
+ maxOutputSize : is the size of the destination buffer (which must be already allocated)
+ return : the number of bytes written in buffer 'dest'
+ or 0 if the compression fails
+*/
+int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
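
A short sketch of the typical fixed-size-output pattern; PAYLOAD_MAX and the fall-back policy are assumptions of the example.

#include "lz4.h"

#define PAYLOAD_MAX 1024                    /* example's own packet payload size */

/* Sketch: try to fit the compressed data into a fixed payload. */
int compress_into_packet(const char* src, int srcSize, char* payload)
{
    int csize = LZ4_compress_limitedOutput(src, payload, srcSize, PAYLOAD_MAX);
    /* csize == 0 means the data did not fit in PAYLOAD_MAX bytes; a caller
       could then fall back to sending the block uncompressed. */
    return csize;
}
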
+
+
+/*
+LZ4_decompress_fast() :
+ originalSize : is the original and therefore uncompressed size
+ return : the number of bytes read from the source buffer (in other words, the compressed size)
+ If the source stream is malformed, the function will stop decoding and return a negative result.
+ Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes.
+ note : This function is a bit faster than LZ4_decompress_safe()
+ It provides fast decompression and fully respects memory boundaries for properly formed compressed data.
+ It does not provide full protection against intentionally modified data streams.
+ Use this function in a trusted environment (data to decode comes from a trusted source).
+*/
+int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
+
+
+/*
+LZ4_decompress_safe_partial() :
+ This function decompresses a compressed block of size 'compressedSize' at position 'source'
+ into output buffer 'dest' of size 'maxOutputSize'.
+ The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached,
+ reducing decompression time.
+ return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize)
+ Note : this number can be < 'targetOutputSize' if the compressed block to decode is smaller.
+ Always check how many bytes were actually decoded.
+ If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets
+*/
+int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxOutputSize);
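
Illustratively, partial decoding can be used to peek at a prefix of a block without decoding all of it; the 256-byte target below is arbitrary.

#include "lz4.h"

/* Sketch: decode roughly the first 256 bytes of a block. Decoding may stop
   once the target is reached, so always use the returned count. */
int peek_prefix(const char* src, int srcSize, char* dst, int dstCapacity)
{
    int target = 256;
    int got = LZ4_decompress_safe_partial(src, dst, srcSize, target, dstCapacity);
    return got;   /* may be < target for small blocks, or < 0 on malformed input */
}
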
+
+
+/***********************************************
+ Experimental Streaming Compression Functions
+***********************************************/
+
+#define LZ4_STREAMSIZE_U32 ((1 << (LZ4_MEMORY_USAGE-2)) + 8)
+#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U32 * sizeof(unsigned int))
+/*
+ * LZ4_stream_t
+ * information structure to track an LZ4 stream.
+ * important : set this structure content to zero before first use !
+ */
+typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t;
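
A minimal sketch of the zero-before-first-use requirement for a stack-allocated stream (illustrative only).

#include <string.h>
#include "lz4.h"

/* Sketch: the tracking structure can live on the stack, provided it is
   zeroed before its first use as required above. */
void init_stream_on_stack(void)
{
    LZ4_stream_t ctx;
    memset(&ctx, 0, sizeof(ctx));   /* mandatory before the first *_continue() call */
    /* &ctx can now be passed to LZ4_compress_continue() ... */
}
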
+
+/*
+ * If you prefer dynamic allocation methods,
+ * LZ4_createStream
+ * provides a pointer (void*) towards an initialized LZ4_stream_t structure.
+ * LZ4_free just frees it.
+ */
+void* LZ4_createStream();
+int LZ4_free (void* LZ4_stream);
+
+
+/*
+ * LZ4_loadDict
+ * Use this function to load a static dictionary into LZ4_stream.
+ * Any previous data will be forgotten, only 'dictionary' will remain in memory.
+ * Loading a size of 0 is allowed (same effect as init).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_loadDict (void* LZ4_stream, const char* dictionary, int dictSize);
+
+/*
+ * LZ4_compress_continue
+ * Compress data block 'source', using blocks compressed before as dictionary to improve compression ratio
+ * Previous data blocks are assumed to still be present at their previous location.
+ */
+int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int inputSize);
+
+/*
+ * LZ4_compress_limitedOutput_continue
+ * Same as before, but also specify a maximum target compressed size (maxOutputSize)
+ * If objective cannot be met, compression exits, and returns a zero.
+ */
+int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/*
+ * LZ4_saveDict
+ * If previously compressed data block is not guaranteed to remain at its previous memory location
+ * save it into a safe place (char* safeBuffer)
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ * dictionary is immediately usable, you can therefore call again LZ4_compress_continue()
+ * Return : 1 if OK, 0 if error
+ * Note : any dictSize > 64 KB will be interpreted as 64KB.
+ */
+int LZ4_saveDict (void* LZ4_stream, char* safeBuffer, int dictSize);
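
Putting the streaming pieces above together, one possible two-chunk compression cycle looks roughly like this; the chunk framing, error policy and 64 KB history buffer are the sketch's own choices.

#include <string.h>
#include "lz4.h"

/* Sketch: compress two consecutive chunks through one stream so the second
   chunk can reference the first as history. Block framing is omitted. */
int stream_two_chunks(const char* chunk1, int size1,
                      const char* chunk2, int size2,
                      char* out, int outCapacity)
{
    LZ4_stream_t ctx;
    char history[64 * 1024];                /* safe place for up to 64 KB of history */
    int c1, c2;

    memset(&ctx, 0, sizeof(ctx));           /* zero before first use */
    c1 = LZ4_compress_limitedOutput_continue(&ctx, chunk1, out, size1, outCapacity);
    if (c1 == 0) return 0;

    /* If chunk1's buffer may be moved or reused, preserve the history first. */
    if (LZ4_saveDict(&ctx, history, (int)sizeof(history)) == 0) return 0;

    c2 = LZ4_compress_limitedOutput_continue(&ctx, chunk2, out + c1, size2, outCapacity - c1);
    if (c2 == 0) return 0;

    return c1 + c2;                         /* total compressed bytes */
}
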
+
+
+/************************************************
+ Experimental Streaming Decompression Functions
+************************************************/
+
+#define LZ4_STREAMDECODESIZE_U32 4
+#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U32 * sizeof(unsigned int))
+/*
+ * LZ4_streamDecode_t
+ * information structure to track an LZ4 stream.
+ * important : set this structure content to zero before first use !
+ */
+typedef struct { unsigned int table[LZ4_STREAMDECODESIZE_U32]; } LZ4_streamDecode_t;
+
+/*
+ * If you prefer dynamic allocation methods,
+ * LZ4_createStreamDecode()
+ * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
+ * LZ4_free just frees it.
+ */
+void* LZ4_createStreamDecode();
+int LZ4_free (void* LZ4_stream); /* yes, it's the same one as for compression */
+
+/*
+*_continue() :
+ These decoding functions allow decompression of multiple blocks in "streaming" mode.
+ Previously decoded blocks must still be available at the memory position where they were decoded.
+ If it's not possible, save the relevant part of decoded data into a safe buffer,
+ and indicate where it stands using LZ4_setDictDecode()
+*/
+int LZ4_decompress_safe_continue (void* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize);
+int LZ4_decompress_fast_continue (void* LZ4_streamDecode, const char* source, char* dest, int originalSize);
+
+/*
+ * LZ4_setDictDecode
+ * Use this function to instruct where to find the dictionary.
+ * This function can be used to specify a static dictionary,
+ * or to instruct where to find some previously decoded data saved into a different memory space.
+ * Setting a size of 0 is allowed (same effect as no dictionary).
+ * Return : 1 if OK, 0 if error
+ */
+int LZ4_setDictDecode (void* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+ These decoding functions work the same as
+ a combination of LZ4_setDictDecode() followed by LZ4_decompress_x_continue()
+ all together into a single function call.
+ It doesn't use nor update an LZ4_streamDecode_t structure.
+*/
+int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize);
+int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
+
+
+
+
+/**************************************
+ Obsolete Functions
+**************************************/
+/*
+Obsolete decompression functions
+These function names are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is the same as LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize);
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+
+/* Obsolete functions for externally allocated state; use streaming interface instead */
+int LZ4_sizeofState(void);
+int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete streaming functions; use new streaming interface whenever possible */
+void* LZ4_create (const char* inputBuffer);
+int LZ4_sizeofStreamState(void);
+int LZ4_resetStreamState(void* state, const char* inputBuffer);
+char* LZ4_slideInputBuffer (void* state);
+
+/* Obsolete streaming decoding functions */
+int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int compressedSize, int maxOutputSize);
+int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int originalSize);
+
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/src/lz4hc.c b/src/lz4hc.c
new file mode 100755
index 000000000..608674902
--- /dev/null
+++ b/src/lz4hc.c
@@ -0,0 +1,892 @@
+/*
+ LZ4 HC - High Compression Mode of LZ4
+ Copyright (C) 2011-2014, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+
+
+/**************************************
+ Tuning Parameter
+**************************************/
+#define LZ4HC_DEFAULT_COMPRESSIONLEVEL 8
+
+
+/**************************************
+ Memory routines
+**************************************/
+#include <stdlib.h>   /* calloc, free */
+#define ALLOCATOR(s) calloc(1,s)
+#define FREEMEM free
+#include <string.h>   /* memset, memcpy */
+#define MEM_INIT memset
+
+
+/**************************************
+ CPU Feature Detection
+**************************************/
+/* 32 or 64 bits ? */
+#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
+ || defined(__powerpc64__) || defined(__powerpc64le__) \
+ || defined(__ppc64__) || defined(__ppc64le__) \
+ || defined(__PPC64__) || defined(__PPC64LE__) \
+ || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) /* Detects 64 bits mode */
+# define LZ4_ARCH64 1
+#else
+# define LZ4_ARCH64 0
+#endif
+
+/*
+ * Little Endian or Big Endian ?
+ * Overwrite the #define below if you know your architecture endianess
+ */
+#include <stdlib.h>   /* Apparently required to detect endianess */
+#if defined (__GLIBC__)
+#  include <endian.h>
+# if (__BYTE_ORDER == __BIG_ENDIAN)
+# define LZ4_BIG_ENDIAN 1
+# endif
+#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
+# define LZ4_BIG_ENDIAN 1
+#elif defined(__sparc) || defined(__sparc__) \
+ || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
+ || defined(__hpux) || defined(__hppa) \
+ || defined(_MIPSEB) || defined(__s390__)
+# define LZ4_BIG_ENDIAN 1
+#else
+/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */
+#endif
+
+/*
+ * Unaligned memory access is automatically enabled for "common" CPUs, such as x86.
+ * For other CPUs, the compiler will be more cautious and insert extra code to ensure aligned access is respected.
+ * If you know your target CPU supports unaligned memory access, you may want to force this option manually to improve performance.
+ */
+#if defined(__ARM_FEATURE_UNALIGNED)
+# define LZ4_FORCE_UNALIGNED_ACCESS 1
+#endif
+
+/* Define this parameter if your target system or compiler does not support hardware bit count */
+#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */
+# define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+
+/**************************************
+ Compiler Options
+**************************************/
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
+/* "restrict" is a known keyword */
+#else
+# define restrict /* Disable restrict */
+#endif
+
+#ifdef _MSC_VER /* Visual Studio */
+# define FORCE_INLINE static __forceinline
+# include <intrin.h>   /* For Visual 2005 */
+# if LZ4_ARCH64 /* 64-bits */
+# pragma intrinsic(_BitScanForward64) /* For Visual 2005 */
+# pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */
+# else /* 32-bits */
+# pragma intrinsic(_BitScanForward) /* For Visual 2005 */
+# pragma intrinsic(_BitScanReverse) /* For Visual 2005 */
+# endif
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable used */
+#else
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+#endif
+
+#ifdef _MSC_VER /* Visual Studio */
+# define lz4_bswap16(x) _byteswap_ushort(x)
+#else
+# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
+#endif
+
+
+/**************************************
+ Includes
+**************************************/
+#include "lz4hc.h"
+#include "lz4.h"
+
+
+/**************************************
+ Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+#endif
+
+#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
+# define _PACKED __attribute__ ((packed))
+#else
+# define _PACKED
+#endif
+
+#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+# ifdef __IBMC__
+# pragma pack(1)
+# else
+# pragma pack(push, 1)
+# endif
+#endif
+
+typedef struct _U16_S { U16 v; } _PACKED U16_S;
+typedef struct _U32_S { U32 v; } _PACKED U32_S;
+typedef struct _U64_S { U64 v; } _PACKED U64_S;
+
+#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
+# pragma pack(pop)
+#endif
+
+#define A64(x) (((U64_S *)(x))->v)
+#define A32(x) (((U32_S *)(x))->v)
+#define A16(x) (((U16_S *)(x))->v)
+
+
+/**************************************
+ Constants
+**************************************/
+#define MINMATCH 4
+
+#define DICTIONARY_LOGSIZE 16
+#define MAXD (1<<DICTIONARY_LOGSIZE)
+#define MAXD_MASK ((U32)(MAXD - 1))
+#define MAX_DISTANCE (MAXD - 1)
+
+#define HASH_LOG (DICTIONARY_LOGSIZE-1)
+#define HASHTABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASHTABLESIZE-1)
+
+#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))
+#define HASH_VALUE(p) HASH_FUNCTION(A32(p))
+#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base)
+#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK]
+#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p))
+
+
+/**************************************
+ Private functions
+**************************************/
+#if LZ4_ARCH64
+
+FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
+{
+#if defined(LZ4_BIG_ENDIAN)
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanReverse64( &r, val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clzll(val) >> 3);
+# else
+ int r;
+ if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+# endif
+#else
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanForward64( &r, val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctzll(val) >> 3);
+# else
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+ return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
+# endif
+#endif
+}
+
+#else
+
+FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
+{
+#if defined(LZ4_BIG_ENDIAN)
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanReverse( &r, val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_clz(val) >> 3);
+# else
+ int r;
+ if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+ r += (!val);
+ return r;
+# endif
+#else
+# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanForward( &r, val );
+ return (int)(r>>3);
+# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (__builtin_ctz(val) >> 3);
+# else
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+#endif
+}
+
+#endif
+
+
+int LZ4_sizeofStreamStateHC()
+{
+ return sizeof(LZ4HC_Data_Structure);
+}
+
+FORCE_INLINE void LZ4_initHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
+{
+ MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
+ MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+ hc4->nextToUpdate = base + 1;
+ hc4->base = base;
+ hc4->inputBuffer = base;
+ hc4->end = base;
+}
+
+int LZ4_resetStreamStateHC(void* state, const char* inputBuffer)
+{
+ if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1; /* Error : pointer is not aligned for pointer (32 or 64 bits) */
+ LZ4_initHC((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
+ return 0;
+}
+
+
+void* LZ4_createHC (const char* inputBuffer)
+{
+ void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));
+ LZ4_initHC ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
+ return hc4;
+}
+
+
+int LZ4_freeHC (void* LZ4HC_Data)
+{
+ FREEMEM(LZ4HC_Data);
+ return (0);
+}
+
+
+/* Update chains up to ip (excluded) */
+FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
+{
+ U16* chainTable = hc4->chainTable;
+ HTYPE* HashTable = hc4->hashTable;
+ INITBASE(base,hc4->base);
+
+ while(hc4->nextToUpdate < ip)
+ {
+ const BYTE* const p = hc4->nextToUpdate;
+ size_t delta = (p) - HASH_POINTER(p);
+ if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
+ DELTANEXT(p) = (U16)delta;
+ HashTable[HASH_VALUE(p)] = (HTYPE)((p) - base);
+ hc4->nextToUpdate++;
+ }
+}
+
+
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
+{
+ LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
+ U32 distance = (U32)(hc4->end - hc4->inputBuffer) - 64 KB;
+ distance = (distance >> 16) << 16; /* Must be a multiple of 64 KB */
+ LZ4HC_Insert(hc4, hc4->end - MINMATCH);
+ memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB);
+ hc4->nextToUpdate -= distance;
+ hc4->base -= distance;
+ if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB) /* Avoid overflow */
+ {
+ int i;
+ hc4->base += 1 GB;
+ for (i=0; i<HASHTABLESIZE; i++) hc4->hashTable[i] -= 1 GB;
+ }
+ hc4->end -= distance;
+ return (char*)(hc4->end);
+}
+
+
+FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
+{
+ const BYTE* p1t = p1;
+
+ while (p1t<matchlimit-(STEPSIZE-1))
+ {
+ UARCH diff = AARCH(p2) ^ AARCH(p1t);
+ if (!diff) { p1t+=STEPSIZE; p2+=STEPSIZE; continue; }
+ p1t += LZ4_NbCommonBytes(diff);
+ return (p1t - p1);
+ }
+ if (LZ4_ARCH64) if ((p1t<(matchlimit-3)) && (A32(p2) == A32(p1t))) { p1t+=4; p2+=4; }
+ if ((p1t<(matchlimit-1)) && (A16(p2) == A16(p1t))) { p1t+=2; p2+=2; }
+ if ((p1t<matchlimit) && (*p2 == *p1t)) p1t++;
+ return (p1t - p1);
+}
+
+
+FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos, const int maxNbAttempts)
+{
+ U16* const chainTable = hc4->chainTable;
+ HTYPE* const HashTable = hc4->hashTable;
+ const BYTE* ref;
+ INITBASE(base,hc4->base);
+ int nbAttempts=maxNbAttempts;
+ size_t repl=0, ml=0;
+ U16 delta=0; /* useless assignment, to remove an uninitialization warning */
+
+ /* HC4 match finder */
+ LZ4HC_Insert(hc4, ip);
+ ref = HASH_POINTER(ip);
+
+#define REPEAT_OPTIMIZATION
+#ifdef REPEAT_OPTIMIZATION
+ /* Detect repetitive sequences of length <= 4 */
+ if ((U32)(ip-ref) <= 4) /* potential repetition */
+ {
+ if (A32(ref) == A32(ip)) /* confirmed */
+ {
+ delta = (U16)(ip-ref);
+ repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
+ *matchpos = ref;
+ }
+ ref = GETNEXT(ref);
+ }
+#endif
+
+ while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
+ {
+ nbAttempts--;
+ if (*(ref+ml) == *(ip+ml))
+ if (A32(ref) == A32(ip))
+ {
+ size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH;
+ if (mlt > ml) { ml = mlt; *matchpos = ref; }
+ }
+ ref = GETNEXT(ref);
+ }
+
+#ifdef REPEAT_OPTIMIZATION
+ /* Complete table */
+ if (repl)
+ {
+ const BYTE* ptr = ip;
+ const BYTE* end;
+
+ end = ip + repl - (MINMATCH-1);
+ while(ptr < end-delta)
+ {
+ DELTANEXT(ptr) = delta; /* Pre-Load */
+ ptr++;
+ }
+ do
+ {
+ DELTANEXT(ptr) = delta;
+ HashTable[HASH_VALUE(ptr)] = (HTYPE)((ptr) - base); /* Head of chain */
+ ptr++;
+ } while(ptr < end);
+ hc4->nextToUpdate = end;
+ }
+#endif
+
+ return (int)ml;
+}
+
+
+FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos, const int maxNbAttempts)
+{
+ U16* const chainTable = hc4->chainTable;
+ HTYPE* const HashTable = hc4->hashTable;
+ INITBASE(base,hc4->base);
+ const BYTE* ref;
+ int nbAttempts = maxNbAttempts;
+ int delta = (int)(ip-startLimit);
+
+ /* First Match */
+ LZ4HC_Insert(hc4, ip);
+ ref = HASH_POINTER(ip);
+
+ while (((U32)(ip-ref) <= MAX_DISTANCE) && (nbAttempts))
+ {
+ nbAttempts--;
+ if (*(startLimit + longest) == *(ref - delta + longest))
+ if (A32(ref) == A32(ip))
+ {
+#if 1
+ const BYTE* reft = ref+MINMATCH;
+ const BYTE* ipt = ip+MINMATCH;
+ const BYTE* startt = ip;
+
+ while (ipt<matchlimit-(STEPSIZE-1))
+ {
+ UARCH diff = AARCH(reft) ^ AARCH(ipt);
+ if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; }
+ ipt += LZ4_NbCommonBytes(diff);
+ goto _endCount;
+ }
+ if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; }
+ if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; }
+ if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
+_endCount:
+ reft = ref;
+#else
+ /* Easier for code maintenance, but unfortunately slower too */
+ const BYTE* startt = ip;
+ const BYTE* reft = ref;
+ const BYTE* ipt = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit);
+#endif
+
+ while ((startt>startLimit) && (reft > hc4->inputBuffer) && (startt[-1] == reft[-1])) {startt--; reft--;}
+
+ if ((ipt-startt) > longest)
+ {
+ longest = (int)(ipt-startt);
+ *matchpos = reft;
+ *startpos = startt;
+ }
+ }
+ ref = GETNEXT(ref);
+ }
+
+ return longest;
+}
+
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+
+FORCE_INLINE int LZ4HC_encodeSequence (
+ const BYTE** ip,
+ BYTE** op,
+ const BYTE** anchor,
+ int matchLength,
+ const BYTE* ref,
+ limitedOutput_directive limitedOutputBuffer,
+ BYTE* oend)
+{
+ int length;
+ BYTE* token;
+
+ /* Encode Literal length */
+ length = (int)(*ip - *anchor);
+ token = (*op)++;
+ if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1; /* Check output limit */
+ if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
+ else *token = (BYTE)(length<<ML_BITS);
+
+ /* Copy Literals */
+ LZ4_BLINDCOPY(*anchor, *op, length);
+
+ /* Encode Offset */
+ LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
+
+ /* Encode MatchLength */
+ length = (int)(matchLength-MINMATCH);
+ if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */
+ if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
+ else *token += (BYTE)(length);
+
+ /* Prepare next loop */
+ *ip += matchLength;
+ *anchor = *ip;
+
+ return 0;
+}
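
For orientation, the token written above packs the literal-run length into its high 4 bits and the match-length code into its low 4 bits, with 255-valued extension bytes whenever a field saturates. A standalone sketch of splitting such a token, using plain unsigned char rather than the file's internal types:

/* Sketch: split an LZ4 sequence token into its two length fields,
   mirroring what LZ4HC_encodeSequence() emits. */
typedef struct { unsigned literalLength; unsigned matchCode; } token_fields;

static token_fields read_token(unsigned char token)
{
    token_fields f;
    f.literalLength = token >> 4;      /* 15 (RUN_MASK) means extension bytes follow */
    f.matchCode     = token & 0x0F;    /* 15 (ML_MASK) means extension bytes follow;
                                          real match length = matchCode + MINMATCH */
    return f;
}
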
+
+
+#define MAX_COMPRESSION_LEVEL 16
+static int LZ4HC_compress_generic (
+ void* ctxvoid,
+ const char* source,
+ char* dest,
+ int inputSize,
+ int maxOutputSize,
+ int compressionLevel,
+ limitedOutput_directive limit
+ )
+{
+ LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
+ const BYTE* ip = (const BYTE*) source;
+ const BYTE* anchor = ip;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimit = iend - MFLIMIT;
+ const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* const oend = op + maxOutputSize;
+
+ const int maxNbAttempts = compressionLevel > MAX_COMPRESSION_LEVEL ? 1 << MAX_COMPRESSION_LEVEL : compressionLevel ? 1<<(compressionLevel-1) : 1<<LZ4HC_DEFAULT_COMPRESSIONLEVEL;
+ int ml, ml2, ml3, ml0;
+ const BYTE* ref=NULL;
+ const BYTE* start2=NULL;
+ const BYTE* ref2=NULL;
+ const BYTE* start3=NULL;
+ const BYTE* ref3=NULL;
+ const BYTE* start0;
+ const BYTE* ref0;
+
+ /* Ensure blocks follow each other */
+ if (ip != ctx->end) return 0;
+ ctx->end += inputSize;
+
+ ip++;
+
+ /* Main Loop */
+ while (ip < mflimit)
+ {
+ ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
+ if (!ml) { ip++; continue; }
+
+ /* saved, in case we would skip too much */
+ start0 = ip;
+ ref0 = ref;
+ ml0 = ml;
+
+_Search2:
+ if (ip+ml < mflimit)
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+ else ml2 = ml;
+
+ if (ml2 == ml) /* No better match */
+ {
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+ continue;
+ }
+
+ if (start0 < ip)
+ {
+ if (start2 < ip + ml0) /* empirical */
+ {
+ ip = start0;
+ ref = ref0;
+ ml = ml0;
+ }
+ }
+
+ /* Here, start0==ip */
+ if ((start2 - ip) < 3) /* First Match too small : removed */
+ {
+ ml = ml2;
+ ip = start2;
+ ref =ref2;
+ goto _Search2;
+ }
+
+_Search3:
+ /*
+ * Currently we have :
+ * ml2 > ml1, and
+ * ip1+3 <= ip2 (usually < ip1+ml1)
+ */
+ if ((start2 - ip) < OPTIMAL_ML)
+ {
+ int correction;
+ int new_ml = ml;
+ if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+ if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = new_ml - (int)(start2 - ip);
+ if (correction > 0)
+ {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+ if (start2 + ml2 < mflimit)
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+ else ml3 = ml2;
+
+ if (ml3 == ml2) /* No better match : 2 sequences to encode */
+ {
+ /* ip & ref are known; Now for ml */
+ if (start2 < ip+ml) ml = (int)(start2 - ip);
+ /* Now, encode 2 sequences */
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+ ip = start2;
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+ continue;
+ }
+
+ if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */
+ {
+ if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+ {
+ if (start2 < ip+ml)
+ {
+ int correction = (int)(ip+ml - start2);
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ if (ml2 < MINMATCH)
+ {
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ }
+ }
+
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+ ip = start3;
+ ref = ref3;
+ ml = ml3;
+
+ start0 = start2;
+ ref0 = ref2;
+ ml0 = ml2;
+ goto _Search2;
+ }
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ goto _Search3;
+ }
+
+ /*
+ * OK, now we have 3 ascending matches; let's write at least the first one
+ * ip & ref are known; Now for ml
+ */
+ if (start2 < ip+ml)
+ {
+ if ((start2 - ip) < (int)ML_MASK)
+ {
+ int correction;
+ if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+ if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = ml - (int)(start2 - ip);
+ if (correction > 0)
+ {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ else
+ {
+ ml = (int)(start2 - ip);
+ }
+ }
+ if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+
+ ip = start2;
+ ref = ref2;
+ ml = ml2;
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+
+ goto _Search3;
+
+ }
+
+ /* Encode Last Literals */
+ {
+ int lastRun = (int)(iend - anchor);
+ if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
+ if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+ else *op++ = (BYTE)(lastRun<<ML_BITS);
+ assert library._collection.count() > 0
+ assert library._collection.versions.count()
+ assert repr(library.read('symbol').data) == repr(data)
+ # Nothing done
+ assert len(library._collection.versions.find_one({})['parent'])
+ else:
+ run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
+ assert library._collection.count() > 0
+ assert library._collection.versions.count()
+ # Data still available (write with prune_previous_version will do the cleanup)
+ assert repr(library.read('symbol').data) == repr(data)
+ # Snapshot cleaned up
+ assert not len(library._collection.versions.find_one({})['parent'])
+
+
+@pytest.mark.parametrize(['dry_run', 'data'], [(x, y) for (x, y) in itertools.product([True, False],
+ [some_object, ts])])
+def test_cleanup_orphaned_snapshots_nop(mongo_host, library, data, dry_run):
+ """
+ Check that we do / don't cleanup chunks based on the dry-run
+ """
+ yesterday = dt.utcnow() - dtd(days=1, seconds=1)
+ _id = bson.ObjectId.from_datetime(yesterday)
+ library.write('symbol', data, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=_id):
+ library.snapshot('snap_name')
+
+ # No cleanup on dry-run
+ if dry_run:
+ run_as_main(main, '--library', 'user.library', '--host', mongo_host)
+ assert library._collection.count() > 0
+ assert library._collection.versions.count()
+ assert repr(library.read('symbol').data) == repr(data)
+ # Nothing done
+ assert len(library._collection.versions.find_one({})['parent'])
+ else:
+ run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
+ assert library._collection.count() > 0
+ assert library._collection.versions.count()
+ # Data still available (write with prune_previous_version will do the cleanup)
+ assert repr(library.read('symbol').data) == repr(data)
+ # Nothing done
+ assert len(library._collection.versions.find_one({})['parent'])
+
+
+@pytest.mark.parametrize(['dry_run', 'data'], [(x, y) for (x, y) in itertools.product([True, False],
+ [some_object, ts])])
+def test_dont_cleanup_recent_orphaned_snapshots(mongo_host, library, data, dry_run):
+ """
+ Check that we do / don't cleanup chunks based on the dry-run
+ """
+ today = dt.utcnow() - dtd(hours=12, seconds=1)
+ _id = bson.ObjectId.from_datetime(today)
+ library.write('symbol', data, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=_id):
+ library.snapshot('snap_name')
+
+ # Remove the snapshot document, leaving an orphaned (but recent) snapshot reference
+ assert library._collection.snapshots.delete_many({})
+
+ # No cleanup on dry-run
+ if dry_run:
+ run_as_main(main, '--library', 'user.library', '--host', mongo_host)
+ assert library._collection.count() > 0
+ assert library._collection.versions.count()
+ assert repr(library.read('symbol').data) == repr(data)
+ # Nothing done
+ assert len(library._collection.versions.find_one({})['parent'])
+ else:
+ run_as_main(main, '--library', 'user.library', '--host', mongo_host, '-f')
+ assert library._collection.count() > 0
+ assert library._collection.versions.count()
+ # Data still available (write with prune_previous_version will do the cleanup)
+ assert repr(library.read('symbol').data) == repr(data)
+ # Not cleaned up - the snapshot is too recent
+ assert len(library._collection.versions.find_one({})['parent'])
diff --git a/tests/integration/scripts/test_copy_data.py b/tests/integration/scripts/test_copy_data.py
new file mode 100644
index 000000000..601531e29
--- /dev/null
+++ b/tests/integration/scripts/test_copy_data.py
@@ -0,0 +1,144 @@
+from mock import patch, call
+from pandas.util.testing import assert_frame_equal
+import pytest
+
+from arctic import arctic as m
+from arctic.scripts import arctic_copy_data as mcd
+
+from ...util import read_str_as_pandas, run_as_main
+
+
+@pytest.fixture(scope='function', autouse=True)
+def init(arctic):
+ arctic.initialize_library('user.library', m.VERSION_STORE, segment='month')
+ arctic.initialize_library('user.library2', m.VERSION_STORE, segment='month')
+
+
+ts = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 1.0
+ 2012-10-08 17:06:11.040 | 2.0
+ 2012-10-09 17:06:11.040 | 2.5
+ 2012-11-08 17:06:11.040 | 3.0""")
+ts1 = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 4.0
+ 2012-10-08 17:06:11.040 | 5.0
+ 2012-10-09 17:06:11.040 | 6.5
+ 2012-11-08 17:06:11.040 | 7.0""")
+ts2 = read_str_as_pandas(""" times | near
+ 2012-10-08 17:06:11.040 | 5.0
+ 2012-10-09 17:06:11.040 | 6.5""")
+ts3 = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 1.0
+ 2012-10-08 17:06:11.040 | 5.0
+ 2012-10-09 17:06:11.040 | 6.5
+ 2012-11-08 17:06:11.040 | 3.0""")
+
+def test_copy_data_no_force(arctic, mongo_host):
+ src = 'user.library'
+ dest = 'user.library2'
+ # Put ts, ts1 in library
+ arctic[src].write('some_ts', ts1)
+ arctic[src].write('some_ts1', ts1)
+
+ # Put some other value for ts in library2
+ arctic[dest].write('some_ts', ts)
+
+ # Create the user against the current mongo database
+ src_host = 'arctic_' + src + '@' + mongo_host
+ dest_host = 'arctic_' + dest + '@' + mongo_host
+ with patch('arctic.scripts.arctic_copy_data.logger') as logger:
+ run_as_main(mcd.main, '--src', src_host, '--dest', dest_host, '--log', 'CR101', 'some_ts', 'some_ts1')
+
+ assert_frame_equal(ts, arctic[dest].read('some_ts').data)
+ assert_frame_equal(ts1, arctic[dest].read('some_ts1').data)
+ assert logger.info.call_args_list == [call('Copying data from %s -> %s' % (src_host, dest_host)),
+ call('Copying: 2 symbols')]
+ assert logger.warn.call_args_list == [call('Symbol: some_ts already exists in %s, use --force to overwrite or --splice to join with existing data' % dest_host)]
+ assert arctic[dest].read_audit_log('some_ts1')[0]['message'] == 'CR101'
+
+
+def test_copy_data_force(arctic, mongo_host):
+ src = 'user.library'
+ dest = 'user.library2'
+ # Put ts, ts1 in library
+ arctic[src].write('some_ts', ts)
+ arctic[src].write('some_ts1', ts1)
+
+ # Put some other value for ts in library2
+ arctic[dest].write('some_ts', ts1)
+
+ # Create the user against the current mongo database
+ src_host = src + '@' + mongo_host
+ dest_host = dest + '@' + mongo_host
+ with patch('arctic.scripts.arctic_copy_data.logger') as logger:
+ run_as_main(mcd.main, '--src', src_host, '--dest', dest_host, '--log', 'CR101', '--force', 'some_ts', 'some_ts1')
+
+ assert_frame_equal(ts, arctic[dest].read('some_ts').data)
+ assert_frame_equal(ts1, arctic[dest].read('some_ts1').data)
+ assert logger.info.call_args_list == [call('Copying data from %s -> %s' % (src_host, dest_host)),
+ call('Copying: 2 symbols')]
+ assert logger.warn.call_args_list == [call('Symbol: some_ts already exists in destination, OVERWRITING')]
+ assert arctic[dest].read_audit_log('some_ts1')[0]['message'] == 'CR101'
+
+
+def test_copy_data_splice(arctic, mongo_host):
+ src = 'user.library'
+ dest = 'user.library2'
+ # Put ts, ts1 in library
+ arctic[src].write('some_ts', ts2)
+ arctic[src].write('some_ts1', ts1)
+
+ # Put some other value for ts in library2
+ arctic[dest].write('some_ts', ts)
+
+ # Create the user against the current mongo database
+ src_host = src + '@' + mongo_host
+ dest_host = dest + '@' + mongo_host
+ with patch('arctic.scripts.arctic_copy_data.logger') as logger:
+ run_as_main(mcd.main, '--src', src_host, '--dest', dest_host, '--log', 'CR101', '--splice', 'some_ts', 'some_ts1')
+
+ assert_frame_equal(ts3, arctic[dest].read('some_ts').data)
+ assert_frame_equal(ts1, arctic[dest].read('some_ts1').data)
+ assert logger.info.call_args_list == [call('Copying data from %s -> %s' % (src_host, dest_host)),
+ call('Copying: 2 symbols')]
+ assert logger.warn.call_args_list == [call('Symbol: some_ts already exists in destination, splicing in new data')]
+
+ assert arctic[dest].read_audit_log('some_ts')[0]['message'] == 'CR101'
+
+
+def test_copy_data_wild(arctic, mongo_host):
+ src = 'user.library'
+ dest = 'user.library2'
+ # Put ts, ts1 in library
+ arctic[src].write('some_a_ts', ts)
+ arctic[src].write('some_a_ts1', ts1)
+ arctic[src].write('some_b_ts1', ts1)
+ arctic[src].write('some_c_ts1', ts1)
+
+ # Create the user against the current mongo database
+ src_host = 'arctic_' + src + '@' + mongo_host
+ dest_host = 'arctic_' + dest + '@' + mongo_host
+ with patch('arctic.scripts.arctic_copy_data.logger') as logger:
+ run_as_main(mcd.main, '--src', src_host, '--dest', dest_host, '--log', 'CR101', '.*_a_.*', '.*_b_.*')
+
+ assert_frame_equal(ts, arctic[dest].read('some_a_ts').data)
+ assert_frame_equal(ts1, arctic[dest].read('some_a_ts1').data)
+ assert_frame_equal(ts1, arctic[dest].read('some_b_ts1').data)
+ assert logger.info.call_args_list == [call('Copying data from %s -> %s' % (src_host, dest_host)),
+ call('Copying: 3 symbols')]
+ assert arctic[dest].read_audit_log('some_a_ts1')[0]['message'] == 'CR101'
+
+
+def test_copy_data_doesnt_exist(arctic, mongo_host):
+ src = 'user.library'
+ dest = 'user.library2'
+
+ # Create the user against the current mongo database
+ src_host = src + '@' + mongo_host
+ dest_host = dest + '@' + mongo_host
+ with patch('arctic.scripts.arctic_copy_data.logger') as logger:
+ run_as_main(mcd.main, '--src', src_host, '--dest', dest_host, '--log', 'CR101', 'some_ts')
+
+ assert logger.info.call_args_list == [call('Copying data from %s -> %s' % (src_host, dest_host)),
+ call('Copying: 0 symbols')]
+ assert logger.warn.call_args_list == [call('No symbols found that matched those provided.')]
diff --git a/tests/integration/scripts/test_create_user.py b/tests/integration/scripts/test_create_user.py
new file mode 100644
index 000000000..b70f549cd
--- /dev/null
+++ b/tests/integration/scripts/test_create_user.py
@@ -0,0 +1,108 @@
+from mock import patch
+from StringIO import StringIO
+
+from arctic.auth import Credential
+from arctic.scripts import arctic_create_user as mcu
+
+from ...util import run_as_main
+
+
+def test_create_user(mongo_host, mongodb):
+ # Create the user against the current mongo database
+ with patch('arctic.scripts.arctic_create_user.get_auth',
+ return_value=Credential('admin', 'adminuser', 'adminpwd')), \
+ patch('pymongo.database.Database.authenticate',return_value=True):
+ run_as_main(mcu.main, '--host', mongo_host, 'user', '--pass', 'pass')
+
+ # Check:
+ # User exists in system
+ user = mongodb.admin.system.users.find_one({'user': 'user'})
+ assert user
+ assert user['readOnly'] == True
+ # User db exists
+ user = mongodb.arctic_user.system.users.find_one({'user': 'user'})
+ assert user
+ assert 'readOnly' not in user or user['readOnly'] == False
+
+
+def test_create_admin_user(mongo_host, mongodb):
+ # Create the user against the current mongo database
+
+ with patch('arctic.scripts.arctic_create_user.get_auth',
+ return_value=Credential('admin', 'adminuser', 'adminpwd')), \
+ patch('pymongo.database.Database.authenticate', return_value=True):
+ run_as_main(mcu.main, '--host', mongo_host, 'user', '--pass', 'pass', '--admin-write')
+
+ # Check:
+ # User exists in system
+ user = mongodb.admin.system.users.find_one({'user': 'user'})
+ assert user
+ assert 'readOnly' not in user or user['readOnly'] == False
+ # User db exists
+ user = mongodb.arctic_user.system.users.find_one({'user': 'user'})
+ assert user
+ assert 'readOnly' not in user or user['readOnly'] == False
+
+
+def test_create_user_verbose(mongo_host, mongodb):
+ user = 'user'
+ pwd = 'password'
+ stderr = StringIO()
+ stdout = StringIO()
+ with patch('arctic.scripts.arctic_create_user.get_auth',
+ return_value=Credential('admin', 'adminuser', 'adminpwd')), \
+ patch('pymongo.database.Database.authenticate', return_value=True), \
+ patch('sys.stderr', stderr), \
+ patch('sys.stdout', stdout):
+ run_as_main(mcu.main, '--host', mongo_host, user, '--pass', pwd, '--verbose')
+ out = stdout.getvalue()
+ assert 'Adding user %s to DB %s' % (user, mongo_host) in out
+ assert 'Adding database arctic_%s to DB %s' % (user, mongo_host) in out
+
+
+def test_create_user_dryrun_nodb(mongo_host, mongodb):
+ user = 'user'
+ pwd = 'password'
+ stderr = StringIO()
+ stdout = StringIO()
+ with patch('arctic.scripts.arctic_create_user.get_auth',
+ return_value=Credential('admin', 'adminuser', 'adminpwd')), \
+ patch('pymongo.database.Database.authenticate', return_value=True), \
+ patch('sys.stderr', stderr), \
+ patch('sys.stdout', stdout):
+ run_as_main(mcu.main, '--host', mongo_host, user, '--pass', pwd, '--dryrun', '--nodb')
+ out = stdout.getvalue()
+ assert 'DRYRUN: add user %s readonly True nodb True' % (user) in out
+
+def test_create_user_no_passwd(mongo_host, mongodb):
+ user = 'user'
+ pwd = None
+ newpwd = 'newpasswd'
+ stdout = StringIO()
+ with patch('arctic.scripts.arctic_create_user.get_auth',
+ return_value=Credential('admin', 'adminuser', 'adminpwd')), \
+ patch('pymongo.database.Database.authenticate',return_value=True), \
+ patch('base64.b64encode',return_value=newpwd), \
+ patch('sys.stdout', stdout):
+ run_as_main(mcu.main, '--host', mongo_host, user)
+ out = stdout.getvalue()
+ assert '%-16s %s' % (user,newpwd) in out
+
+
+def test_create_user_no_creds(mongo_host, mongodb):
+ stderr = StringIO()
+ with patch('arctic.scripts.arctic_create_user.get_auth', return_value=None), \
+ patch('sys.stderr', stderr):
+ run_as_main(mcu.main, '--host', mongo_host)
+ err = stderr.getvalue()
+ assert 'You have no admin credentials' in err
+
+
+def test_create_user_auth_fail(mongo_host):
+ stderr = StringIO()
+ with patch('arctic.scripts.arctic_create_user.get_auth', return_value=Credential('admin', 'user', 'pass')), \
+ patch('pymongo.database.Database.authenticate', return_value=False), \
+ patch('sys.stderr', stderr):
+ run_as_main(mcu.main, '--host', mongo_host)
+ err = stderr.getvalue()
+ assert 'Failed to authenticate' in err
diff --git a/tests/integration/scripts/test_delete_library.py b/tests/integration/scripts/test_delete_library.py
new file mode 100644
index 000000000..0ce944ffb
--- /dev/null
+++ b/tests/integration/scripts/test_delete_library.py
@@ -0,0 +1,61 @@
+import getpass
+import pytest
+
+from arctic.scripts import arctic_delete_library
+
+from ...util import run_as_main
+
+
+@pytest.fixture(scope='function')
+def library_name():
+ return 'user.library'
+
+
+@pytest.fixture(scope='function')
+def user_library_name():
+ return "{}.library".format(getpass.getuser())
+
+
+def test_delete_library(mongo_host, arctic, library, user_library):
+ assert 'user.library' in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() in arctic.list_libraries()
+ run_as_main(arctic_delete_library.main, '--host', mongo_host,
+ '--library', 'user.library')
+ assert 'user.library' not in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() in arctic.list_libraries()
+
+
+def test_delete_library1(mongo_host, arctic, library, user_library):
+ assert 'user.library' in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() in arctic.list_libraries()
+ run_as_main(arctic_delete_library.main, '--host', mongo_host,
+ '--library', 'arctic_user.library')
+ assert 'user.library' not in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() in arctic.list_libraries()
+
+
+def test_delete_library2(mongo_host, arctic, library, user_library):
+ assert 'user.library' in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() in arctic.list_libraries()
+ run_as_main(arctic_delete_library.main, '--host', mongo_host,
+ '--library', 'arctic_%s.library' % getpass.getuser())
+ assert 'user.library' in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() not in arctic.list_libraries()
+
+
+def test_delete_library3(mongo_host, arctic, library, user_library):
+ assert 'user.library' in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() in arctic.list_libraries()
+ run_as_main(arctic_delete_library.main, '--host', mongo_host,
+ '--library', '%s.library' % getpass.getuser())
+ assert 'user.library' in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() not in arctic.list_libraries()
+
+
+def test_delete_library_doesnt_exist(mongo_host, arctic, library, user_library):
+ assert 'user.library' in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() in arctic.list_libraries()
+ run_as_main(arctic_delete_library.main, '--host', mongo_host,
+ '--library', 'arctic_nosuchlibrary.missing')
+ assert 'user.library' in arctic.list_libraries()
+ assert '%s.library' % getpass.getuser() in arctic.list_libraries()
diff --git a/tests/integration/scripts/test_enable_sharding.py b/tests/integration/scripts/test_enable_sharding.py
new file mode 100644
index 000000000..ec59f3a3f
--- /dev/null
+++ b/tests/integration/scripts/test_enable_sharding.py
@@ -0,0 +1,44 @@
+from mock import patch, Mock, call
+import getpass
+import pytest
+from pymongo.errors import OperationFailure
+from pymongo.read_preferences import Primary
+
+from arctic.hooks import get_mongodb_uri
+from arctic.scripts import arctic_enable_sharding as mes
+
+from ...util import run_as_main
+
+
+def test_enable_sharding(mongo_host, arctic, mongodb, user_library, user_library_name):
+ c = mongodb
+ with patch.object(c, 'admin') as admin:
+ with patch('pymongo.MongoClient', return_value=c) as mc:
+ run_as_main(mes.main, '--host', mongo_host, '--library', user_library_name)
+ assert mc.call_args_list == [call(get_mongodb_uri(mongo_host))]
+ assert admin.command.call_args_list == [call('buildinfo', read_preference=Primary()),
+ call('enablesharding', 'arctic_' + getpass.getuser()),
+ call('shardCollection', 'arctic_' + user_library_name, key={'symbol': 1})]
+
+
+def test_enable_sharding_already_on_db(mongo_host, arctic, mongodb, user_library, user_library_name):
+ c = mongodb
+ with patch.object(c, 'admin') as admin:
+ admin.command = Mock(return_value=[OperationFailure("failed: already enabled"),
+ None])
+ with patch('pymongo.MongoClient', return_value=c) as mc:
+ run_as_main(mes.main, '--host', mongo_host, '--library', user_library_name)
+ assert mc.call_args_list == [call(get_mongodb_uri(mongo_host))]
+ assert admin.command.call_args_list == [call('buildinfo', read_preference=Primary()),
+ call('enablesharding', 'arctic_' + getpass.getuser()),
+ call('shardCollection', 'arctic_' + user_library_name, key={'symbol': 1})]
+
+
+def test_enable_sharding_on_db_other_failure(mongo_host, arctic, mongodb, user_library, user_library_name):
+ # Create the user against the current mongo database
+ c = mongodb
+ with pytest.raises(OperationFailure):
+ with patch.object(c, 'admin') as admin:
+ with patch('pymongo.MongoClient', return_value=c):
+ admin.command = Mock(side_effect=OperationFailure('OOPS'))
+ run_as_main(mes.main, '--host', mongo_host, '--library', user_library_name)
diff --git a/tests/integration/scripts/test_initialize_library.py b/tests/integration/scripts/test_initialize_library.py
new file mode 100644
index 000000000..57ec93419
--- /dev/null
+++ b/tests/integration/scripts/test_initialize_library.py
@@ -0,0 +1,41 @@
+from mock import patch
+import pytest
+
+from arctic.auth import Credential
+from arctic.arctic import Arctic
+from arctic.scripts import arctic_init_library as mil
+
+from ...util import run_as_main
+
+
+def test_init_library(mongo_host):
+ # Initialise the library against the current mongo database
+ with patch('arctic.scripts.arctic_init_library.do_db_auth', return_value=True), \
+ patch('pymongo.database.Database.authenticate', return_value=True):
+ run_as_main(mil.main, '--host', mongo_host, '--library', 'arctic_user.library')
+
+ # Should be able to write something to the library now
+ store = Arctic(mongo_host)
+ assert store['user.library']._arctic_lib.get_library_metadata('QUOTA') == 10240 * 1024 * 1024
+ store['user.library'].write('key', {'a': 'b'})
+ assert store['user.library'].read('key').data == {'a': 'b'}
+
+
+def test_init_library_quota(mongo_host):
+ # Initialise the library, with an explicit quota, against the current mongo database
+ with patch('arctic.scripts.arctic_init_library.do_db_auth', return_value=True), \
+ patch('pymongo.database.Database.authenticate', return_value=True):
+ run_as_main(mil.main, '--host', mongo_host, '--library', 'arctic_user.library', '--quota', '100')
+
+ # Should be able to write something to the library now
+ store = Arctic(mongo_host)
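+ # A quota of '100' appears to be interpreted in GB: the stored QUOTA is 100 * 1024 ** 3 bytes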
+ assert store['user.library']._arctic_lib.get_library_metadata('QUOTA') == 100 * 1024 * 1024 * 1024
+
+
+def test_init_library_bad_library(mongo_host):
+ with pytest.raises(Exception):
+ with patch('arctic.arctic.get_auth', return_value=Credential('admin', 'adminuser', 'adminpwd', 'admin')), \
+ patch('pymongo.database.Database.authenticate', return_value=True), \
+ patch('argparse.ArgumentParser.error', side_effect=Exception):
+ # A library name without a database part should be rejected
+ run_as_main(mil.main, '--host', mongo_host, '--library', 'user')
diff --git a/tests/integration/scripts/test_list_libraries.py b/tests/integration/scripts/test_list_libraries.py
new file mode 100644
index 000000000..113f73a2e
--- /dev/null
+++ b/tests/integration/scripts/test_list_libraries.py
@@ -0,0 +1,28 @@
+from mock import patch, call
+import pytest
+
+from arctic.scripts import arctic_list_libraries
+
+from ...util import run_as_main
+
+
+def test_list_library(mongo_host, library, library_name):
+ with patch('arctic.scripts.arctic_list_libraries.print') as p:
+ run_as_main(arctic_list_libraries.main, "--host", mongo_host)
+ for x in p.call_args_list:
+ if x == call(library_name):
+ return
+ assert False, "Failed to find a library"
+
+
+def test_list_library_args(mongo_host, library, library_name):
+ with patch('arctic.scripts.arctic_list_libraries.print') as p:
+ run_as_main(arctic_list_libraries.main, "--host", mongo_host, library_name[:2])
+ for x in p.call_args_list:
+ assert x[0][0].startswith(library_name[:2])
+
+
+def test_list_library_args_not_found(mongo_host, library, library_name):
+ with patch('arctic.scripts.arctic_list_libraries.print') as p:
+ run_as_main(arctic_list_libraries.main, "--host", mongo_host, 'some_library_which_doesnt_exist')
+ assert p.call_count == 0
diff --git a/tests/integration/scripts/test_prune_versions.py b/tests/integration/scripts/test_prune_versions.py
new file mode 100644
index 000000000..8acd98ae0
--- /dev/null
+++ b/tests/integration/scripts/test_prune_versions.py
@@ -0,0 +1,39 @@
+from mock import patch, ANY, call
+
+from arctic.auth import Credential
+from arctic.scripts import arctic_prune_versions as mpv
+
+from ...util import run_as_main
+
+
+def test_prune_versions_symbol(mongo_host, library, library_name):
+ with patch('arctic.scripts.arctic_prune_versions.prune_versions', autospec=True) as prune_versions, \
+ patch('arctic.scripts.utils.get_auth', return_value=Credential('admin', 'adminuser', 'adminpwd')), \
+ patch('pymongo.database.Database.authenticate', return_value=True):
+
+ run_as_main(mpv.main, '--host', mongo_host, '--library', library_name, '--symbols', 'sym1,sym2')
+ prune_versions.assert_has_calls([call(ANY, 'sym1', 10),
+ call(ANY, 'sym2', 10), ])
+
+
+def test_prune_versions_full(mongo_host, library, library_name):
+ with patch('arctic.scripts.arctic_prune_versions.do_db_auth', return_value=True):
+ # Write some stuff with snapshots
+ library.snapshot('snap')
+ library.write('symbol', "val1")
+ library.write('symbol', "val2")
+ library.snapshot('snap1')
+ library.write('symbol', "val3")
+
+ # Prune older than 10 mins - nothing deleted
+ run_as_main(mpv.main, '--host', mongo_host, '--library', library_name, '--keep-mins', 10)
+ assert [x['version'] for x in library.list_versions('symbol')] == [3, 2, 1]
+ # Prune older than 0 minutes, v1 deleted
+ run_as_main(mpv.main, '--host', mongo_host, '--library', library_name, '--keep-mins', 0)
+ assert [x['version'] for x in library.list_versions('symbol')] == [3, 2]
+
+ # Delete the snapshots
+ library.delete_snapshot('snap')
+ library.delete_snapshot('snap1')
+ run_as_main(mpv.main, '--host', mongo_host, '--library', library_name, '--keep-mins', 0)
+ assert [x['version'] for x in library.list_versions('symbol')] == [3]
diff --git a/tests/integration/store/__init__.py b/tests/integration/store/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/integration/store/test_ndarray_store.py b/tests/integration/store/test_ndarray_store.py
new file mode 100644
index 000000000..ff061005f
--- /dev/null
+++ b/tests/integration/store/test_ndarray_store.py
@@ -0,0 +1,380 @@
+import bson
+from bson import ObjectId, Binary
+import datetime
+from datetime import datetime as dt, timedelta as dtd
+from mock import patch
+import numpy as np
+from numpy.testing import assert_equal
+import os
+from pymongo.server_type import SERVER_TYPE
+import pytest
+import time
+
+from arctic.store._ndarray_store import NdarrayStore, _APPEND_COUNT
+from arctic.store.version_store import register_versioned_storage
+
+from tests.integration.store.test_version_store import _query
+
+
+register_versioned_storage(NdarrayStore)
+
+
+def test_save_read_simple_ndarray(library):
+ ndarr = np.ones(1000)
+ library.write('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr == saved_arr)
+
+
+def test_read_simple_ndarray_from_secondary(library_secondary, library_name):
+ ndarr = np.ones(1000)
+ library_secondary.write('MYARR', ndarr)
+ with patch('pymongo.message.query', side_effect=_query(True, library_name)) as query, \
+ patch('pymongo.server_description.ServerDescription.server_type', SERVER_TYPE.Mongos):
+ saved_arr = library_secondary.read('MYARR').data
+ assert query.call_count > 0
+ assert np.all(ndarr == saved_arr)
+
+
+def test_save_read_big_1darray(library):
+ ndarr = np.random.rand(5326, 6020).ravel()
+ library.write('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr == saved_arr)
+
+
+def test_save_and_resave_reuses_chunks(library):
+ with patch('arctic.store._ndarray_store._CHUNK_SIZE', 1000):
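+ # Shrink the chunk size so the 1024-element array is split across several chunks (9, per the assertion below)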
+ ndarr = np.random.rand(1024)
+ library.write('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr == saved_arr)
+ orig_chunks = library._collection.count()
+ assert orig_chunks == 9
+
+ # Concatenate more values
+ ndarr = np.concatenate([ndarr, np.random.rand(10)])
+ # And change the original values - we're not a simple append
+ ndarr[0] = ndarr[1] = ndarr[2] = 0
+ library.write('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr == saved_arr)
+
+ # Should contain the original chunks, but not double the number
+ # of chunks
+ new_chunks = library._collection.count()
+ assert new_chunks == 11
+
+ # We hit the update (rather than upsert) code path
+ assert library._collection.find({'parent': {'$size': 2}}).count() == 7
+
+
+def test_save_read_big_2darray(library):
+ ndarr = np.random.rand(5326, 6020)
+ library.write('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr == saved_arr)
+
+
+def test_get_info_bson_object(library):
+ ndarr = np.ones(1000)
+ library.write('MYARR', ndarr)
+ assert library._get_info('MYARR').startswith('''Handler: NdarrayStore''')
+
+
+def test_save_read_ndarray_with_array_field(library):
+ ndarr = np.empty(10, dtype=[('A', 'int64'), ('B', 'float64', (2,))])
+ ndarr['A'] = 1
+ ndarr['B'] = 2
+ library.write('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr == saved_arr)
+
+
+def test_append_ndarray_with_field_shape(library):
+ ndarr = np.empty(10, dtype=[('A', 'int64'), ('B', 'float64', (2,))])
+ ndarr['A'] = 1
+ ndarr['B'] = 2
+ ndarr2 = np.empty(10, dtype=[('A', 'int64'), ('B', 'int64', (2,))])
+ ndarr2['A'] = 1
+ ndarr2['B'] = 2
+
+ library.write('MYARR', ndarr)
+ library.append('MYARR', ndarr2)
+ saved_arr = library.read('MYARR').data
+ ndarr3 = np.empty(20, dtype=[('A', 'int64'), ('B', 'float64', (2,))])
+ ndarr3['A'] = 1
+ ndarr3['B'] = 2
+ assert np.all(ndarr3 == saved_arr)
+
+
+def test_append_simple_ndarray(library):
+ ndarr = np.ones(1000, dtype='int64')
+ library.write('MYARR', ndarr)
+ library.append('MYARR', np.ones(1000, dtype='int64'))
+ library.append('MYARR', np.ones(1000, dtype='int64'))
+ library.append('MYARR', np.ones(2005, dtype='int64'))
+ saved_arr = library.read('MYARR').data
+ assert np.all(np.ones(5005, dtype='int64') == saved_arr)
+
+
+def test_append_simple_ndarray_promoting_types(library):
+ ndarr = np.ones(100, dtype='int64')
+ library.write('MYARR', ndarr)
+ library.append('MYARR', np.ones(100, dtype='float64'))
+ library.append('MYARR', np.ones(100, dtype='int64'))
+ library.append('MYARR', np.ones(205, dtype='float64'))
+ saved_arr = library.read('MYARR').data
+ assert np.all(np.ones(505, dtype='float64') == saved_arr)
+
+
+def test_save_read_ndarray(library):
+ ndarr = np.empty(1000, dtype=[('abc', 'int64')])
+ library.write('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr == saved_arr)
+
+
+def test_multiple_write(library):
+ ndarr = np.empty(1000, dtype=[('abc', 'int64')])
+ foo = np.empty(900, dtype=[('abc', 'int64')])
+ library.write('MYARR', foo)
+ v1 = library.read('MYARR').version
+ library.write('MYARR', ndarr[:900])
+ v2 = library.read('MYARR').version
+ library.append('MYARR', ndarr[-100:])
+ v3 = library.read('MYARR').version
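+ # Each write/append produced a new version; the as_of reads below must return the matching historical data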
+
+ assert np.all(ndarr == library.read('MYARR').data)
+ assert np.all(ndarr == library.read('MYARR', as_of=v3).data)
+ assert np.all(foo == library.read('MYARR', as_of=v1).data)
+ assert np.all(ndarr[:900] == library.read('MYARR', as_of=v2).data)
+
+
+def test_cant_write_objects():
+ store = NdarrayStore()
+ assert not store.can_write(None, None, np.array([object()]))
+
+
+def test_promote_types(library):
+ ndarr = np.empty(1000, dtype=[('abc', 'int64')])
+ library.write('MYARR', ndarr[:800])
+ library.append('MYARR', ndarr[-200:].astype([('abc', 'float64')]))
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr.astype([('abc', 'float64')]) == saved_arr)
+
+
+def test_promote_types2(library):
+ ndarr = np.array(np.arange(1000), dtype=[('abc', 'float64')])
+ library.write('MYARR', ndarr[:800])
+ library.append('MYARR', ndarr[-200:].astype([('abc', 'int64')]))
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr.astype([('abc', np.promote_types('float64', 'int64'))]) == saved_arr)
+
+
+def test_save_read_large_ndarray(library):
+ dtype = np.dtype([('abc', 'int64')])
+ ndarr = np.arange(30 * 1024 * 1024 / dtype.itemsize).view(dtype=dtype)
+ assert len(ndarr.tostring()) > 16 * 1024 * 1024
+ library.write('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(ndarr == saved_arr)
+
+
+def test_append_read_large_ndarray(library):
+ dtype = np.dtype([('abc', 'int64')])
+ ndarr = np.arange(50 * 1024 * 1024 / dtype.itemsize).view(dtype=dtype)
+ assert len(ndarr.tostring()) > 16 * 1024 * 1024
+ library.write('MYARR1', ndarr)
+ # Exactly enough appends to trigger 2 re-compacts, so the result should be identical
+ # to writing the whole array at once
+ ndarr2 = np.arange(240).view(dtype=dtype)
+ for n in np.split(ndarr2, 120):
+ library.append('MYARR1', n)
+
+ saved_arr = library.read('MYARR1').data
+ assert np.all(np.concatenate([ndarr, ndarr2]) == saved_arr)
+
+ library.write('MYARR2', np.concatenate([ndarr, ndarr2]))
+
+ version1 = library._read_metadata('MYARR1')
+ version2 = library._read_metadata('MYARR2')
+ assert version1['append_count'] == version2['append_count']
+ assert version1['append_size'] == version2['append_size']
+ assert version1['segment_count'] == version2['segment_count']
+ assert version1['up_to'] == version2['up_to']
+
+
+def test_save_append_read_ndarray(library):
+ dtype = np.dtype([('abc', 'int64')])
+ ndarr = np.arange(30 * 1024 * 1024 / dtype.itemsize).view(dtype=dtype)
+ assert len(ndarr.tostring()) > 16 * 1024 * 1024
+ library.write('MYARR', ndarr)
+
+ sliver = np.arange(30).view(dtype=dtype)
+ library.append('MYARR', sliver)
+
+ saved_arr = library.read('MYARR').data
+ assert np.all(np.concatenate([ndarr, sliver]) == saved_arr)
+
+ library.append('MYARR', sliver)
+ saved_arr = library.read('MYARR').data
+ assert np.all(np.concatenate([ndarr, sliver, sliver]) == saved_arr)
+
+
+def test_save_append_read_1row_ndarray(library):
+ dtype = np.dtype([('abc', 'int64')])
+ ndarr = np.arange(30 * 1024 * 1024 / dtype.itemsize).view(dtype=dtype)
+ assert len(ndarr.tostring()) > 16 * 1024 * 1024
+ library.write('MYARR', ndarr)
+
+ sliver = np.arange(1).view(dtype=dtype)
+ library.append('MYARR', sliver)
+
+ saved_arr = library.read('MYARR').data
+ assert np.all(np.concatenate([ndarr, sliver]) == saved_arr)
+
+ library.append('MYARR', sliver)
+ saved_arr = library.read('MYARR').data
+ assert np.all(np.concatenate([ndarr, sliver, sliver]) == saved_arr)
+
+
+def test_append_too_large_ndarray(library):
+ dtype = np.dtype([('abc', 'int64')])
+ ndarr = np.arange(30 * 1024 * 1024 / dtype.itemsize).view(dtype=dtype)
+ assert len(ndarr.tostring()) > 16 * 1024 * 1024
+ library.write('MYARR', ndarr)
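+ # Append a block that is itself larger than 16MB; the combined data must still round-trip intact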
+ library.append('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(np.concatenate([ndarr, ndarr]) == saved_arr)
+
+
+def test_empty_append_promotes_dtype(library):
+ ndarr = np.array(["a", "b", "c"])
+ ndarr2 = np.array([])
+ library.write('MYARR', ndarr)
+ library.append('MYARR', ndarr2)
+ saved_arr = library.read('MYARR').data
+ assert np.all(saved_arr == ndarr)
+
+
+def test_empty_append_promotes_dtype2(library):
+ ndarr = np.array([])
+ ndarr2 = np.array(["a", "b", "c"])
+ library.write('MYARR', ndarr)
+ library.append('MYARR', ndarr2)
+ saved_arr = library.read('MYARR').data
+ assert np.all(saved_arr == ndarr2)
+
+
+def test_empty_append_promotes_dtype3(library):
+ ndarr = np.array([])
+ ndarr2 = np.array(["a", "b", "c"])
+ library.write('MYARR', ndarr)
+ library.append('MYARR', ndarr2)
+ library.append('MYARR', ndarr)
+ library.append('MYARR', ndarr2)
+ saved_arr = library.read('MYARR').data
+ assert np.all(saved_arr == np.hstack((ndarr2, ndarr2)))
+
+
+def test_empty_append_concat_and_rewrite(library):
+ ndarr = np.array([])
+ ndarr2 = np.array(["a", "b", "c"])
+ library.write('MYARR', ndarr)
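+ # Exceeding _APPEND_COUNT appends should force the store to concatenate and rewrite the stored segments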
+ for _ in range(_APPEND_COUNT + 2):
+ library.append('MYARR', ndarr)
+ library.append('MYARR', ndarr2)
+ saved_arr = library.read('MYARR').data
+ assert np.all(saved_arr == ndarr2)
+
+
+def test_empty_append_concat_and_rewrite_2(library):
+ ndarr2 = np.array(["a", "b", "c"])
+ library.write('MYARR', ndarr2)
+ for _ in range(_APPEND_COUNT + 1):
+ library.append('MYARR', ndarr2)
+ saved_arr = library.read('MYARR').data
+ assert np.all(saved_arr == np.hstack([ndarr2] * (_APPEND_COUNT + 2)))
+
+
+def test_empty_append_concat_and_rewrite_3(library):
+ ndarr = np.array([])
+ ndarr2 = np.array(["a", "b", "c"])
+ library.write('MYARR', ndarr2)
+ for _ in range(_APPEND_COUNT + 1):
+ library.append('MYARR', ndarr)
+ saved_arr = library.read('MYARR').data
+ assert np.all(saved_arr == ndarr2)
+
+
+def test_append_with_extra_columns(library):
+ ndarr = np.array([(2.1, 1, "a")], dtype=[('C', np.float), ('B', np.int), ('A', 'S1')])
+ ndarr2 = np.array([("b", 2, 3.1, 'c', 4, 5.)], dtype=[('A', 'S1'), ('B', np.int), ('C', np.float),
+ ('D', 'S1'), ('E', np.int), ('F', np.float)])
+ expected = np.array([("a", 1, 2.1, '', 0, np.nan),
+ ("b", 2, 3.1, 'c', 4, 5.)],
+ dtype=np.dtype([('A', 'S1'), ('B', np.int), ('C', np.float),
+ ('D', 'S1'), ('E', np.int), ('F', np.float)]))
+ library.write('MYARR', ndarr)
+ library.append('MYARR', ndarr2)
+ saved_arr = library.read('MYARR').data
+
+ assert expected.dtype == saved_arr.dtype
+ assert_equal(expected.tolist(), saved_arr.tolist())
+
+
+def test_logging_of_bad_documents(library):
+ ndarr = np.array([(2.1, 1, "a")], dtype=[('C', np.float), ('B', np.int), ('A', 'S1')])
+ library.write('MYARR', ndarr)
+
+ doc = library._collection.find_one()
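+ # Force decompression to fail so the offending document gets dumped to /tmp for inspection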
+ with patch('arctic.store._ndarray_store.decompress', side_effect=Exception("some-error")), \
+ patch('arctic.decorators.datetime') as dt, \
+ pytest.raises(Exception) as e:
+ dt.now.return_value = datetime.datetime(1970, 1, 1)
+ library.read('MYARR')
+ assert 'some-error' in str(e)
+ path = '/tmp/mongo_debug_' + str(os.getpid()) + '_' + str(doc['_id']) + '_1970-01-01 00:00:00'
+ with open(path, 'r') as f:
+ for l in f:
+ assert l.strip() == str(doc)
+ new_doc = eval(l.strip())
+ assert doc['data'] == new_doc['data']
+
+ os.remove(path)
+
+
+def test_save_append_delete_append(library):
+ dtype = np.dtype([('abc', 'int64')])
+ ndarr = np.arange(30 / dtype.itemsize).view(dtype=dtype)
+ v1 = library.write('MYARR', ndarr)
+
+ sliver = np.arange(30).view(dtype=dtype)
+ v2 = library.append('MYARR', sliver)
+
+ # intentionally leave an orphaned chunk lying around here
+ library._delete_version('MYARR', v2.version, do_cleanup=False)
+
+ sliver2 = np.arange(start=10, stop=40).view(dtype=dtype)
+ # we can't append here, as the latest version is now out of sync with version_nums.
+ # This gets translated to a do_append by the handler anyway.
+ v3 = library.write('MYARR', np.concatenate([ndarr, sliver2]))
+
+ assert np.all(ndarr == library.read('MYARR', as_of=v1.version).data)
+
+ # Check that we don't get the orphaned chunk from v2 back again.
+ assert np.all(np.concatenate([ndarr, sliver2]) == library.read('MYARR', as_of=v3.version).data)
+
+
+@pytest.mark.xfail(reason="delete_version not safe with append...")
+def test_delete_version_shouldnt_break_read(library):
+ data = np.arange(30)
+ yesterday = dt.utcnow() - dtd(days=1, seconds=1)
+ _id = bson.ObjectId.from_datetime(yesterday)
+ with patch("bson.ObjectId", return_value=_id):
+ library.write('symbol', data, prune_previous_version=False)
+
+ # Re-Write the data again
+ library.write('symbol', data, prune_previous_version=False)
+ library._delete_version('symbol', 1)
+ assert repr(library.read('symbol').data) == repr(data)
diff --git a/tests/integration/store/test_pandas_store.py b/tests/integration/store/test_pandas_store.py
new file mode 100644
index 000000000..d1d3b4197
--- /dev/null
+++ b/tests/integration/store/test_pandas_store.py
@@ -0,0 +1,601 @@
+from StringIO import StringIO
+from datetime import datetime as dt, timedelta as dtd
+from dateutil.rrule import rrule, DAILY
+from pandas import DataFrame, Series, DatetimeIndex, MultiIndex, read_csv, Panel
+from pandas.util.testing import assert_frame_equal
+import numpy as np
+import pytest
+import io
+import itertools
+from mock import Mock
+import string
+
+from arctic.store._pandas_ndarray_store import PandasDataFrameStore, PandasSeriesStore
+from arctic.store.version_store import register_versioned_storage
+
+register_versioned_storage(PandasDataFrameStore)
+
+
+def test_save_read_pandas_series(library):
+ s = Series(data=[1, 2, 3], index=[4, 5, 6])
+ library.write('pandas', s)
+ saved = library.read('pandas').data
+ assert np.all(s == saved)
+ assert saved.name == "values"
+
+
+def test_save_read_pandas_series_maintains_name(library):
+ s = Series(data=[1, 2, 3], index=[4, 5, 6], name="ADJ")
+ library.write('pandas', s)
+ saved = library.read('pandas').data
+ assert np.all(s == saved)
+ assert saved.name == "ADJ"
+
+
+def test_save_read_pandas_series_with_multiindex(library):
+ df = Series(data=['A', 'BC', 'DEF'], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)]))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_pandas_series_with_multiindex_and_name(library):
+ df = Series(data=['A', 'BC', 'DEF'],
+ index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)]),
+ name='Foo')
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+ assert df.name == 'Foo'
+
+
+def test_save_read_pandas_series_with_unicode_index_name(library):
+ df = Series(data=['A', 'BC', 'DEF'],
+ index=MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 1)),),
+ (np.datetime64(dt(2013, 1, 2)),),
+ (np.datetime64(dt(2013, 1, 3)),)], names=[u'DATETIME']))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_pandas_dataframe_with_multiindex(library):
+ df = DataFrame(data=['A', 'BC', 'DEF'], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2)]))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_pandas_dataframe_with_none_values(library):
+ df = DataFrame(data=[(1, None), (1, 3), (2, 2)])
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all((df.values == saved_df.values) | (np.isnan(df.values) & np.isnan(saved_df.values)))
+
+
+def test_save_read_pandas_dataframe_with_unicode_index_name(library):
+ df = DataFrame(data=['A', 'BC', 'DEF'],
+ index=MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 1)),),
+ (np.datetime64(dt(2013, 1, 2)),),
+ (np.datetime64(dt(2013, 1, 3)),)], names=[u'DATETIME']))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+
+
+def test_cant_write_pandas_series_with_tuple_values(library):
+ df = Series(data=[('A', 'BC')], index=np.array([dt(2013, 1, 1), ]).astype('datetime64[ns]'))
+ assert not PandasSeriesStore().can_write(Mock(), 'FOO', df)
+
+
+def test_save_read_pandas_series_with_datetimeindex_with_timezone(library):
+ df = Series(data=['A', 'BC', 'DEF'], index=DatetimeIndex(np.array([dt(2013, 1, 1),
+ dt(2013, 1, 2),
+ dt(2013, 1, 3)]).astype('datetime64[ns]'),
+ tz="America/Chicago"))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert df.index.tz == saved_df.index.tz
+ assert all(df.index == saved_df.index)
+
+
+def test_save_read_pandas_series_with_datetimeindex(library):
+ df = Series(data=['A', 'BC', 'DEF'], index=np.array([dt(2013, 1, 1),
+ dt(2013, 1, 2),
+ dt(2013, 1, 3)]).astype('datetime64[ns]'))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.index == saved_df.index)
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_pandas_dataframe_with_datetimeindex_with_timezone(library):
+ df = DataFrame(data=['A', 'BC', 'DEF'], index=DatetimeIndex(np.array([dt(2013, 1, 1),
+ dt(2013, 1, 2),
+ dt(2013, 1, 3)]).astype('datetime64[ns]'),
+ tz="America/Chicago"))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert df.index.tz == saved_df.index.tz
+ assert all(df.index == saved_df.index)
+
+
+def test_save_read_pandas_dataframe_with_datetimeindex(library):
+ df = DataFrame(data=['A', 'BC', 'DEF'], index=np.array([dt(2013, 1, 1),
+ dt(2013, 1, 2),
+ dt(2013, 1, 3)]).astype('datetime64[ns]'))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.index == saved_df.index)
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_pandas_dataframe_with_strings(library):
+ df = DataFrame(data=['A', 'BC', 'DEF'], index=[4, 5, 6])
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_pandas_dataframe(library):
+ df = DataFrame(data=[1, 2, 3], index=[4, 5, 6])
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_empty_dataframe(library):
+ df = DataFrame({'a': [], 'b': []})
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_pandas_dataframe2(library):
+ df = DataFrame(data=[1, 2, 3], index=DatetimeIndex(start='1/1/2011', periods=3, freq='H'))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_pandas_dataframe_strings(library):
+ df = DataFrame(data=['a', 'b', 'c'], index=DatetimeIndex(start='1/1/2011', periods=3, freq='H'))
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+
+
+def test_save_read_pandas_dataframe_empty_multiindex(library):
+ expected = read_csv(io.BytesIO('''\
+STRATEGY MAC INSTRUMENT CONTRACT $Price $Delta $Gamma $Vega $Theta $Notional uDelta uGamma uVega uTheta Delta Gamma Vega Theta'''),
+ delimiter=' ').set_index(['STRATEGY', 'MAC', 'INSTRUMENT', 'CONTRACT'])
+ library.write('pandas', expected)
+ saved_df = library.read('pandas').data
+ assert np.all(expected.values == saved_df.values)
+ assert np.all(expected.index.names == saved_df.index.names)
+
+
+def test_save_read_pandas_dataframe_empty_multiindex_and_no_columns(library):
+ expected = read_csv(io.BytesIO('''STRATEGY MAC INSTRUMENT CONTRACT'''),
+ delimiter=' ').set_index(['STRATEGY', 'MAC', 'INSTRUMENT', 'CONTRACT'])
+ library.write('pandas', expected)
+ saved_df = library.read('pandas').data
+ assert np.all(expected.values == saved_df.values)
+ assert np.all(expected.index.names == saved_df.index.names)
+
+
+def test_save_read_pandas_dataframe_multiindex_and_no_columns(library):
+ expected = read_csv(io.BytesIO('''\
+STRATEGY MAC INSTRUMENT CONTRACT
+STRAT F22 ASD 201312'''),
+ delimiter=' ').set_index(['STRATEGY', 'MAC', 'INSTRUMENT', 'CONTRACT'])
+ library.write('pandas', expected)
+ saved_df = library.read('pandas').data
+ assert np.all(expected.values == saved_df.values)
+ assert np.all(expected.index.names == saved_df.index.names)
+
+
+def test_append_pandas_dataframe(library):
+ df = DataFrame(data=[1, 2, 3], index=DatetimeIndex(start='1/1/2011', periods=3, freq='H'))
+ df2 = DataFrame(data=[4, 5, 6], index=DatetimeIndex(start='2/1/2011', periods=3, freq='H'))
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ saved_df = library.read('pandas').data
+ assert np.all(df.append(df2).values == saved_df.values)
+
+
+def test_empty_dataframe_multiindex(library):
+ df = DataFrame({'a': [], 'b': [], 'c': []})
+ df = df.groupby(['a', 'b']).sum()
+ print df
+ library.write('pandas', df)
+ saved_df = library.read('pandas').data
+ assert np.all(df.values == saved_df.values)
+ assert np.all(df.index.names == saved_df.index.names)
+
+
+def test_dataframe_append_empty(library):
+ df = DataFrame(data=[1, 2, 3], index=DatetimeIndex(start='1/1/2011', periods=3, freq='H'))
+ df2 = DataFrame(data=[], index=[])
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ saved_df = library.read('pandas').data
+ assert np.all(df.append(df2).values == saved_df.values)
+
+
+def test_empty_dataframe_append(library):
+ df = DataFrame(data=[], index=[])
+ df2 = DataFrame(data=[1, 2, 3], index=DatetimeIndex(start='1/1/2011', periods=3, freq='H'))
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ saved_df = library.read('pandas').data
+ assert np.all(df.append(df2).values == saved_df.values)
+
+
+def test_dataframe_append_empty_multiindex(library):
+ df = DataFrame({'a': [1, 1, 1], 'b': [1, 1, 2], 'c': [1, 2, 3]}).groupby(['a', 'b']).sum()
+ df2 = DataFrame({'a': [], 'b': [], 'c': []}).groupby(['a', 'b']).sum()
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ saved_df = library.read('pandas').data
+ assert np.all(df.append(df2).values == saved_df.values)
+ assert np.all(df.index.names == saved_df.index.names)
+
+
+def test_empty_dataframe_append_multiindex(library):
+ df = DataFrame({'a': [], 'b': [], 'c': []}).groupby(['a', 'b']).sum()
+ df2 = DataFrame({'a': [1, 1, 1], 'b': [1, 1, 2], 'c': [1, 2, 3]}).groupby(['a', 'b']).sum()
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ saved_df = library.read('pandas').data
+ assert np.all(df.append(df2).values == saved_df.values)
+ assert np.all(df.index.names == saved_df.index.names)
+
+
+def test_empty_dataframe_should_ignore_dtype(library):
+ df = DataFrame({'a': [], 'b': [], 'c': []}).groupby(['a', 'b']).sum()
+ df2 = DataFrame({'a': [1, 1, 1], 'b': [1, 1, 2], 'c': [1, 2, 3]}).groupby(['a']).sum()
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ saved_df = library.read('pandas').data
+ assert np.all(df2.index.names == saved_df.index.names)
+
+
+def test_empty_dataframe_should_ignore_dtype2(library):
+ df = DataFrame({'a': []})
+ df2 = DataFrame({'a': [1, 1, 1], 'b': [1, 1, 2], 'c': [1, 2, 3]}).groupby(['a']).sum()
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ saved_df = library.read('pandas').data
+ assert np.all(df2.values == saved_df.values)
+ assert np.all(df2.index.names == saved_df.index.names)
+
+
+def test_dataframe_append_should_promote_string_column(library):
+ data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')])
+ data[:] = [(1, 2., 'Hello'), (2, 3., "World")]
+ df = DataFrame(data, index=MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 1)),),
+ (np.datetime64(dt(2013, 1, 2)),), ], names=[u'DATETIME']))
+ data2 = np.zeros((1,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a30')])
+ data2[:] = [(3, 4., 'Hello World - Good Morning')]
+ df2 = DataFrame(data2, index=MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 3)),)], names=[u'DATETIME']))
+ expected_data = np.zeros((3,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a30')])
+ expected_data[:] = [(1, 2., 'Hello'), (2, 3., "World"), (3, 4., 'Hello World - Good Morning')]
+ expected = DataFrame(expected_data, MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 1)),),
+ (np.datetime64(dt(2013, 1, 2)),),
+ (np.datetime64(dt(2013, 1, 3)),)],
+ names=[u'DATETIME']))
+
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ actual = library.read('pandas').data
+
+ assert_frame_equal(expected, actual)
+
+
+def test_dataframe_append_should_add_new_column(library):
+ data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')])
+ data[:] = [(1, 2., 'Hello'), (2, 3., "World")]
+ df = DataFrame(data, index=MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 1)),),
+ (np.datetime64(dt(2013, 1, 2)),), ], names=[u'DATETIME']))
+ data2 = np.zeros((1,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10'), ('D', 'f4')])
+ data2[:] = [(4, 5., 'Hi', 6.)]
+ df2 = DataFrame(data2, index=MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 3)),)], names=[u'DATETIME']))
+ expected_data = np.zeros((3,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10'), ('D', 'f4')])
+ expected_data[:] = [(1, 2., 'Hello', np.nan), (2, 3., "World", np.nan), (4, 5., 'Hi', 6.)]
+ expected = DataFrame(expected_data, MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 1)),),
+ (np.datetime64(dt(2013, 1, 2)),),
+ (np.datetime64(dt(2013, 1, 3)),)],
+ names=[u'DATETIME']))
+
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ actual = library.read('pandas').data
+
+ assert_frame_equal(expected, actual)
+
+
+def test_dataframe_append_should_add_new_columns_and_reorder(library):
+ data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')])
+ data[:] = [(1, 2., 'Hello'), (2, 3., "World")]
+ df = DataFrame(data, index=MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 1)),),
+ (np.datetime64(dt(2013, 1, 2)),), ], names=[u'DATETIME']))
+ data2 = np.zeros((1,), dtype=[('C', 'a10'), ('A', 'i4'), ('E', 'a1'), ('B', 'f4'), ('D', 'f4'), ('F', 'i4')])
+ data2[:] = [('Hi', 4, 'Y', 5., 6., 7)]
+ df2 = DataFrame(data2, index=MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 3)),)], names=[u'DATETIME']))
+ expected_data = np.zeros((3,), dtype=[('C', 'a10'), ('A', 'i4'), ('E', 'a1'),
+ ('B', 'f4'), ('D', 'f4'), ('F', 'i4')])
+ expected_data[:] = [('Hello', 1, '', 2., np.nan, 0), ("World", 2, '', 3., np.nan, 0), ('Hi', 4, 'Y', 5., 6., 7)]
+ expected = DataFrame(expected_data, MultiIndex.from_tuples([(np.datetime64(dt(2013, 1, 1)),),
+ (np.datetime64(dt(2013, 1, 2)),),
+ (np.datetime64(dt(2013, 1, 3)),)],
+ names=[u'DATETIME']))
+
+ library.write('pandas', df)
+ library.append('pandas', df2)
+ actual = library.read('pandas').data
+
+ assert_frame_equal(expected, actual)
+
+
+# --- auto generated tests --- #
+def dataframe(columns, length, index):
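+ # Build a DataFrame of ones with `columns` columns and `length` rows, using up to the first `index` columns as the index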
+ df = DataFrame(np.ones((length, columns)), columns=list(string.ascii_lowercase[:columns]))
+ index = min(index, columns)
+ if index:
+ df = df.set_index(list(string.ascii_lowercase[:index]))
+ return df
+
+
+
+@pytest.mark.parametrize("df_size", list(itertools.combinations_with_replacement([0, 1, 2, 4], r=3)))
+def test_dataframe_save_read(library, df_size):
+ df = dataframe(*df_size)
+ library.write('pandas', df)
+ result = library.read('pandas').data
+ assert np.all(df.values == result.values), str(df.values) + "!=" + str(result.values)
+ if None not in df.index.names: # saved as 'index' or 'level'
+ assert np.all(df.index.names == result.index.names), str(df.index.names) + "!=" + str(result.index.names)
+ assert np.all(df.index.values == result.index.values), str(df.index.values) + "!=" + str(result.index.values)
+ assert np.all(df.columns.values == result.columns.values), str(df.columns.values) + "!=" + str(result.columns.values)
+
+
+@pytest.mark.parametrize("df_size", list(itertools.combinations_with_replacement([0, 1, 2, 4], r=3)))
+def test_dataframe_save_append_read(library, df_size):
+ df = dataframe(*df_size)
+ library.write('pandas', df)
+ library.append('pandas', df)
+ result = library.read('pandas').data
+ assert len(result) == len(df) * 2
+ if None not in df.index.names: # saved as 'index' or 'level'
+ assert np.all(df.index.names == result.index.names), str(df.index.names) + "!=" + str(result.index.names)
+ assert np.all(df.columns.values == result.columns.values), str(df.columns.values) + "!=" + str(result.columns.values)
+
+
+def test_large_dataframe_append_rewrite_same_item(library):
+ csv = \
+"""index, f1, f2, f3, f4, f5, f6, f7, f8, iVol, tau, uPrice, uDelta, uGamma, uVega, uTheta, Delta, Gamma, Vega, Theta, $Price, $Delta, $Gamma, $Vega, $Theta, $Time_Value, $Notional, FX, f9
+0, 201401, 2013 - 12 - 20 16:15:00, 15.0, F1, CALL, STRAT, 140.0, 140.345, 0.07231398622706062, 0.008813407863715872, 0.5768068954653813, 0.6427860135978315, 0.391592427081917, 4.915801583071703, -20.166163353481476, 9.641790203967473, 5.873886406228755, 73.73702374607555, -302.49245030222215, 11909.274289984183, 18625.940769791625, 15925.131550993763, 1014.9606370552315, -1601.4183005499872, 4786.093789984206, 2897689.1805000002, 1.37646, SYM
+1, 201401, 2013 - 12 - 20 16:15:00, 15.0, F1, PUT, STRAT, 140.0, 140.345, 0.07231398622706062, 0.008813407863715872, 0.2318116692147143, -0.357200149447554, 0.391592427081917, 4.915801583071703, -20.16670499598669, -5.358002241713311, 5.873886406228755, 73.73702374607555, -302.50057493980034, 4786.192353109285, -10350.550083271604, 15925.131550993763, 1014.9606370552315, -1601.4613130062987, 4786.192353109285, 2897689.1805000002, 1.37646, SYM
+2, 201401, 2013 - 12 - 20 16:15:00, -48.0, F22, CALL, STRAT, 141.5, 140.345, 0.0739452718231504, 0.008813407863715872, 0.05709601681178711, 0.11956012929302556, 0.20479158314197934, 2.628816497069195, -11.027911868706408, -5.738886206065227, -9.829995990815009, -126.18319185932137, 529.3397696979075, -3772.3383984361194, -11086.338978290602, -26650.835319775462, -1736.8611626668148, 2802.3654592245452, -3772.3383984361194, -9272605.3776, 1.37646, SYM
+3, 201402, 2014 - 01 - 24 16:15:00, -286.0, F22, CALL, STRAT, 141.5, 140.345, 0.045487609195962696, 0.10463818541333941, 0.3747457492377393, 0.29120692771365, 0.1660598823861943, 15.56832633851715, -3.3830120036011397, -83.2851813261039, -47.49312636245157, -4452.541332815905, 967.541433029926, -147525.24472279268, -160889.7125497546, -128762.15724702866, -61287.4504296778, 5122.238772724507, -147525.24472279268, -55249273.7082, 1.37646, SYM
+4, 201402, 2014 - 01 - 24 16:15:00, -264.0, F22, CALL, STRAT, 142.0, 140.345, 0.044822991783170785, 0.10463818541333941, 0.24229877494137142, 0.21142760067302388, 0.14217904830463807, 13.134711351643713, -2.812643033008342, -55.816886577678304, -37.53526875242445, -3467.56379683394, 742.5377607142022, -88047.84694353123, -107826.65888355605, -101764.66675460352, -47729.62863790045, 3931.052023510272, -88047.84694353123, -50999329.576799996, 1.37646, SYM
+5, 201401, 2013 - 12 - 20 16:15:00, -350.0, F22, CALL, STRAT, 142.0, 140.345, 0.07732984880519912, 0.008813407863715872, 0.022997617617102506, 0.053564485523868555, 0.10692101346714668, 1.4353175202195965, -6.296783951449458, -18.747569933353994, -37.422354713501335, -502.3611320768588, 2203.8743830073104, -11079.355260832921, -36216.420371031316, -101458.53708176922, -6914.80003858513, 11667.480512439395, -11079.355260832921, -67612747.545, 1.37646, SYM
+6, 201402, 2014 - 01 - 24 16:15:00, -43.0, F22, CALL, STRAT, 142.5, 140.345, 0.04429193547308161, 0.10463818541333941, 0.14930517833206025, 0.14535540627931182, 0.11352765189447668, 10.36359429711007, -2.1930395074393734, -6.250282470010408, -4.881689031462497, -445.634554775733, 94.30069881989306, -8837.042047978748, -12074.25059227865, -13235.111243323556, -6133.9813926660545, 499.23515345242305, -8837.042047978748, -8306708.9841, 1.37646, SYM
+7, 201401, 2013 - 12 - 20 16:15:00, -557.0, F22, CALL, STRAT, 142.5, 140.345, 0.0814452531405243, 0.008813407863715872, 0.009355428262274312, 0.02334242880598363, 0.05141464658820557, 0.7269263150873877, -3.358771076933658, -13.001732844932882, -28.637958149630503, -404.89795750367495, 1870.8354898520474, -7172.696641740786, -25116.653728342328, -77642.50435641785, -5573.258425855083, 9904.346993699035, -7172.696641740786, -107600858.2359, 1.37646, SYM
+8, 201401, 2013 - 12 - 20 16:15:00, -607.0, F22, CALL, STRAT, 143.0, 140.345, 0.08598678226600448, 0.008813407863715872, 0.003929576582252237, 0.010236258301012439, 0.024009328219809185, 0.35838470316321597, -1.748258969026736, -6.21340878871455, -14.573662229424174, -217.5395148200721, 1061.1931941992289, -3283.2053243209966, -12003.018280721177, -39511.74267470002, -2994.344405692364, 5618.038400336425, -3283.2053243209966, -117259822.1709, 1.37646, SYM
+9, 201401, 2013 - 12 - 20 16:15:00, -799.0, F22, CALL, STRAT, 143.5, 140.345, 0.09076344895187359, 0.008813407863715872, 0.0017194411099074047, 0.004596451952699387, 0.01121737082629775, 0.1767420247600966, -0.9100718136522263, -3.67256511020681, -8.962679290211902, -141.2168777833172, 727.1473791081288, -1891.026786204374, -7094.634789685377, -24299.388322293227, -1943.793835936248, 3849.574159412212, -1891.026786204374, -154350243.6813, 1.37646, SYM
+10, 201401, 2013 - 12 - 20 16:15:00, -377.0, F22, CALL, STRAT, 144.0, 140.345, 0.09566038240450792, 0.008813407863715872, 0.0007852689424384662, 0.0021295289144923784, 0.005324993197820229, 0.08842782200919548, -0.47989481865526434, -0.8028324007636266, -2.007522435578226, -33.3372888974667, 180.92034663303465, -407.4960157678369, -1550.905840965067, -5442.743810001693, -458.87444675807006, 957.8062320250265, -407.4960157678369, -72828588.06989999, 1.37646, SYM
+11, 201402, 2014 - 01 - 24 16:15:00, -43.0, F22, PUT, STRAT, 137.5, 140.345, 0.05414565513055749, 0.10463818541333941, 0.14529132959784974, -0.11936326135136956, 0.08106840033227831, 9.046889913827847, -2.3403474666535415, 5.132620238108891, -3.4859412142879673, -389.0162662945974, 100.63494106610229, -8599.471252145018, 9915.158754388978, -9450.995231657676, -5354.653299038616, 532.7691191532583, -8599.471252145018, -8306708.9841, 1.37646, SYM
+12, 201402, 2014 - 01 - 24 16:15:00, -264.0, F22, PUT, STRAT, 138.0, 140.345, 0.052853182910226726, 0.10463818541333941, 0.20369081242765574, -0.16004607860136968, 0.10141337819029916, 11.047155968410756, -2.789510903380204, 42.252164750761594, -26.77313184223898, -2916.44917566044, 736.4308784923738, -74018.27549798116, 81622.4271006569, -72586.63466280713, -40143.75632329569, 3898.7217192677417, -74018.27549798116, -50999329.576799996, 1.37646, SYM
+13, 201401, 2013 - 12 - 20 16:15:00, -376.0, F22, PUT, STRAT, 138.0, 140.345, 0.08897789701177691, 0.008813407863715872, 0.009425028910330369, -0.021620830082859088, 0.04411750741642045, 0.6814450839280415, -3.43983227630679, 8.129432111155017, -16.588182788574088, -256.2233515569436, 1293.376935891353, -4877.913910511415, 15704.378314735444, -44973.45961948536, -3526.811944840706, 6847.236989142353, -4877.913910511415, -72635408.7912, 1.37646, SYM
+14, 201401, 2013 - 12 - 20 16:15:00, -301.0, F22, PUT, STRAT, 138.5, 140.345, 0.08383267513417192, 0.008813407863715872, 0.020991265826436845, -0.045956251827941025, 0.08727871921287762, 1.2701629715541363, -6.0408289559434, 13.832831800210249, -26.270894483076166, -382.319054437795, 1818.2895157389635, -8696.984965596635, 26722.164695430383, -71224.98149804553, -5262.468856714474, 9626.164564746361, -8696.984965596635, -58146962.8887, 1.37646, SYM
+15, 201402, 2014 - 01 - 24 16:15:00, -286.0, F22, PUT, STRAT, 138.5, 140.345, 0.051599724402617266, 0.10463818541333941, 0.28321473137770425, -0.21146513081873966, 0.12351912253075312, 13.136076509490826, -3.2382097361444653, 60.479027414159546, -35.32646904379539, -3756.917881714376, 926.127984537317, -111492.45225751627, 116832.94892344868, -95776.22511698281, -51712.4718746457, 4902.992790754752, -111492.45225751627, -55249273.7082, 1.37646, SYM
+16, 201401, 2013 - 12 - 20 16:15:00, -739.0, F22, PUT, STRAT, 139.0, 140.345, 0.0791166184474159, 0.008813407863715872, 0.047581495319667155, -0.0967612790459439, 0.16434769983129724, 2.257195133461944, -10.131173555213623, 71.50658521495254, -121.45295017532867, -1668.0672036283765, 7486.937257302868, -48400.084510256995, 138135.90554124617, -329280.15202122304, -22960.277831063155, 39636.42175841195, -48400.084510256995, -142759486.9593, 1.37646, SYM
+17, 201401, 2013 - 12 - 20 16:15:00, -669.0, F22, PUT, STRAT, 139.5, 140.345, 0.07513349054261133, 0.008813407863715872, 0.10733307441031315, -0.1949726645282245, 0.27848967340302655, 3.6322892048663644, -15.482297001088007, 130.4367125693822, -186.30959150662477, -2430.001478055598, 10357.656693727877, -98837.84833028633, 251976.70050152476, -505117.8297913038, -33447.99834484408, 54834.231279417974, -98837.84833028633, -129236937.4503, 1.37646, SYM
+18, 201401, 2013 - 12 - 20 16:15:00, -471.0, F22, PUT, STRAT, 140.0, 140.345, 0.07231398622706062, 0.008813407863715872, 0.2318116692147143, -0.357200149447554, 0.391592427081917, 4.915801583071703, -20.16670499598669, 168.24127038979793, -184.4400331555829, -2315.3425456267723, 9498.518053109732, -150286.43988763154, 325007.2726147283, -500049.1307012041, -31869.76400353427, 50285.885228397776, -150286.43988763154, -90987440.2677, 1.37646, SYM
+19, 201401, 2013 - 12 - 20 16:15:00, -364.0, F22, PUT, STRAT, 141.0, 140.345, 0.07172143045750252, 0.008813407863715872, 0.7922715181315709, -0.7543151841866509, 0.333159035321538, 4.147995696473539, -16.876460506586433, 274.5707270439409, -121.26988885703983, -1509.8704335163682, 6143.031624397461, -396952.9396004471, 530413.7500248309, -328783.8408272312, -20782.762569179402, 32521.681960454345, -68777.34640044652, -70317257.4468, 1.37646, SYM
+20, 201401, 2013 - 12 - 20 16:15:00, -394.0, F22, PUT, STRAT, 141.5, 140.345, 0.0739452718231504, 0.008813407863715872, 1.212080035129219, -0.88042603375236, 0.20479158314197934, 2.628816497069195, -11.026098543797652, 346.8878572984298, -80.68788375793986, -1035.7536998452629, 4344.282826256274, -657341.595950662, 670115.460626992, -218758.93991649026, -14256.735376890107, 22998.967457802737, -30955.94375066146, -76112635.8078, 1.37646, SYM
+21, 201402, 2014 - 01 - 24 16:15:00, -40.0, GEE1, CALL, STRAT, 141.5, 140.345, 0.045487609195962696, 0.10463818541333941, 0.3747457492377393, 0.29120692771365, 0.1660598823861943, 15.56832633851715, -3.3830120036011397, -11.648277108545999, -6.642395295447772, -622.733053540686, 135.32048014404558, -20632.901359831147, -22502.057699266377, -18008.69332126275, -8571.671388766126, 716.3970311502808, -20632.901359831147, -7727171.148, 1.37646, SYM
+22, 201401, 2013 - 12 - 20 16:15:00, -12.0, GEE1, CALL, STRAT, 141.5, 140.345, 0.0739452718231504, 0.008813407863715872, 0.05709601681178711, 0.11956012929302556, 0.20479158314197934, 2.628816497069195, -11.027911868706408, -1.4347215515163068, -2.4574989977037522, -31.545797964830342, 132.33494242447688, -943.0845996090299, -2771.5847445726504, -6662.708829943866, -434.2152906667037, 700.5913648061363, -943.0845996090299, -2318151.3444, 1.37646, SYM
+23, 201402, 2014 - 01 - 24 16:15:00, -45.0, GEE1, CALL, STRAT, 142.0, 140.345, 0.044822991783170785, 0.10463818541333941, 0.24229877494137142, 0.21142760067302388, 0.14217904830463807, 13.134711351643713, -2.812643033008342, -9.514242030286075, -6.398057173708713, -591.0620108239671, 126.56893648537539, -15008.155729011005, -18379.544127878875, -17346.250014989233, -8135.732154187577, 670.0656858256148, -15008.155729011005, -8693067.5415, 1.37646, SYM
+24, 201401, 2013 - 12 - 20 16:15:00, -57.0, GEE1, CALL, STRAT, 142.0, 140.345, 0.07732984880519912, 0.008813407863715872, 0.022997617617102506, 0.053564485523868555, 0.10692101346714668, 1.4353175202195965, -6.296783951449458, -3.0531756748605074, -6.09449776762736, -81.813098652517, 358.9166852326191, -1804.3521424785042, -5898.102746139386, -16523.247467602414, -1126.1245777124357, 1900.1325405972727, -1804.3521424785042, -11011218.8859, 1.37646, SYM
+25, 201401, 2013 - 12 - 20 16:15:00, -68.0, GEE1, CALL, STRAT, 142.5, 140.345, 0.0814452531405243, 0.008813407863715872, 0.009355428262274312, 0.02334242880598363, 0.05141464658820557, 0.7269263150873877, -3.358771076933658, -1.5872851588068868, -3.496195967997979, -49.430989425942364, 228.39643323148874, -875.6613494405268, -3066.306020695293, -9478.797659311334, -680.3977970523262, 1209.1482864839038, -875.6613494405268, -13136190.9516, 1.37646, SYM
+26, 201402, 2014 - 01 - 24 16:15:00, -19.0, GEE1, CALL, STRAT, 142.5, 140.345, 0.04429193547308161, 0.10463818541333941, 0.14930517833206025, 0.14535540627931182, 0.11352765189447668, 10.36359429711007, -2.1930395074393734, -2.7617527193069247, -2.1570253859950568, -196.90829164509134, 41.66775064134809, -3904.7395095720058, -5335.133982634753, -5848.072409840642, -2710.3638711780245, 220.5922771068846, -3904.7395095720058, -3670406.2953000003, 1.37646, SYM
+27, 201401, 2013 - 12 - 20 16:15:00, -91.0, GEE1, CALL, STRAT, 143.0, 140.345, 0.08598678226600448, 0.008813407863715872, 0.003929576582252237, 0.010236258301012439, 0.024009328219809185, 0.35838470316321597, -1.748258969026736, -0.9314995053921319, -2.1848488680026357, -32.613007987852654, 159.091566181433, -492.21035339902915, -1799.464025610588, -5923.5067271790795, -448.9050097495967, 842.2429891772894, -492.21035339902915, -17579314.3617, 1.37646, SYM
+28, 201401, 2013 - 12 - 20 16:15:00, -117.0, GEE1, CALL, STRAT, 143.5, 140.345, 0.09076344895187359, 0.008813407863715872, 0.0017194411099074047, 0.004596451952699387, 0.01121737082629775, 0.1767420247600966, -0.9100718136522263, -0.5377848784658282, -1.3124323866768368, -20.678816896931302, 106.47840219731049, -276.9088034867481, -1038.8889491779587, -3558.233333802638, -284.63564305950064, 563.7048518788846, -276.9088034867481, -22601975.6079, 1.37646, SYM
+29, 201401, 2013 - 12 - 20 16:15:00, -126.0, GEE1, CALL, STRAT, 144.0, 140.345, 0.09566038240450792, 0.008813407863715872, 0.0007852689424384662, 0.0021295289144923784, 0.005324993197820229, 0.08842782200919548, -0.47989481865526434, -0.26832064322603966, -0.6709491429253489, -11.141905573158631, 60.46674715056331, -136.19230235211526, -518.3398831872638, -1819.0602654117067, -153.3638734522993, 320.1156107033245, -136.19230235211526, -24340589.1162, 1.37646, SYM
+30, 201402, 2014 - 01 - 24 16:15:00, -19.0, GEE1, PUT, STRAT, 137.5, 140.345, 0.05414565513055749, 0.10463818541333941, 0.14529132959784974, -0.11936326135136956, 0.08106840033227831, 9.046889913827847, -2.3403474666535415, 2.2679019656760215, -1.5402996063132879, -171.8909083627291, 44.46660186641729, -3799.766367226869, 4381.11665891606, -4176.021148871998, -2366.009597249621, 235.40961078864902, -3799.766367226869, -3670406.2953000003, 1.37646, SYM
+31, 201401, 2013 - 12 - 20 16:15:00, -64.0, GEE1, PUT, STRAT, 138.0, 140.345, 0.08897789701177691, 0.008813407863715872, 0.009425028910330369, -0.021620830082859088, 0.04411750741642045, 0.6814450839280415, -3.43983227630679, 1.3837331253029816, -2.8235204746509086, -43.612485371394655, 220.14926568363455, -830.2832188104537, 2673.0856705932674, -7655.056956508147, -600.3084161430988, 1165.48714708806, -830.2832188104537, -12363473.8368, 1.37646, SYM
+32, 201402, 2014 - 01 - 24 16:15:00, -45.0, GEE1, PUT, STRAT, 138.0, 140.345, 0.052853182910226726, 0.10463818541333941, 0.20369081242765574, -0.16004607860136968, 0.10141337819029916, 11.047155968410756, -2.789510903380204, 7.2020735370616356, -4.563602018563462, -497.122018578484, 125.52799065210918, -12616.751505337697, 13912.913710339246, -12372.721817523941, -6842.685736925402, 664.5548385115469, -12616.751505337697, -8693067.5415, 1.37646, SYM
+33, 201401, 2013 - 12 - 20 16:15:00, -51.0, GEE1, PUT, STRAT, 138.5, 140.345, 0.08383267513417192, 0.008813407863715872, 0.020991265826436845, -0.045956251827941025, 0.08727871921287762, 1.2701629715541363, -6.0408289559434, 2.3437688432249923, -4.451214679856759, -64.77831154926095, 308.08227675311343, -1473.5755257323203, 4527.675745737374, -12068.020120931302, -891.6475471509574, 1631.011271767656, -1473.5755257323203, -9852143.2137, 1.37646, SYM
+34, 201402, 2014 - 01 - 24 16:15:00, -40.0, GEE1, PUT, STRAT, 138.5, 140.345, 0.051599724402617266, 0.10463818541333941, 0.28321473137770425, -0.21146513081873966, 0.12351912253075312, 13.136076509490826, -3.2382097361444653, 8.458605232749587, -4.940764901230125, -525.443060379633, 129.5283894457786, -15593.349966086193, 16340.27257670611, -13395.276240137457, -7232.5135489014965, 685.733257448217, -15593.349966086193, -7727171.148, 1.37646, SYM
+35, 201401, 2013 - 12 - 20 16:15:00, -98.0, GEE1, PUT, STRAT, 139.0, 140.345, 0.0791166184474159, 0.008813407863715872, 0.047581495319667155, -0.0967612790459439, 0.16434769983129724, 2.257195133461944, -10.131173555213623, 9.4826053465025, -16.10607458346713, -221.20512307927052, 992.855008410935, -6418.414454675487, 18318.42861034117, -43666.38010565609, -3044.8000371369267, 5256.250787989675, -6418.414454675487, -18931569.312599998, 1.37646, SYM
+36, 201401, 2013 - 12 - 20 16:15:00, -111.0, GEE1, PUT, STRAT, 139.5, 140.345, 0.07513349054261133, 0.008813407863715872, 0.10733307441031315, -0.1949726645282245, 0.27848967340302655, 3.6322892048663644, -15.482297001088007, 21.64196576263292, -30.912353747735946, -403.18410174016645, 1718.5349671207687, -16399.10487991298, 41807.79335675523, -83808.78790259302, -5549.667886812695, 9098.05631093482, -16399.10487991298, -21442899.9357, 1.37646, SYM
+37, 201401, 2013 - 12 - 20 16:15:00, -108.0, GEE1, PUT, STRAT, 140.0, 140.345, 0.07231398622706062, 0.008813407863715872, 0.2318116692147143, -0.357200149447554, 0.391592427081917, 4.915801583071703, -20.16670499598669, 38.577616140335834, -42.29198212484704, -530.9065709717439, 2178.0041395665626, -34460.58494238685, 74523.96059955555, -114660.94716715509, -7307.7165867976655, 11530.52145364535, -34460.58494238685, -20863362.0996, 1.37646, SYM
+38, 201401, 2013 - 12 - 20 16:15:00, -83.0, GEE1, PUT, STRAT, 141.0, 140.345, 0.07172143045750252, 0.008813407863715872, 0.7922715181315709, -0.7543151841866509, 0.333159035321538, 4.147995696473539, -16.876460506586433, 62.608160287492026, -27.652199931687655, -344.28364280730375, 1400.746222046674, -90513.99446933273, 120945.99245071695, -74969.94172708844, -4738.926629785414, 7415.658249224481, -15682.746569332587, -16033880.132100001, 1.37646, SYM
+39, 201401, 2013 - 12 - 20 16:15:00, -56.0, GEE1, PUT, STRAT, 141.5, 140.345, 0.0739452718231504, 0.008813407863715872, 1.212080035129219, -0.88042603375236, 0.20479158314197934, 2.628816497069195, -11.026098543797652, 49.30385789013216, -11.468328655950843, -147.21372383587493, 617.4615184526685, -93429.26236862202, 95244.83704343032, -31092.641206404707, -2026.3380231112837, 3268.888775728308, -4399.8295686219335, -10818039.607199999, 1.37646, SYM"""
+ csv = StringIO(csv)
+ df = read_csv(csv).set_index(['index'])
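+ # Repeatedly rewrite progressively longer slices of the same item; each read must reflect the latest write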
+ for _ in range(10):
+ library.write('pandas', df[:-2])
+ result = library.read('pandas').data
+ assert len(result) == len(df[:-2])
+ assert np.all(df[:-2].values == result.values)
+ assert np.all(df[:-2].columns.values == result.columns.values)
+ for _ in range(10):
+ library.write('pandas', df[:-1])
+ result = library.read('pandas').data
+ assert len(result) == len(df[:-1])
+ assert np.all(df[:-1].values == result.values)
+ assert np.all(df[:-1].columns.values == result.columns.values)
+ for _ in range(10):
+ library.write('pandas', df)
+ result = library.read('pandas').data
+ assert len(result) == len(df)
+ assert np.all(df.values == result.values)
+ assert np.all(df.columns.values == result.columns.values)
+
+
+def test_large_dataframe_rewrite_same_item(library):
+ csv = \
+"""index, f1, f2, f3, f4, f5, f6, f7, f8, iVol, tau, uPrice, uDelta, uGamma, uVega, uTheta, Delta, Gamma, Vega, Theta, $Price, $Delta, $Gamma, $Vega, $Theta, $Time_Value, $Notional, FX, f9
+0, 201401, 2013 - 12 - 20 16:15:00, 15.0, F1, CALL, STRAT, 140.0, 140.345, 0.07231398622706062, 0.008813407863715872, 0.5768068954653813, 0.6427860135978315, 0.391592427081917, 4.915801583071703, -20.166163353481476, 9.641790203967473, 5.873886406228755, 73.73702374607555, -302.49245030222215, 11909.274289984183, 18625.940769791625, 15925.131550993763, 1014.9606370552315, -1601.4183005499872, 4786.093789984206, 2897689.1805000002, 1.37646, SYM
+1, 201401, 2013 - 12 - 20 16:15:00, 15.0, F1, PUT, STRAT, 140.0, 140.345, 0.07231398622706062, 0.008813407863715872, 0.2318116692147143, -0.357200149447554, 0.391592427081917, 4.915801583071703, -20.16670499598669, -5.358002241713311, 5.873886406228755, 73.73702374607555, -302.50057493980034, 4786.192353109285, -10350.550083271604, 15925.131550993763, 1014.9606370552315, -1601.4613130062987, 4786.192353109285, 2897689.1805000002, 1.37646, SYM
+2, 201401, 2013 - 12 - 20 16:15:00, -48.0, F22, CALL, STRAT, 141.5, 140.345, 0.0739452718231504, 0.008813407863715872, 0.05709601681178711, 0.11956012929302556, 0.20479158314197934, 2.628816497069195, -11.027911868706408, -5.738886206065227, -9.829995990815009, -126.18319185932137, 529.3397696979075, -3772.3383984361194, -11086.338978290602, -26650.835319775462, -1736.8611626668148, 2802.3654592245452, -3772.3383984361194, -9272605.3776, 1.37646, SYM
+3, 201402, 2014 - 01 - 24 16:15:00, -286.0, F22, CALL, STRAT, 141.5, 140.345, 0.045487609195962696, 0.10463818541333941, 0.3747457492377393, 0.29120692771365, 0.1660598823861943, 15.56832633851715, -3.3830120036011397, -83.2851813261039, -47.49312636245157, -4452.541332815905, 967.541433029926, -147525.24472279268, -160889.7125497546, -128762.15724702866, -61287.4504296778, 5122.238772724507, -147525.24472279268, -55249273.7082, 1.37646, SYM
+4, 201402, 2014 - 01 - 24 16:15:00, -264.0, F22, CALL, STRAT, 142.0, 140.345, 0.044822991783170785, 0.10463818541333941, 0.24229877494137142, 0.21142760067302388, 0.14217904830463807, 13.134711351643713, -2.812643033008342, -55.816886577678304, -37.53526875242445, -3467.56379683394, 742.5377607142022, -88047.84694353123, -107826.65888355605, -101764.66675460352, -47729.62863790045, 3931.052023510272, -88047.84694353123, -50999329.576799996, 1.37646, SYM
+5, 201401, 2013 - 12 - 20 16:15:00, -350.0, F22, CALL, STRAT, 142.0, 140.345, 0.07732984880519912, 0.008813407863715872, 0.022997617617102506, 0.053564485523868555, 0.10692101346714668, 1.4353175202195965, -6.296783951449458, -18.747569933353994, -37.422354713501335, -502.3611320768588, 2203.8743830073104, -11079.355260832921, -36216.420371031316, -101458.53708176922, -6914.80003858513, 11667.480512439395, -11079.355260832921, -67612747.545, 1.37646, SYM
+6, 201402, 2014 - 01 - 24 16:15:00, -43.0, F22, CALL, STRAT, 142.5, 140.345, 0.04429193547308161, 0.10463818541333941, 0.14930517833206025, 0.14535540627931182, 0.11352765189447668, 10.36359429711007, -2.1930395074393734, -6.250282470010408, -4.881689031462497, -445.634554775733, 94.30069881989306, -8837.042047978748, -12074.25059227865, -13235.111243323556, -6133.9813926660545, 499.23515345242305, -8837.042047978748, -8306708.9841, 1.37646, SYM
+7, 201401, 2013 - 12 - 20 16:15:00, -557.0, F22, CALL, STRAT, 142.5, 140.345, 0.0814452531405243, 0.008813407863715872, 0.009355428262274312, 0.02334242880598363, 0.05141464658820557, 0.7269263150873877, -3.358771076933658, -13.001732844932882, -28.637958149630503, -404.89795750367495, 1870.8354898520474, -7172.696641740786, -25116.653728342328, -77642.50435641785, -5573.258425855083, 9904.346993699035, -7172.696641740786, -107600858.2359, 1.37646, SYM
+8, 201401, 2013 - 12 - 20 16:15:00, -607.0, F22, CALL, STRAT, 143.0, 140.345, 0.08598678226600448, 0.008813407863715872, 0.003929576582252237, 0.010236258301012439, 0.024009328219809185, 0.35838470316321597, -1.748258969026736, -6.21340878871455, -14.573662229424174, -217.5395148200721, 1061.1931941992289, -3283.2053243209966, -12003.018280721177, -39511.74267470002, -2994.344405692364, 5618.038400336425, -3283.2053243209966, -117259822.1709, 1.37646, SYM
+9, 201401, 2013 - 12 - 20 16:15:00, -799.0, F22, CALL, STRAT, 143.5, 140.345, 0.09076344895187359, 0.008813407863715872, 0.0017194411099074047, 0.004596451952699387, 0.01121737082629775, 0.1767420247600966, -0.9100718136522263, -3.67256511020681, -8.962679290211902, -141.2168777833172, 727.1473791081288, -1891.026786204374, -7094.634789685377, -24299.388322293227, -1943.793835936248, 3849.574159412212, -1891.026786204374, -154350243.6813, 1.37646, SYM
+10, 201401, 2013 - 12 - 20 16:15:00, -377.0, F22, CALL, STRAT, 144.0, 140.345, 0.09566038240450792, 0.008813407863715872, 0.0007852689424384662, 0.0021295289144923784, 0.005324993197820229, 0.08842782200919548, -0.47989481865526434, -0.8028324007636266, -2.007522435578226, -33.3372888974667, 180.92034663303465, -407.4960157678369, -1550.905840965067, -5442.743810001693, -458.87444675807006, 957.8062320250265, -407.4960157678369, -72828588.06989999, 1.37646, SYM
+11, 201402, 2014 - 01 - 24 16:15:00, -43.0, F22, PUT, STRAT, 137.5, 140.345, 0.05414565513055749, 0.10463818541333941, 0.14529132959784974, -0.11936326135136956, 0.08106840033227831, 9.046889913827847, -2.3403474666535415, 5.132620238108891, -3.4859412142879673, -389.0162662945974, 100.63494106610229, -8599.471252145018, 9915.158754388978, -9450.995231657676, -5354.653299038616, 532.7691191532583, -8599.471252145018, -8306708.9841, 1.37646, SYM
+12, 201402, 2014 - 01 - 24 16:15:00, -264.0, F22, PUT, STRAT, 138.0, 140.345, 0.052853182910226726, 0.10463818541333941, 0.20369081242765574, -0.16004607860136968, 0.10141337819029916, 11.047155968410756, -2.789510903380204, 42.252164750761594, -26.77313184223898, -2916.44917566044, 736.4308784923738, -74018.27549798116, 81622.4271006569, -72586.63466280713, -40143.75632329569, 3898.7217192677417, -74018.27549798116, -50999329.576799996, 1.37646, SYM
+13, 201401, 2013 - 12 - 20 16:15:00, -376.0, F22, PUT, STRAT, 138.0, 140.345, 0.08897789701177691, 0.008813407863715872, 0.009425028910330369, -0.021620830082859088, 0.04411750741642045, 0.6814450839280415, -3.43983227630679, 8.129432111155017, -16.588182788574088, -256.2233515569436, 1293.376935891353, -4877.913910511415, 15704.378314735444, -44973.45961948536, -3526.811944840706, 6847.236989142353, -4877.913910511415, -72635408.7912, 1.37646, SYM
+14, 201401, 2013 - 12 - 20 16:15:00, -301.0, F22, PUT, STRAT, 138.5, 140.345, 0.08383267513417192, 0.008813407863715872, 0.020991265826436845, -0.045956251827941025, 0.08727871921287762, 1.2701629715541363, -6.0408289559434, 13.832831800210249, -26.270894483076166, -382.319054437795, 1818.2895157389635, -8696.984965596635, 26722.164695430383, -71224.98149804553, -5262.468856714474, 9626.164564746361, -8696.984965596635, -58146962.8887, 1.37646, SYM
+15, 201402, 2014 - 01 - 24 16:15:00, -286.0, F22, PUT, STRAT, 138.5, 140.345, 0.051599724402617266, 0.10463818541333941, 0.28321473137770425, -0.21146513081873966, 0.12351912253075312, 13.136076509490826, -3.2382097361444653, 60.479027414159546, -35.32646904379539, -3756.917881714376, 926.127984537317, -111492.45225751627, 116832.94892344868, -95776.22511698281, -51712.4718746457, 4902.992790754752, -111492.45225751627, -55249273.7082, 1.37646, SYM
+16, 201401, 2013 - 12 - 20 16:15:00, -739.0, F22, PUT, STRAT, 139.0, 140.345, 0.0791166184474159, 0.008813407863715872, 0.047581495319667155, -0.0967612790459439, 0.16434769983129724, 2.257195133461944, -10.131173555213623, 71.50658521495254, -121.45295017532867, -1668.0672036283765, 7486.937257302868, -48400.084510256995, 138135.90554124617, -329280.15202122304, -22960.277831063155, 39636.42175841195, -48400.084510256995, -142759486.9593, 1.37646, SYM
+17, 201401, 2013 - 12 - 20 16:15:00, -669.0, F22, PUT, STRAT, 139.5, 140.345, 0.07513349054261133, 0.008813407863715872, 0.10733307441031315, -0.1949726645282245, 0.27848967340302655, 3.6322892048663644, -15.482297001088007, 130.4367125693822, -186.30959150662477, -2430.001478055598, 10357.656693727877, -98837.84833028633, 251976.70050152476, -505117.8297913038, -33447.99834484408, 54834.231279417974, -98837.84833028633, -129236937.4503, 1.37646, SYM
+18, 201401, 2013 - 12 - 20 16:15:00, -471.0, F22, PUT, STRAT, 140.0, 140.345, 0.07231398622706062, 0.008813407863715872, 0.2318116692147143, -0.357200149447554, 0.391592427081917, 4.915801583071703, -20.16670499598669, 168.24127038979793, -184.4400331555829, -2315.3425456267723, 9498.518053109732, -150286.43988763154, 325007.2726147283, -500049.1307012041, -31869.76400353427, 50285.885228397776, -150286.43988763154, -90987440.2677, 1.37646, SYM
+19, 201401, 2013 - 12 - 20 16:15:00, -364.0, F22, PUT, STRAT, 141.0, 140.345, 0.07172143045750252, 0.008813407863715872, 0.7922715181315709, -0.7543151841866509, 0.333159035321538, 4.147995696473539, -16.876460506586433, 274.5707270439409, -121.26988885703983, -1509.8704335163682, 6143.031624397461, -396952.9396004471, 530413.7500248309, -328783.8408272312, -20782.762569179402, 32521.681960454345, -68777.34640044652, -70317257.4468, 1.37646, SYM
+20, 201401, 2013 - 12 - 20 16:15:00, -394.0, F22, PUT, STRAT, 141.5, 140.345, 0.0739452718231504, 0.008813407863715872, 1.212080035129219, -0.88042603375236, 0.20479158314197934, 2.628816497069195, -11.026098543797652, 346.8878572984298, -80.68788375793986, -1035.7536998452629, 4344.282826256274, -657341.595950662, 670115.460626992, -218758.93991649026, -14256.735376890107, 22998.967457802737, -30955.94375066146, -76112635.8078, 1.37646, SYM
+21, 201402, 2014 - 01 - 24 16:15:00, -40.0, GEE1, CALL, STRAT, 141.5, 140.345, 0.045487609195962696, 0.10463818541333941, 0.3747457492377393, 0.29120692771365, 0.1660598823861943, 15.56832633851715, -3.3830120036011397, -11.648277108545999, -6.642395295447772, -622.733053540686, 135.32048014404558, -20632.901359831147, -22502.057699266377, -18008.69332126275, -8571.671388766126, 716.3970311502808, -20632.901359831147, -7727171.148, 1.37646, SYM
+22, 201401, 2013 - 12 - 20 16:15:00, -12.0, GEE1, CALL, STRAT, 141.5, 140.345, 0.0739452718231504, 0.008813407863715872, 0.05709601681178711, 0.11956012929302556, 0.20479158314197934, 2.628816497069195, -11.027911868706408, -1.4347215515163068, -2.4574989977037522, -31.545797964830342, 132.33494242447688, -943.0845996090299, -2771.5847445726504, -6662.708829943866, -434.2152906667037, 700.5913648061363, -943.0845996090299, -2318151.3444, 1.37646, SYM
+23, 201402, 2014 - 01 - 24 16:15:00, -45.0, GEE1, CALL, STRAT, 142.0, 140.345, 0.044822991783170785, 0.10463818541333941, 0.24229877494137142, 0.21142760067302388, 0.14217904830463807, 13.134711351643713, -2.812643033008342, -9.514242030286075, -6.398057173708713, -591.0620108239671, 126.56893648537539, -15008.155729011005, -18379.544127878875, -17346.250014989233, -8135.732154187577, 670.0656858256148, -15008.155729011005, -8693067.5415, 1.37646, SYM
+24, 201401, 2013 - 12 - 20 16:15:00, -57.0, GEE1, CALL, STRAT, 142.0, 140.345, 0.07732984880519912, 0.008813407863715872, 0.022997617617102506, 0.053564485523868555, 0.10692101346714668, 1.4353175202195965, -6.296783951449458, -3.0531756748605074, -6.09449776762736, -81.813098652517, 358.9166852326191, -1804.3521424785042, -5898.102746139386, -16523.247467602414, -1126.1245777124357, 1900.1325405972727, -1804.3521424785042, -11011218.8859, 1.37646, SYM
+25, 201401, 2013 - 12 - 20 16:15:00, -68.0, GEE1, CALL, STRAT, 142.5, 140.345, 0.0814452531405243, 0.008813407863715872, 0.009355428262274312, 0.02334242880598363, 0.05141464658820557, 0.7269263150873877, -3.358771076933658, -1.5872851588068868, -3.496195967997979, -49.430989425942364, 228.39643323148874, -875.6613494405268, -3066.306020695293, -9478.797659311334, -680.3977970523262, 1209.1482864839038, -875.6613494405268, -13136190.9516, 1.37646, SYM
+26, 201402, 2014 - 01 - 24 16:15:00, -19.0, GEE1, CALL, STRAT, 142.5, 140.345, 0.04429193547308161, 0.10463818541333941, 0.14930517833206025, 0.14535540627931182, 0.11352765189447668, 10.36359429711007, -2.1930395074393734, -2.7617527193069247, -2.1570253859950568, -196.90829164509134, 41.66775064134809, -3904.7395095720058, -5335.133982634753, -5848.072409840642, -2710.3638711780245, 220.5922771068846, -3904.7395095720058, -3670406.2953000003, 1.37646, SYM
+27, 201401, 2013 - 12 - 20 16:15:00, -91.0, GEE1, CALL, STRAT, 143.0, 140.345, 0.08598678226600448, 0.008813407863715872, 0.003929576582252237, 0.010236258301012439, 0.024009328219809185, 0.35838470316321597, -1.748258969026736, -0.9314995053921319, -2.1848488680026357, -32.613007987852654, 159.091566181433, -492.21035339902915, -1799.464025610588, -5923.5067271790795, -448.9050097495967, 842.2429891772894, -492.21035339902915, -17579314.3617, 1.37646, SYM
+28, 201401, 2013 - 12 - 20 16:15:00, -117.0, GEE1, CALL, STRAT, 143.5, 140.345, 0.09076344895187359, 0.008813407863715872, 0.0017194411099074047, 0.004596451952699387, 0.01121737082629775, 0.1767420247600966, -0.9100718136522263, -0.5377848784658282, -1.3124323866768368, -20.678816896931302, 106.47840219731049, -276.9088034867481, -1038.8889491779587, -3558.233333802638, -284.63564305950064, 563.7048518788846, -276.9088034867481, -22601975.6079, 1.37646, SYM
+29, 201401, 2013 - 12 - 20 16:15:00, -126.0, GEE1, CALL, STRAT, 144.0, 140.345, 0.09566038240450792, 0.008813407863715872, 0.0007852689424384662, 0.0021295289144923784, 0.005324993197820229, 0.08842782200919548, -0.47989481865526434, -0.26832064322603966, -0.6709491429253489, -11.141905573158631, 60.46674715056331, -136.19230235211526, -518.3398831872638, -1819.0602654117067, -153.3638734522993, 320.1156107033245, -136.19230235211526, -24340589.1162, 1.37646, SYM
+30, 201402, 2014 - 01 - 24 16:15:00, -19.0, GEE1, PUT, STRAT, 137.5, 140.345, 0.05414565513055749, 0.10463818541333941, 0.14529132959784974, -0.11936326135136956, 0.08106840033227831, 9.046889913827847, -2.3403474666535415, 2.2679019656760215, -1.5402996063132879, -171.8909083627291, 44.46660186641729, -3799.766367226869, 4381.11665891606, -4176.021148871998, -2366.009597249621, 235.40961078864902, -3799.766367226869, -3670406.2953000003, 1.37646, SYM
+31, 201401, 2013 - 12 - 20 16:15:00, -64.0, GEE1, PUT, STRAT, 138.0, 140.345, 0.08897789701177691, 0.008813407863715872, 0.009425028910330369, -0.021620830082859088, 0.04411750741642045, 0.6814450839280415, -3.43983227630679, 1.3837331253029816, -2.8235204746509086, -43.612485371394655, 220.14926568363455, -830.2832188104537, 2673.0856705932674, -7655.056956508147, -600.3084161430988, 1165.48714708806, -830.2832188104537, -12363473.8368, 1.37646, SYM
+32, 201402, 2014 - 01 - 24 16:15:00, -45.0, GEE1, PUT, STRAT, 138.0, 140.345, 0.052853182910226726, 0.10463818541333941, 0.20369081242765574, -0.16004607860136968, 0.10141337819029916, 11.047155968410756, -2.789510903380204, 7.2020735370616356, -4.563602018563462, -497.122018578484, 125.52799065210918, -12616.751505337697, 13912.913710339246, -12372.721817523941, -6842.685736925402, 664.5548385115469, -12616.751505337697, -8693067.5415, 1.37646, SYM
+33, 201401, 2013 - 12 - 20 16:15:00, -51.0, GEE1, PUT, STRAT, 138.5, 140.345, 0.08383267513417192, 0.008813407863715872, 0.020991265826436845, -0.045956251827941025, 0.08727871921287762, 1.2701629715541363, -6.0408289559434, 2.3437688432249923, -4.451214679856759, -64.77831154926095, 308.08227675311343, -1473.5755257323203, 4527.675745737374, -12068.020120931302, -891.6475471509574, 1631.011271767656, -1473.5755257323203, -9852143.2137, 1.37646, SYM
+34, 201402, 2014 - 01 - 24 16:15:00, -40.0, GEE1, PUT, STRAT, 138.5, 140.345, 0.051599724402617266, 0.10463818541333941, 0.28321473137770425, -0.21146513081873966, 0.12351912253075312, 13.136076509490826, -3.2382097361444653, 8.458605232749587, -4.940764901230125, -525.443060379633, 129.5283894457786, -15593.349966086193, 16340.27257670611, -13395.276240137457, -7232.5135489014965, 685.733257448217, -15593.349966086193, -7727171.148, 1.37646, SYM
+35, 201401, 2013 - 12 - 20 16:15:00, -98.0, GEE1, PUT, STRAT, 139.0, 140.345, 0.0791166184474159, 0.008813407863715872, 0.047581495319667155, -0.0967612790459439, 0.16434769983129724, 2.257195133461944, -10.131173555213623, 9.4826053465025, -16.10607458346713, -221.20512307927052, 992.855008410935, -6418.414454675487, 18318.42861034117, -43666.38010565609, -3044.8000371369267, 5256.250787989675, -6418.414454675487, -18931569.312599998, 1.37646, SYM
+36, 201401, 2013 - 12 - 20 16:15:00, -111.0, GEE1, PUT, STRAT, 139.5, 140.345, 0.07513349054261133, 0.008813407863715872, 0.10733307441031315, -0.1949726645282245, 0.27848967340302655, 3.6322892048663644, -15.482297001088007, 21.64196576263292, -30.912353747735946, -403.18410174016645, 1718.5349671207687, -16399.10487991298, 41807.79335675523, -83808.78790259302, -5549.667886812695, 9098.05631093482, -16399.10487991298, -21442899.9357, 1.37646, SYM
+37, 201401, 2013 - 12 - 20 16:15:00, -108.0, GEE1, PUT, STRAT, 140.0, 140.345, 0.07231398622706062, 0.008813407863715872, 0.2318116692147143, -0.357200149447554, 0.391592427081917, 4.915801583071703, -20.16670499598669, 38.577616140335834, -42.29198212484704, -530.9065709717439, 2178.0041395665626, -34460.58494238685, 74523.96059955555, -114660.94716715509, -7307.7165867976655, 11530.52145364535, -34460.58494238685, -20863362.0996, 1.37646, SYM
+38, 201401, 2013 - 12 - 20 16:15:00, -83.0, GEE1, PUT, STRAT, 141.0, 140.345, 0.07172143045750252, 0.008813407863715872, 0.7922715181315709, -0.7543151841866509, 0.333159035321538, 4.147995696473539, -16.876460506586433, 62.608160287492026, -27.652199931687655, -344.28364280730375, 1400.746222046674, -90513.99446933273, 120945.99245071695, -74969.94172708844, -4738.926629785414, 7415.658249224481, -15682.746569332587, -16033880.132100001, 1.37646, SYM
+39, 201401, 2013 - 12 - 20 16:15:00, -56.0, GEE1, PUT, STRAT, 141.5, 140.345, 0.0739452718231504, 0.008813407863715872, 1.212080035129219, -0.88042603375236, 0.20479158314197934, 2.628816497069195, -11.026098543797652, 49.30385789013216, -11.468328655950843, -147.21372383587493, 617.4615184526685, -93429.26236862202, 95244.83704343032, -31092.641206404707, -2026.3380231112837, 3268.888775728308, -4399.8295686219335, -10818039.607199999, 1.37646, SYM"""
+ csv = StringIO(csv)
+ df = read_csv(csv).set_index(['index'])
+ for _ in range(100):
+ library.write('pandas', df)
+ result = library.read('pandas').data
+ assert len(result) == len(df)
+ assert np.all(df.values == result.values)
+ assert np.all(df.columns.values == result.columns.values)
+
+
+def test_append_after_truncate_after_append(library):
+ columns = ['MAIN_UPPER', 'MAIN_LOWER', 'AUX_UPPER', 'AUX_LOWER', 'TARGET_HEDGE_POSITION']
+ empty_df = DataFrame(columns=columns, dtype=np.float64)
+ library.write('sym', empty_df)
+ full_df = DataFrame(data=[np.zeros(5)], columns=columns)
+ library.write('sym', full_df)
+ library.write('sym', empty_df)
+ full_df = DataFrame(data=[np.zeros(5)], columns=columns)
+ library.write('sym', full_df)
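+    # Versions 1-4 correspond to the four writes above: empty, full, empty, full.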
+ assert len(library.read('sym', 1).data) == 0
+ assert len(library.read('sym', 2).data) == 1
+ assert len(library.read('sym', 3).data) == 0
+ assert len(library.read('sym', 4).data) == 1
+
+
+def test_can_write_pandas_df_with_object_columns(library):
+ expected = DataFrame(data=dict(A=['a', 'b', None, 'c'], B=[1., 2., 3., 4.]), index=range(4))
+ library.write('objects', expected)
+ saved_df = library.read('objects').data
+
+ assert_frame_equal(saved_df, expected)
+
+
+def panel(i1, i2, i3):
+ return Panel(np.random.randn(i1, i2, i3), range(i1), ['A%d' % i for i in range(i2)],
+ list(rrule(DAILY, count=i3, dtstart=dt(1970, 1, 1), interval=1)))
+
+
+@pytest.mark.parametrize("df_size", list(itertools.combinations_with_replacement([1, 2, 4], r=3)))
+def test_panel_save_read(library, df_size):
+ '''Note - empties are not tested here as they don't work!'''
+ pn = panel(*df_size)
+ library.write('pandas', pn)
+ result = library.read('pandas').data
+ assert np.all(pn.values == result.values), str(pn.values) + "!=" + str(result.values)
+ for i in range(3):
+ assert np.all(pn.axes[i] == result.axes[i])
+ if None not in pn.axes[i].names:
+ assert np.all(pn.axes[i].names == result.axes[i].names), \
+                str(pn.axes[i].names) + "!=" + str(result.axes[i].names)
+
+
+def test_save_read_ints(library):
+ ts1 = DataFrame(index=[dt(2012, 1, 1) + dtd(hours=x) for x in range(5)],
+ data={'col1':np.arange(5), 'col2':np.arange(5)})
+ ts1.index.name = 'index'
+ library.write('TEST_1', ts1)
+ ts2 = library.read('TEST_1').data
+ assert_frame_equal(ts1, ts2)
+
+
+def test_save_read_datetimes(library):
+ # FEF symbols have datetimes in the CLOSE_REVISION field. Handle specially.
+ ts1 = DataFrame(index=[dt(2012, 1, 1) + dtd(hours=x) for x in range(3)],
+ data={'field1': [1, 2, 3],
+ 'revision': [dt(2013, 1, 1), dt(2013, 1, 2), dt(2013, 1, 3)],
+ 'field2': [4, 5, 6]},
+ )
+ ts1.index.name = 'index'
+ library.write('TEST_1', ts1)
+ ts2 = library.read('TEST_1').data
+ assert_frame_equal(ts1, ts2)
+
+
+def test_labels(library):
+ ts1 = DataFrame(index=[dt(2012, 1, 1), dt(2012, 1, 2)],
+ data={'data': [1., 2.]})
+ ts1.index.name = 'some_index'
+ library.write('TEST_1', ts1)
+ ts2 = library.read('TEST_1').data
+ assert_frame_equal(ts1, ts2)
+
+
+def test_duplicate_labels(library):
+ ts1 = DataFrame(index=[dt(2012, 1, 1) + dtd(hours=x) for x in range(5)],
+ data=[[np.arange(5), np.arange(5, 10)]],
+ columns=['a', 'a']
+ )
+ library.write('TEST_1', ts1)
+ ts2 = library.read('TEST_1').data
+ assert_frame_equal(ts1, ts2)
+
+
+def test_no_labels(library):
+ ts1 = DataFrame(index=[dt(2012, 1, 1) + dtd(hours=x) for x in range(5)],
+ data=[[np.arange(5), np.arange(5, 10)]])
+ library.write('TEST_1', ts1)
+ ts2 = library.read('TEST_1').data
+ assert_frame_equal(ts1, ts2)
+
+
+@pytest.mark.xfail(reason='needs investigating')
+def test_no_index_labels(library):
+ ts1 = DataFrame(index=[dt(2012, 1, 1), dt(2012, 1, 2)],
+ data={'data': [1., 2.]})
+ library.write('TEST_1', ts1)
+ ts2 = library.read('TEST_1').data
+ assert_frame_equal(ts1, ts2)
+
+
+def test_not_unique(library):
+ d = dt.now()
+ ts = DataFrame(index=[d, d], data={'near': [1., 2.]})
+ ts.index.name = 'index'
+    library.write('ts', ts)
+ ts2 = library.read('ts').data
+ assert_frame_equal(ts, ts2)
diff --git a/tests/integration/store/test_pickle_store.py b/tests/integration/store/test_pickle_store.py
new file mode 100644
index 000000000..f3f27733b
--- /dev/null
+++ b/tests/integration/store/test_pickle_store.py
@@ -0,0 +1,94 @@
+import bson
+from datetime import datetime as dt, timedelta
+from mock import patch
+import numpy as np
+import re
+
+from arctic.arctic import Arctic
+
+
+def test_save_read_bson(library):
+ blob = {'foo': dt(2015, 1, 1), 'bar': ['a', 'b', ['x', 'y', 'z']]}
+ library.write('BLOB', blob)
+ saved_blob = library.read('BLOB').data
+ assert blob == saved_blob
+
+
+def test_save_read_bson_object(library):
+ blob = {'foo': dt(2015, 1, 1), 'object': Arctic}
+ library.write('BLOB', blob)
+ saved_blob = library.read('BLOB').data
+ assert blob == saved_blob
+
+
+def test_get_info_bson_object(library):
+ blob = {'foo': dt(2015, 1, 1), 'object': Arctic}
+ library.write('BLOB', blob)
+ assert library._get_info('BLOB').startswith('Handler: PickleStore')
+
+
+def test_bson_large_object(library):
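+    # The blob deliberately exceeds MongoDB's 16MB BSON document limit, so it cannot be
+    # stored as a single document and has to round-trip through the chunked write path.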
+ blob = {'foo': dt(2015, 1, 1), 'object': Arctic,
+            'large_thing': np.random.rand(int(2.1 * 1024 * 1024)).tostring()}
+ assert len(blob['large_thing']) > 16 * 1024 * 1024
+ library.write('BLOB', blob)
+ saved_blob = library.read('BLOB').data
+ assert blob == saved_blob
+
+
+def test_bson_leak_objects_delete(library):
+ blob = {'foo': dt(2015, 1, 1), 'object': Arctic}
+ library.write('BLOB', blob)
+ assert library._collection.count() == 1
+ assert library._collection.versions.count() == 1
+ library.delete('BLOB')
+ assert library._collection.count() == 0
+ assert library._collection.versions.count() == 0
+
+
+def test_bson_leak_objects_prune_previous(library):
+ blob = {'foo': dt(2015, 1, 1), 'object': Arctic}
+
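+    # Back-date the ObjectId so this version's _id (and hence its timestamp) looks a day
+    # old, making it eligible for pruning by a later write.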
+ yesterday = dt.utcnow() - timedelta(days=1, seconds=1)
+ _id = bson.ObjectId.from_datetime(yesterday)
+ with patch("bson.ObjectId", return_value=_id):
+ library.write('BLOB', blob)
+ assert library._collection.count() == 1
+ assert library._collection.versions.count() == 1
+
+ _id = bson.ObjectId.from_datetime(dt.utcnow() - timedelta(minutes=130))
+ with patch("bson.ObjectId", return_value=_id):
+ library.write('BLOB', {}, prune_previous_version=False)
+ assert library._collection.count() == 1
+ assert library._collection.versions.count() == 2
+
+    # This write should prune the oldest version in the chunk collection
+ library.write('BLOB', {})
+ assert library._collection.count() == 0
+ assert library._collection.versions.count() == 2
+
+
+def test_prune_previous_doesnt_kill_other_objects(library):
+ blob = {'foo': dt(2015, 1, 1), 'object': Arctic}
+
+ yesterday = dt.utcnow() - timedelta(days=1, seconds=1)
+ _id = bson.ObjectId.from_datetime(yesterday)
+ with patch("bson.ObjectId", return_value=_id):
+ library.write('BLOB', blob, prune_previous_version=False)
+ assert library._collection.count() == 1
+ assert library._collection.versions.count() == 1
+
+ _id = bson.ObjectId.from_datetime(dt.utcnow() - timedelta(hours=10))
+ with patch("bson.ObjectId", return_value=_id):
+ library.write('BLOB', blob, prune_previous_version=False)
+ assert library._collection.count() == 1
+ assert library._collection.versions.count() == 2
+
+    # This write should prune the oldest version in the chunk collection
+ library.write('BLOB', {})
+ assert library._collection.count() == 1
+ assert library._collection.versions.count() == 2
+
+ library._delete_version('BLOB', 2)
+ assert library._collection.count() == 0
+ assert library._collection.versions.count() == 1
diff --git a/tests/integration/store/test_version_store.py b/tests/integration/store/test_version_store.py
new file mode 100644
index 000000000..dd2ed7291
--- /dev/null
+++ b/tests/integration/store/test_version_store.py
@@ -0,0 +1,858 @@
+import bson
+from bson.son import SON
+from datetime import datetime as dt, timedelta as dtd
+import pandas as pd
+from pandas.util.testing import assert_frame_equal
+from pymongo.errors import OperationFailure
+from pymongo.read_preferences import ReadPreference
+from pymongo.server_type import SERVER_TYPE
+from datetime import datetime
+from mock import patch
+import time
+import pytest
+
+from arctic.exceptions import NoDataFoundException, DuplicateSnapshotException
+
+from ...util import read_str_as_pandas
+
+
+ts1 = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 1.0
+ 2012-10-08 17:06:11.040 | 2.0
+ 2012-10-09 17:06:11.040 | 2.5
+ 2012-11-08 17:06:11.040 | 3.0""")
+
+ts2 = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 1.0
+ 2012-10-08 17:06:11.040 | 4.0
+ 2012-10-09 17:06:11.040 | 4.5
+ 2012-10-10 17:06:11.040 | 5.0
+ 2012-11-08 17:06:11.040 | 3.0""")
+
+ts1_append = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 1.0
+ 2012-10-08 17:06:11.040 | 2.0
+ 2012-10-09 17:06:11.040 | 2.5
+ 2012-11-08 17:06:11.040 | 3.0
+ 2012-11-09 17:06:11.040 | 3.0""")
+
+
+symbol = 'TS1'
+
+
+from pymongo.cursor import _QUERY_OPTIONS
+from pymongo.message import query as __query
+def _query(allow_secondary, library_name):
+ def _internal_query(options, *args, **kwargs):
+ coll_name = args[0]
+ data_coll_name = 'arctic_{}'.format(library_name)
+ versions_coll_name = data_coll_name + '.versions'
+ if allow_secondary and coll_name in (data_coll_name, versions_coll_name):
+            # Reads from the Versions and Chunks collections are allowed to go to secondaries
+ assert bool(options & _QUERY_OPTIONS['slave_okay']) == allow_secondary, "{}: options:{}".format(coll_name, options)
+ elif '.$cmd' not in coll_name:
+ # All other collections we force PRIMARY read.
+ assert bool(options & _QUERY_OPTIONS['slave_okay']) == False, "{}: options:{}".format(coll_name, options)
+ return __query(options, *args, **kwargs)
+ return _internal_query
+
+
+# MongoDB always sets slaveOk when talking to a single server.
+# Pretend we're a mongos for the tests that care...
+#
+# A _Query's slaveOk bit is already set for queries with non-primary
+# read preference. If this is a direct connection to a mongod, override
+# and *always* set the slaveOk bit. See bullet point 2 in
+# server-selection.rst#topology-type-single.
+# set_slave_ok = (
+# topology.description.topology_type == TOPOLOGY_TYPE.Single
+# and server.description.server_type != SERVER_TYPE.Mongos)
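+#
+# The tests below therefore patch ServerDescription.server_type to Mongos so that
+# pymongo does not unconditionally set slaveOk, and the _query hook above can assert
+# which collections are genuinely allowed to read from secondaries.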
+
+
+def test_store_item_new_version(library, library_name):
+ with patch('pymongo.message.query', side_effect=_query(False, library_name)), \
+ patch('pymongo.server_description.ServerDescription.server_type', SERVER_TYPE.Mongos):
+ library.write(symbol, ts1)
+ coll = library._collection
+ count = coll.count()
+ assert coll.versions.count() == 1
+
+ # No change to the TS
+ library.write(symbol, ts1, prune_previous_version=False)
+ assert coll.count() == count
+ assert coll.versions.count() == 2
+
+
+def test_store_item_read_preference(library_secondary, library_name):
+ with patch('arctic.arctic.ArcticLibraryBinding.check_quota'), \
+ patch('pymongo.message.query', side_effect=_query(False, library_name)) as query, \
+ patch('pymongo.server_description.ServerDescription.server_type', SERVER_TYPE.Mongos):
+ # write an item
+ library_secondary.write(symbol, ts1)
+ library_secondary.write(symbol, ts1_append, prune_previous_version=False)
+ # delete an individual version
+ library_secondary._delete_version(symbol, 1)
+ # delete the item entirely
+ library_secondary.delete(symbol)
+ assert query.call_count > 0
+
+
+def test_read_item_read_preference_SECONDARY(library_secondary, library_name):
+ # write an item
+ library_secondary.write(symbol, ts1)
+ with patch('pymongo.message.query', side_effect=_query(True, library_name)) as query, \
+ patch('pymongo.server_description.ServerDescription.server_type', SERVER_TYPE.Mongos):
+ library_secondary.read(symbol)
+ assert query.call_count > 0
+
+
+def test_query_falls_back_to_primary(library_secondary, library_name):
+ allow_secondary = [True]
+ def _query(options, *args, **kwargs):
+        # If we're allowing secondary reads then raise when reading a chunk.
+        # The subsequent attempt should go to the primary only.
+ if args[0] == 'arctic_{}'.format(library_name) and \
+ bool(options & _QUERY_OPTIONS['slave_okay']) == True:
+ allow_secondary[0] = False
+ raise OperationFailure("some_error")
+ return __query(options, *args, **kwargs)
+
+ library_secondary.write(symbol, ts1)
+ with patch('pymongo.message.query', side_effect=_query), \
+ patch('pymongo.server_description.ServerDescription.server_type', SERVER_TYPE.Mongos):
+ assert library_secondary.read(symbol) is not None
+ # We raised at least once on a secondary read
+ assert allow_secondary[0] == False
+
+
+def test_store_item_metadata(library):
+ library.write(symbol, ts1, metadata={'key': 'value'})
+
+ after = library.read(symbol)
+
+ assert after.metadata['key'] == 'value'
+ assert after.version
+ assert_frame_equal(after.data, ts1)
+
+
+def test_read_metadata(library):
+ library.write(symbol, ts1, metadata={'key': 'value'})
+
+ after = library.read_metadata(symbol)
+
+ assert after.metadata['key'] == 'value'
+ assert after.version
+ assert after.data is None
+
+
+def test_read_metadata_throws_on_deleted_symbol(library):
+ library.write(symbol, ts1, metadata={'key': 'value'})
+ library.delete(symbol)
+
+ with pytest.raises(NoDataFoundException):
+ library.read_metadata(symbol)
+
+
+def test_store_item_and_update(library):
+ coll = library._collection
+
+ # Store the first timeseries
+ none = datetime.now()
+ time.sleep(1)
+ library.write(symbol, ts1)
+ original = datetime.now()
+
+ # Assertions:
+ assert coll.versions.count() == 1
+ assert_frame_equal(library.read(symbol).data, ts1)
+
+ # Update the TimeSeries
+ time.sleep(1)
+ library.write(symbol, ts2, prune_previous_version=False)
+ recent = datetime.now()
+
+ assert coll.versions.count() == 2
+ assert_frame_equal(library.read(symbol).data, ts2)
+
+ # Get the different versions of the DB
+ with pytest.raises(NoDataFoundException):
+ library.read(symbol, as_of=none)
+ assert_frame_equal(library.read(symbol, as_of=original).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=recent).data, ts2)
+
+ # Now push back in the original version
+ time.sleep(1)
+ library.write(symbol, ts1, prune_previous_version=False)
+
+ assert coll.versions.count() == 3
+ assert_frame_equal(library.read(symbol).data, ts1)
+
+ # Get the different versions of the DB
+ with pytest.raises(NoDataFoundException):
+ library.read(symbol, as_of=none)
+ assert_frame_equal(library.read(symbol, as_of=original).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=recent).data, ts2)
+ assert_frame_equal(library.read(symbol, as_of=datetime.now()).data, ts1)
+
+
+def test_append_update(library):
+ library.write(symbol, ts1)
+ library.snapshot('snap')
+
+ coll = library._collection
+
+ # Assertions:
+ assert coll.versions.count() == 1
+ assert_frame_equal(library.read(symbol).data, ts1)
+
+ # Append an item
+ dts = list(ts1.index)
+ dts.append(dts[-1] + dtd(days=1))
+ values = list(ts1.near.values)
+ values.append(47.)
+ ts2 = pd.DataFrame(index=dts, data=values, columns=ts1.columns)
+ ts2.index.name = ts1.index.name
+
+ # Saving ts2 shouldn't create any new chunks. Instead it should
+ # reuse the last chunk.
+ library.write(symbol, ts2, prune_previous_version=False)
+ assert coll.versions.count() == 2
+ assert_frame_equal(library.read(symbol, as_of='snap').data, ts1)
+ assert_frame_equal(library.read(symbol).data, ts2)
+
+ # We should be able to save a smaller timeseries too
+ # This isn't likely to happen, so we don't care too much about space saving
+ # just make sure we get it right.
+ library.write(symbol, ts1, prune_previous_version=False)
+ assert_frame_equal(library.read(symbol, as_of=1).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=2).data, ts2)
+ assert_frame_equal(library.read(symbol, as_of=3).data, ts1)
+
+ # Append an item, and add a whole new chunk
+ dts = list(ts2.index)
+ dts.append(dts[-1] + dtd(days=1))
+ dts.append(dts[-1] + dtd(days=40))
+ values = list(ts2.near.values)
+ values.append(47.)
+ values.append(53.)
+ ts3 = pd.DataFrame(index=dts, data=values, columns=ts1.columns)
+ ts3.index.name = ts1.index.name
+
+ library.write(symbol, ts3, prune_previous_version=False)
+ assert_frame_equal(library.read(symbol, as_of=1).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=2).data, ts2)
+ assert_frame_equal(library.read(symbol, as_of=3).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=4).data, ts3)
+
+ library.write(symbol, ts3, prune_previous_version=False)
+ assert_frame_equal(library.read(symbol, as_of=1).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=2).data, ts2)
+ assert_frame_equal(library.read(symbol, as_of=3).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=4).data, ts3)
+ assert_frame_equal(library.read(symbol, as_of=5).data, ts3)
+
+
+def test_append(library):
+ library.append(symbol, ts1, upsert=True)
+ library.append(symbol, ts1_append, upsert=True)
+ assert len(library.read(symbol).data) == len(ts1) + len(ts1_append)
+
+
+def test_append_should_overwrite_after_delete(library):
+ library.append(symbol, ts1, upsert=True)
+ library.append(symbol, ts1_append, upsert=True)
+ assert len(library.read(symbol).data) == len(ts1) + len(ts1_append)
+ library.delete(symbol)
+ library.append(symbol, ts1_append, upsert=True)
+ assert len(library.read(symbol).data) == len(ts1_append)
+
+
+def test_append_empty_ts(library):
+ library.append(symbol, ts1, upsert=True)
+ library.append(symbol, pd.DataFrame(), upsert=True)
+ assert len(library.read(symbol).data) == len(ts1)
+
+
+def test_query_version_as_of_int(library):
+ # Store the first timeseries
+ library.write(symbol, ts1)
+ library.write(symbol, ts2, prune_previous_version=False)
+
+ assert_frame_equal(library.read(symbol, as_of=1).data, ts1)
+ assert_frame_equal(library.read(symbol).data, ts2)
+
+
+def test_list_version(library):
+ assert len(list(library.list_versions(symbol))) == 0
+ dates = [None, None, None]
+ now = dt.utcnow()
+ for x in xrange(len(dates)):
+ dates[x] = now - dtd(minutes=130 - x)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(dates[x])):
+ library.write(symbol, ts1, prune_previous_version=False)
+ assert len(list(library.list_versions(symbol))) == 3
+
+ library.write(symbol, ts1, prune_previous_version=True)
+ assert len(list(library.list_versions(symbol))) >= 2
+
+ versions = list(library.list_versions(symbol))
+ for i, x in enumerate([4, 3]):
+ assert versions[i]['symbol'] == symbol
+ assert versions[i]['date'] >= dates[i]
+ assert versions[i]['version'] == x
+
+
+def test_list_version_latest_only(library):
+ assert len(list(library.list_versions(symbol))) == 0
+ dates = [None, None, None]
+ now = dt.utcnow()
+ for x in xrange(len(dates)):
+ dates[x] = now - dtd(minutes=20 - x)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(dates[x])):
+ library.write(symbol, ts1, prune_previous_version=False)
+ assert len(list(library.list_versions(symbol))) == 3
+
+ library.write(symbol, ts1, prune_previous_version=True)
+ assert len(list(library.list_versions(symbol, latest_only=True))) == 1
+
+ versions = list(library.list_versions(symbol))
+ for i, x in enumerate([4, ]):
+ assert versions[i]['symbol'] == symbol
+ assert versions[i]['date'] >= dates[i]
+ assert versions[i]['version'] == x
+
+
+def test_list_version_snapshot(library):
+ library.write('A', ts1)
+ library.snapshot('one')
+ library.write('B', ts2)
+ library.snapshot('two')
+ library.write('A', ts2)
+ library.snapshot('three')
+ library.write('C', ts2)
+
+ assert set(x['symbol'] for x in library.list_versions()) \
+ == set(['A', 'B', 'C'])
+
+ assert set(x['symbol'] for x in library.list_versions(snapshot='one')) \
+ == set(['A'])
+
+ assert set(x['symbol'] for x in library.list_versions(snapshot='two')) \
+ == set(['A', 'B'])
+
+ assert set(x['symbol'] for x in library.list_versions(snapshot='three')) \
+ == set(['A', 'B'])
+
+ assert [x['snapshots'] for x in library.list_versions(symbol='A')] \
+ == [['three', ], ['one', 'two']]
+
+ assert [x['snapshots'] for x in library.list_versions(symbol='B')] \
+ == [['two', 'three']]
+
+ assert all('parent' not in x for x in library.list_versions(symbol='C'))
+
+
+def test_delete_versions(library):
+ library.write(symbol, ts1)
+ library.write(symbol, ts2, prune_previous_version=False)
+ library.write(symbol, ts1, prune_previous_version=False)
+ library.write(symbol, ts2, prune_previous_version=False)
+
+ coll = library._collection
+
+ # Delete version 1 (ts1)
+ library._delete_version(symbol, 1)
+ assert_frame_equal(library.read(symbol, as_of=2).data, ts2)
+ assert_frame_equal(library.read(symbol, as_of=3).data, ts1)
+
+ library._delete_version(symbol, 2)
+ assert_frame_equal(library.read(symbol, as_of=3).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=4).data, ts2)
+
+ library._delete_version(symbol, 3)
+ assert_frame_equal(library.read(symbol).data, ts2)
+
+ library._delete_version(symbol, 4)
+ assert coll.count() == 0
+
+
+def test_delete_bson_versions(library):
+ coll = library._collection
+
+ a = [{'a':'b'}]
+ c = [{'c':'d'}]
+ library.write(symbol, a)
+ library.write(symbol, c, prune_previous_version=False)
+ library.write(symbol, a, prune_previous_version=False)
+ library.write(symbol, c, prune_previous_version=False)
+ assert coll.versions.count() == 4
+
+ library._delete_version(symbol, 1)
+ assert library.read(symbol, as_of=2).data == c
+ assert library.read(symbol, as_of=3).data == a
+ assert coll.versions.count() == 3
+
+ library._delete_version(symbol, 2)
+ assert library.read(symbol, as_of=3).data == a
+ assert library.read(symbol, as_of=4).data == c
+ assert coll.versions.count() == 2
+
+ library._delete_version(symbol, 3)
+ assert coll.versions.count() == 1
+ assert library.read(symbol).data == c
+
+ library._delete_version(symbol, 4)
+ assert coll.versions.count() == 0
+
+
+def test_delete_item_has_symbol(library):
+ library.write(symbol, ts1)
+ library.write(symbol, ts2, prune_previous_version=False)
+ library.write(symbol, ts1, prune_previous_version=False)
+ library.write(symbol, ts2, prune_previous_version=False)
+
+ library.delete(symbol)
+ for version in (1, 2, 3, 4, None):
+ with pytest.raises(NoDataFoundException):
+ library.read(symbol, version)
+
+ # Has symbol returns false - this should really be has_data
+ assert not library.has_symbol(symbol)
+ assert symbol not in library.list_symbols()
+ assert [x['version'] for x in library.list_versions(symbol)] == []
+
+
+def test_delete_item_snapshot(library):
+ library.write(symbol, ts1)
+ library.write(symbol, ts2, prune_previous_version=False)
+ library.write(symbol, ts1, prune_previous_version=False)
+ library.snapshot('snap')
+ library.write(symbol, ts2, prune_previous_version=False)
+
+ library.delete(symbol)
+
+ for version in (1, 2, 4, None):
+ with pytest.raises(NoDataFoundException):
+ library.read(symbol, version)
+
+ # Can get the version out of the snapshots
+ assert_frame_equal(library.read(symbol, 'snap').data, ts1)
+ assert_frame_equal(library.read(symbol, 3).data, ts1)
+
+ assert not library.has_symbol(symbol)
+ assert not library.has_symbol(symbol, as_of=2)
+ assert library.has_symbol(symbol, as_of=3)
+ assert symbol in library.list_symbols(all_symbols=True)
+ assert symbol in library.list_symbols(snapshot='snap')
+ assert symbol not in library.list_symbols()
+ assert sorted([x['version'] for x in library.list_versions(symbol)]) == [3, 5]
+
+ # Should be able to create another snapshot
+ library.snapshot('snap2')
+ with pytest.raises(NoDataFoundException):
+ library.read(symbol, 'snap2')
+ assert_frame_equal(library.read(symbol, 'snap').data, ts1)
+ assert symbol in library.list_symbols(snapshot='snap')
+ assert symbol not in library.list_symbols(snapshot='snap2')
+
+
+def test_has_symbol(library):
+ assert not library.has_symbol(symbol)
+ library.write(symbol, ts1)
+ assert library.has_symbol(symbol)
+
+
+def test_snapshot(library):
+ library.write(symbol, ts1)
+ library.snapshot('current')
+ library.write(symbol, ts2)
+ assert_frame_equal(library.read(symbol, as_of='current').data, ts1)
+ assert_frame_equal(library.read(symbol).data, ts2)
+ versions = library.list_versions(symbol)
+ assert versions[0]['snapshots'] == []
+ assert versions[1]['snapshots'] == ['current']
+
+ library.snapshot('new')
+ assert_frame_equal(library.read(symbol, as_of='current').data, ts1)
+ assert_frame_equal(library.read(symbol, as_of='new').data, ts2)
+ assert_frame_equal(library.read(symbol).data, ts2)
+ versions = library.list_versions(symbol)
+ assert versions[0]['snapshots'] == ['new']
+ assert versions[1]['snapshots'] == ['current']
+
+ # Replace the current version, and the snapshot shouldn't be deleted
+ library.write(symbol, ts1, prune_previous_version=True)
+ assert_frame_equal(library.read(symbol, as_of='current').data, ts1)
+ assert_frame_equal(library.read(symbol, as_of='new').data, ts2)
+ assert_frame_equal(library.read(symbol).data, ts1)
+ versions = library.list_versions(symbol)
+ assert versions[0]['snapshots'] == []
+ assert versions[1]['snapshots'] == ['new']
+ assert versions[2]['snapshots'] == ['current']
+
+
+def test_snapshot_exclusion(library):
+ library.write(symbol, ts1)
+ library.snapshot('current', skip_symbols=[symbol])
+ versions = list(library.list_versions(symbol))
+ assert len(versions) == 1
+ assert versions[0]['snapshots'] == []
+
+
+def test_snapshot_delete(library):
+ library.write(symbol, ts1)
+ library.snapshot('current')
+ library.write(symbol, ts2)
+
+ # We have two versions of the symbol
+ assert len(list(library.list_versions(symbol))) == 2
+ library.delete_snapshot('current')
+ # Data no longer referenced by snapshot
+ with pytest.raises(NoDataFoundException):
+ library.read(symbol, as_of='current')
+ # But still accessible through the version
+ assert_frame_equal(library.read(symbol, as_of=1).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=2).data, ts2)
+
+ # Snapshot again
+ library.snapshot('current')
+ library.write(symbol, ts1)
+ assert_frame_equal(library.read(symbol, as_of='current').data, ts2)
+
+
+def test_multiple_snapshots(library):
+ library.write(symbol, ts1)
+ library.snapshot('current')
+ library.write(symbol, ts2)
+ library.snapshot('current2')
+
+ assert 'current' in library.list_snapshots()
+ assert 'current2' in library.list_snapshots()
+
+ assert_frame_equal(library.read(symbol).data, ts2)
+ assert_frame_equal(library.read(symbol, as_of=1).data, ts1)
+ assert_frame_equal(library.read(symbol, as_of=2).data, ts2)
+ assert_frame_equal(library.read(symbol, as_of='current').data, ts1)
+ assert_frame_equal(library.read(symbol, as_of='current2').data, ts2)
+
+ library.delete_snapshot('current')
+ assert_frame_equal(library.read(symbol, as_of='current2').data, ts2)
+ library.delete_snapshot('current2')
+ assert len(list(library.list_versions(symbol))) == 2
+
+
+def test_delete_identical_snapshots(library):
+ library.write(symbol, ts1)
+ library.snapshot('current1')
+ library.snapshot('current2')
+ library.snapshot('current3')
+
+ library.delete_snapshot('current3')
+ assert_frame_equal(library.read(symbol, as_of='current2').data, ts1)
+ library.delete_snapshot('current1')
+ assert_frame_equal(library.read(symbol, as_of='current2').data, ts1)
+ assert_frame_equal(library.read(symbol).data, ts1)
+
+
+def test_list_snapshots(library):
+ library.write(symbol, ts1)
+ library.snapshot('current')
+ library.snapshot('current2')
+
+ assert 'current' in library.list_snapshots()
+ assert 'current2' in library.list_snapshots()
+
+
+def test_duplicate_snapshots(library):
+ library.write(symbol, ts1)
+ library.snapshot('current')
+ with pytest.raises(DuplicateSnapshotException):
+ library.snapshot('current')
+
+
+def test_prunes_multiple_versions(library):
+ coll = library._collection
+
+ a = [{'a':'b'}]
+ c = [{'c':'d'}]
+    # Create back-dated ObjectIds so the earlier versions look old enough to be pruned
+ now = dt.utcnow()
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=125))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=122))):
+ library.write(symbol, c, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=121))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=119))):
+ library.write(symbol, c, prune_previous_version=False)
+ assert coll.versions.count() == 4
+
+ # Prunes all versions older than the most recent version that's older than 10 mins
+ library.write(symbol, a, prune_previous_version=True)
+ assert coll.versions.count() == 3
+ assert library.read(symbol, as_of=3).data == a
+ assert library.read(symbol, as_of=4).data == c
+ assert library.read(symbol, as_of=5).data == a
+
+
+def test_prunes_doesnt_prune_snapshots(library):
+ coll = library._collection
+
+ a = [{'a':'b'}]
+ c = [{'c':'d'}]
+ now = dt.utcnow()
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=125))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=122))):
+ library.write(symbol, c, prune_previous_version=False)
+ library.snapshot('snap')
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=121))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=119))):
+ library.write(symbol, c, prune_previous_version=False)
+ assert coll.versions.count() == 4
+
+ # Prunes all versions older than the most recent version that's older than 10 mins
+ library.write(symbol, a, prune_previous_version=True)
+ assert coll.versions.count() == 4
+ assert library.read(symbol, as_of='snap').data == c
+ assert library.read(symbol, as_of=3).data == a
+ assert library.read(symbol, as_of=4).data == c
+ assert library.read(symbol, as_of=5).data == a
+
+    # Remove the snapshot; the version should now be pruned
+ library.delete_snapshot('snap')
+ assert coll.versions.count() == 4
+ library.write(symbol, c, prune_previous_version=True)
+ assert coll.versions.count() == 4
+ assert library.read(symbol, as_of=4).data == c
+ assert library.read(symbol, as_of=5).data == a
+ assert library.read(symbol, as_of=6).data == c
+
+
+def test_prunes_multiple_versions_ts(library):
+ coll = library._collection
+
+ a = ts1
+ c = ts2
+    # Create back-dated ObjectIds so the earlier versions look old enough to be pruned
+ now = dt.utcnow()
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=125))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=122))):
+ library.write(symbol, c, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=121))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=119))):
+ library.write(symbol, c, prune_previous_version=False)
+ assert coll.versions.count() == 4
+
+ # Prunes all versions older than the most recent version that's older than 10 mins
+ library.write(symbol, a, prune_previous_version=True)
+ assert coll.versions.count() == 3
+ assert_frame_equal(library.read(symbol, as_of=3).data, a)
+ assert_frame_equal(library.read(symbol, as_of=4).data, c)
+ assert_frame_equal(library.read(symbol, as_of=5).data, a)
+
+
+def test_prunes_doesnt_prune_snapshots_ts(library):
+ coll = library._collection
+
+ a = ts1
+ c = ts2
+ now = dt.utcnow()
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=125))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=122))):
+ library.write(symbol, c, prune_previous_version=False)
+ library.snapshot('snap')
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=121))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=119))):
+ library.write(symbol, c, prune_previous_version=False)
+ assert coll.versions.count() == 4
+
+ # Prunes all versions older than the most recent version that's older than 10 mins
+ library.write(symbol, a, prune_previous_version=True)
+ assert coll.versions.count() == 4
+ assert_frame_equal(library.read(symbol, as_of='snap').data, c)
+ assert_frame_equal(library.read(symbol, as_of=3).data, a)
+ assert_frame_equal(library.read(symbol, as_of=4).data, c)
+ assert_frame_equal(library.read(symbol, as_of=5).data, a)
+
+    # Remove the snapshot; the version should now be pruned
+ library.delete_snapshot('snap')
+ assert coll.versions.count() == 4
+ library.write(symbol, c, prune_previous_version=True)
+ assert coll.versions.count() == 4
+ assert_frame_equal(library.read(symbol, as_of=4).data, c)
+ assert_frame_equal(library.read(symbol, as_of=5).data, a)
+ assert_frame_equal(library.read(symbol, as_of=6).data, c)
+
+
+def test_prunes_multiple_versions_fully_different_tss(library):
+ coll = library._collection
+
+ a = ts1
+ b = ts2
+ c = b.copy()
+ c.index = [i + dtd(days=365) for i in c.index]
+ c.index.name = b.index.name
+    # Create back-dated ObjectIds so the earlier versions look old enough to be pruned
+ now = dt.utcnow()
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=125))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=124))):
+ library.write(symbol, b, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=122))):
+ library.write(symbol, c, prune_previous_version=False)
+    # The a, b and c versions above will be pruned; a and b share months
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=121))):
+ library.write(symbol, c, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=119))):
+ library.write(symbol, c, prune_previous_version=False)
+ assert coll.versions.count() == 5
+
+ # Prunes all versions older than the most recent version that's older than 10 mins
+ library.write(symbol, c, prune_previous_version=True)
+ assert_frame_equal(library.read(symbol, as_of=4).data, c)
+ assert_frame_equal(library.read(symbol, as_of=5).data, c)
+ assert_frame_equal(library.read(symbol, as_of=6).data, c)
+
+
+def test_prunes_doesnt_prune_snapshots_fully_different_tss(library):
+ coll = library._collection
+
+ a = ts1
+ b = ts2
+ c = b.copy()
+ c.index = [i + dtd(days=365) for i in c.index]
+ c.index.name = b.index.name
+ now = dt.utcnow()
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=125))):
+ library.write(symbol, a, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=123))):
+ library.write(symbol, b, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=122))):
+ library.write(symbol, c, prune_previous_version=False)
+ library.snapshot('snap')
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=121))):
+ library.write(symbol, c, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=118))):
+ library.write(symbol, c, prune_previous_version=False)
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=119))):
+ library.write(symbol, c, prune_previous_version=False)
+ assert coll.versions.count() == 6
+
+ # Prunes all versions older than the most recent version that's older than 10 mins
+ library.write(symbol, c, prune_previous_version=True)
+ assert coll.versions.count() == 5
+ assert_frame_equal(library.read(symbol, as_of='snap').data, c)
+ assert_frame_equal(library.read(symbol, as_of=4).data, c)
+ assert_frame_equal(library.read(symbol, as_of=5).data, c)
+ assert_frame_equal(library.read(symbol, as_of=6).data, c)
+ assert_frame_equal(library.read(symbol, as_of=7).data, c)
+
+ library.delete_snapshot('snap')
+ assert coll.versions.count() == 5
+ library.write(symbol, c, prune_previous_version=True)
+ assert_frame_equal(library.read(symbol, as_of=4).data, c)
+ assert_frame_equal(library.read(symbol, as_of=5).data, c)
+ assert_frame_equal(library.read(symbol, as_of=6).data, c)
+ assert_frame_equal(library.read(symbol, as_of=7).data, c)
+
+
+def test_prunes_previous_version_append_interaction(library):
+ ts = ts1
+ ts2 = ts1.append(pd.DataFrame(index=[ts.index[-1] + dtd(days=1),
+ ts.index[-1] + dtd(days=2), ],
+ data=[3.7, 3.8],
+ columns=['near']))
+ ts2.index.name = ts1.index.name
+ ts3 = ts.append(pd.DataFrame(index=[ts2.index[-1] + dtd(days=1),
+ ts2.index[-1] + dtd(days=2)],
+ data=[4.8, 4.9],
+ columns=['near']))
+ ts3.index.name = ts1.index.name
+ ts4 = ts
+ ts5 = ts2
+ ts6 = ts3
+ now = dt.utcnow()
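+    # Each write below is patched with a back-dated ObjectId; from_datetime is kept
+    # pointing at the real bson.ObjectId.from_datetime so anything that calls it still works.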
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=130)),
+ from_datetime=bson.ObjectId.from_datetime):
+ library.write(symbol, ts, prune_previous_version=False)
+ assert_frame_equal(ts, library.read(symbol).data)
+
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=129)),
+ from_datetime=bson.ObjectId.from_datetime):
+ library.write(symbol, ts2, prune_previous_version=False)
+ assert_frame_equal(ts, library.read(symbol, as_of=1).data)
+ assert_frame_equal(ts2, library.read(symbol).data)
+
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=128)),
+ from_datetime=bson.ObjectId.from_datetime):
+ library.write(symbol, ts3, prune_previous_version=False)
+ assert_frame_equal(ts, library.read(symbol, as_of=1).data)
+ assert_frame_equal(ts2, library.read(symbol, as_of=2).data)
+ assert_frame_equal(ts3, library.read(symbol).data)
+
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=127)),
+ from_datetime=bson.ObjectId.from_datetime):
+ library.write(symbol, ts4, prune_previous_version=False)
+ assert_frame_equal(ts, library.read(symbol, as_of=1).data)
+ assert_frame_equal(ts2, library.read(symbol, as_of=2).data)
+ assert_frame_equal(ts3, library.read(symbol, as_of=3).data)
+ assert_frame_equal(ts4, library.read(symbol).data)
+
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now - dtd(minutes=126)),
+ from_datetime=bson.ObjectId.from_datetime):
+ library.write(symbol, ts5, prune_previous_version=False)
+ assert_frame_equal(ts, library.read(symbol, as_of=1).data)
+ assert_frame_equal(ts2, library.read(symbol, as_of=2).data)
+ assert_frame_equal(ts3, library.read(symbol, as_of=3).data)
+ assert_frame_equal(ts4, library.read(symbol, as_of=4).data)
+ assert_frame_equal(ts5, library.read(symbol).data)
+
+ with patch("bson.ObjectId", return_value=bson.ObjectId.from_datetime(now),
+ from_datetime=bson.ObjectId.from_datetime):
+ library.write(symbol, ts6, prune_previous_version=True)
+
+ with pytest.raises(NoDataFoundException):
+ library.read(symbol, as_of=1)
+ with pytest.raises(NoDataFoundException):
+ library.read(symbol, as_of=2)
+ with pytest.raises(NoDataFoundException):
+ library.read(symbol, as_of=3)
+ assert_frame_equal(ts5, library.read(symbol, as_of=5).data)
+ assert_frame_equal(ts6, library.read(symbol).data)
+
+
+def test_list_symbols(library):
+ library.snapshot('snap1')
+ library.write('asdf', {'foo':'bar'}, metadata={'a':1, 'b':10})
+ library.snapshot('snap2')
+ assert 'asdf' in library.list_symbols()
+ assert 'asdf' not in library.list_symbols(snapshot='snap1')
+ assert 'asdf' in library.list_symbols(snapshot='snap2')
+ assert 'asdf' in library.list_symbols(all_symbols=True)
+ assert 'asdf' in library.list_symbols(a=1)
+ assert library.list_symbols(a={'$gt': 5}) == []
+ assert library.list_symbols(b={'$gt': 5}) == ['asdf']
+
+
+def test_list_symbols_regex(library):
+ library.snapshot('snap1')
+ library.write('asdf', {'foo':'bar'}, metadata={'a':1, 'b':10})
+ library.write('furble', {'foo':'bar'}, metadata={'a':1, 'b':10})
+ library.snapshot('snap2')
+ assert 'asdf' in library.list_symbols(regex='asd')
+ assert 'furble' not in library.list_symbols(regex='asd')
+ assert 'asdf' not in library.list_symbols(snapshot='snap1', regex='asd')
+ assert 'asdf' in library.list_symbols(snapshot='snap2', regex='asd')
+ assert 'furble' not in library.list_symbols(snapshot='snap2', regex='asd')
+ assert 'asdf' in library.list_symbols(all_symbols=True, regex='asd')
+ assert 'furble' not in library.list_symbols(all_symbols=True, regex='asd')
+ assert 'asdf' in library.list_symbols(a=1, regex='asd')
+ assert 'furble' not in library.list_symbols(a=1, regex='asd')
+ assert library.list_symbols(a={'$gt': 5}, regex='asd') == []
+ assert library.list_symbols(b={'$gt': 5}, regex='asd') == ['asdf']
diff --git a/tests/integration/store/test_version_store_audit.py b/tests/integration/store/test_version_store_audit.py
new file mode 100644
index 000000000..897f3d269
--- /dev/null
+++ b/tests/integration/store/test_version_store_audit.py
@@ -0,0 +1,215 @@
+from bson import ObjectId
+from datetime import datetime as dt
+from mock import patch
+from pandas.util.testing import assert_frame_equal
+from pymongo.errors import OperationFailure
+import pytest
+
+from arctic.store.audit import ArcticTransaction
+from arctic.exceptions import ConcurrentModificationException, NoDataFoundException
+
+from ...util import read_str_as_pandas
+
+
+ts1 = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 1.0
+ 2012-10-08 17:06:11.040 | 2.0
+ 2012-10-09 17:06:11.040 | 2.5
+ 2012-11-08 17:06:11.040 | 3.0""")
+
+ts2 = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 1.0
+ 2012-10-08 17:06:11.040 | 4.0
+ 2012-10-09 17:06:11.040 | 4.5
+ 2012-10-10 17:06:11.040 | 5.0
+ 2012-11-08 17:06:11.040 | 3.0""")
+
+ts3 = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 1.0
+ 2012-10-08 17:06:11.040 | 4.0
+ 2012-10-09 17:06:11.040 | 4.5
+ 2012-10-10 17:06:11.040 | 5.0
+ 2012-11-08 17:06:11.040 | 3.0
+ 2012-11-09 17:06:11.040 | 44.0""")
+
+ts1_append = read_str_as_pandas(""" times | near
+ 2012-09-08 17:06:11.040 | 1.0
+ 2012-10-08 17:06:11.040 | 2.0
+ 2012-10-09 17:06:11.040 | 2.5
+ 2012-11-08 17:06:11.040 | 3.0
+ 2012-11-09 17:06:11.040 | 3.0""")
+
+symbol = 'TS1'
+
+
+def test_ArcticTransaction_can_do_first_writes(library):
+ with ArcticTransaction(library, 'SYMBOL_NOT_HERE', 'user', 'log') as cwb:
+ cwb.write('SYMBOL_NOT_HERE', ts1)
+ wrote_vi = library.read('SYMBOL_NOT_HERE')
+ assert_frame_equal(wrote_vi.data, ts1)
+
+
+def test_ArcticTransaction_detects_concurrent_writes(library):
+ library.write('FOO', ts1)
+
+ from threading import Event, Thread
+ e1 = Event()
+ e2 = Event()
+
+ def losing_writer():
+        # Will attempt to write version 2, find that version 2 is already there, and end up writing version 3
+ with pytest.raises(ConcurrentModificationException):
+ with ArcticTransaction(library, 'FOO', 'user', 'log') as cwb:
+ cwb.write('FOO', ts1_append, metadata={'foo': 'bar'})
+ e1.wait()
+
+ def winning_writer():
+        # Will also attempt to write version 2
+ with ArcticTransaction(library, 'FOO', 'user', 'log') as cwb:
+ cwb.write('FOO', ts2, metadata={'foo': 'bar'})
+ e2.wait()
+
+ t1 = Thread(target=losing_writer)
+ t2 = Thread(target=winning_writer)
+ t1.start()
+ t2.start()
+
+ # both read the same timeseries and are locked doing some 'work'
+ e2.set()
+ # t2 should now be able to finish
+ t2.join()
+ e1.set()
+ t1.join()
+
+ # we're expecting the losing_writer to undo its write once it realises that it wrote v3 instead of v2
+ wrote_vi = library.read('FOO')
+ assert_frame_equal(wrote_vi.data, ts2)
+ assert {'foo': 'bar'} == wrote_vi.metadata
+
+
+def test_audit_writes(library):
+ with ArcticTransaction(library, symbol, 'u1', 'l1') as mt:
+ mt.write(symbol, ts1)
+
+ with ArcticTransaction(library, symbol, 'u2', 'l2') as mt:
+ mt.write(symbol, ts2)
+
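+    # read_audit_log returns entries newest-first, recording the original and new version for each change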
+ audit_log = library.read_audit_log(symbol)
+ assert audit_log == [{u'new_v': 2, u'symbol': u'TS1', u'message': u'l2', u'user': u'u2', u'orig_v': 1},
+ {u'new_v': 1, u'symbol': u'TS1', u'message': u'l1', u'user': u'u1', u'orig_v': 0}]
+ assert_frame_equal(ts1, library.read(symbol, audit_log[0]['orig_v']).data)
+ assert_frame_equal(ts2, library.read(symbol, audit_log[0]['new_v']).data)
+
+
+def test_metadata_changes_writes(library):
+ with ArcticTransaction(library, symbol, 'u1', 'l1') as mt:
+ mt.write(symbol, ts1, metadata={'original': 'data'})
+
+ with ArcticTransaction(library, symbol, 'u2', 'l2') as mt:
+ mt.write(symbol, ts1, metadata={'some': 'data', 'original': 'data'})
+
+ audit_log = library.read_audit_log(symbol)
+ assert audit_log == [{u'new_v': 2, u'symbol': u'TS1', u'message': u'l2', u'user': u'u2', u'orig_v': 1},
+ {u'new_v': 1, u'symbol': u'TS1', u'message': u'l1', u'user': u'u1', u'orig_v': 0}]
+ assert_frame_equal(ts1, library.read(symbol, audit_log[0]['orig_v']).data)
+ assert_frame_equal(ts1, library.read(symbol, audit_log[0]['new_v']).data)
+
+ assert library.read(symbol, audit_log[0]['orig_v']).metadata == {'original': 'data'}
+ assert library.read(symbol, audit_log[0]['new_v']).metadata == {'some': 'data', 'original': 'data'}
+
+
+def test_cleanup_orphaned_versions_integration(library):
+ _id = ObjectId.from_datetime(dt(2013, 1, 1))
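+    # Back-date the version's ObjectId; it is still referenced by a parent, so _cleanup_orphaned_versions must leave it alone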
+ with patch('bson.ObjectId', return_value=_id):
+ with ArcticTransaction(library, symbol, 'u1', 'l1') as mt:
+ mt.write(symbol, ts1)
+ assert library._versions.find({'parent': {'$size': 1}}).count() == 1
+ library._cleanup_orphaned_versions(False)
+ assert library._versions.find({'parent': {'$size': 1}}).count() == 1
+
+
+def test_corrupted_read_writes_new(library):
+ with ArcticTransaction(library, symbol, 'u1', 'l1') as mt:
+ mt.write(symbol, ts1)
+
+ res = library.read(symbol)
+ assert res.version == 1
+
+ with ArcticTransaction(library, symbol, 'u1', 'l2') as mt:
+ mt.write(symbol, ts2)
+
+ res = library.read(symbol)
+ assert res.version == 2
+
+ with patch.object(library, 'read') as l:
+ l.side_effect = OperationFailure('some failure')
+ with ArcticTransaction(library, symbol, 'u1', 'l2') as mt:
+ mt.write(symbol, ts3, metadata={'a': 1, 'b': 2})
+
+ res = library.read(symbol)
+    # A corrupted read still increments to the next version correctly when new data is written
+ assert res.version == 3
+ assert_frame_equal(ts3, library.read(symbol, 3).data)
+ assert res.metadata == {'a': 1, 'b': 2}
+
+ with patch.object(library, 'read') as l:
+ l.side_effect = OperationFailure('some failure')
+ with ArcticTransaction(library, symbol, 'u1', 'l2') as mt:
+ mt.write(symbol, ts3, metadata={'a': 1, 'b': 2})
+
+ res = library.read(symbol)
+    # A corrupted read still increments to the next version correctly when the data & metadata are unchanged
+ assert res.version == 4
+ assert_frame_equal(ts3, library.read(symbol, 4).data)
+ assert res.metadata == {'a': 1, 'b': 2}
+
+
+def test_write_after_delete(library):
+ with ArcticTransaction(library, symbol, 'u1', 'l') as mt:
+ mt.write(symbol, ts1)
+ library.delete(symbol)
+
+ with ArcticTransaction(library, symbol, 'u1', 'l') as mt:
+ mt.write(symbol, ts1_append)
+ assert_frame_equal(library.read(symbol).data, ts1_append)
+
+
+def test_ArcticTransaction_write_skips_for_exact_match(library):
+ ts = read_str_as_pandas("""times | PX_LAST
+ 2014-10-31 21:30:00.000 | 204324.674
+ 2014-11-13 21:30:00.000 | 193964.45
+ 2014-11-14 21:30:00.000 | 193650.403""")
+
+ with ArcticTransaction(library, symbol, 'u1', 'l1') as mt:
+ mt.write(symbol, ts)
+
+ version = library.read(symbol).version
+
+    # Try to store the same TimeSeries again
+ with ArcticTransaction(library, symbol, 'u1', 'l2') as mt:
+ mt.write(symbol, ts)
+
+ assert library.read(symbol).version == version
+
+
+def test_ArcticTransaction_write_doesnt_skip_for_close_ts(library):
+ orig_ts = read_str_as_pandas("""times | PX_LAST
+ 2014-10-31 21:30:00.000 | 204324.674
+ 2014-11-13 21:30:00.000 | 193964.45
+ 2014-11-14 21:30:00.000 | 193650.403""")
+
+ with ArcticTransaction(library, symbol, 'u1', 'l1') as mt:
+ mt.write(symbol, orig_ts)
+
+ assert_frame_equal(library.read(symbol).data, orig_ts)
+
+    # Try to store a slightly different TimeSeries
+ new_ts = read_str_as_pandas("""times | PX_LAST
+ 2014-10-31 21:30:00.000 | 204324.672
+ 2014-11-13 21:30:00.000 | 193964.453
+ 2014-11-14 21:30:00.000 | 193650.406""")
+
+ with ArcticTransaction(library, symbol, 'u1', 'l2') as mt:
+ mt.write(symbol, new_ts)
+
+ assert_frame_equal(library.read(symbol).data, new_ts)
diff --git a/tests/integration/test_arctic.py b/tests/integration/test_arctic.py
new file mode 100644
index 000000000..46a8fc4f7
--- /dev/null
+++ b/tests/integration/test_arctic.py
@@ -0,0 +1,146 @@
+from datetime import datetime as dt, timedelta as dtd
+from mock import patch
+from pandas import DataFrame
+from pandas.util.testing import assert_frame_equal
+import pytest
+import time
+import numpy as np
+
+from arctic.arctic import Arctic, VERSION_STORE
+from arctic.exceptions import LibraryNotFoundException, QuotaExceededException
+
+from ..util import get_large_ts
+
+
+def test_connect_to_Arctic_string(mongo_host):
+ arctic = Arctic(mongo_host=mongo_host)
+ assert arctic.list_libraries() == []
+ assert arctic.mongo_host == mongo_host
+
+
+def test_connect_to_Arctic_connection(mongodb, mongo_host):
+ arctic = Arctic(mongodb)
+ assert arctic.list_libraries() == []
+ assert arctic.mongo_host == mongo_host
+
+
+def test_simple(library):
+ sym = 'symbol'
+ data = get_large_ts(100)
+
+ library.write(sym, data)
+ orig = dt.now()
+    time.sleep(1)  # Move the timestamp on by one second
+ data2 = get_large_ts(100)
+ library.write(sym, data2, prune_previous_version=False)
+
+ # Get the timeseries, it should be the same
+ read2 = library.read(sym).data
+ assert_frame_equal(read2, data2)
+
+ # Ensure we can get the previous version
+ read = library.read(sym, as_of=orig).data
+ assert_frame_equal(read, data)
+
+
+def test_indexes(arctic):
+ c = arctic._conn
+ arctic.initialize_library("library", VERSION_STORE, segment='month')
+ chunk = c.arctic.library.index_information()
+ assert chunk == {u'_id_': {u'key': [(u'_id', 1)], u'ns': u'arctic.library', u'v': 1},
+ u'symbol_1_parent_1_segment_1': {u'background': True,
+ u'key': [(u'symbol', 1),
+ (u'parent', 1),
+ (u'segment', 1)],
+ u'ns': u'arctic.library',
+ u'unique': True,
+ u'v': 1},
+ u'symbol_1_sha_1': {u'background': True,
+ u'key': [(u'symbol', 1), (u'sha', 1)],
+ u'ns': u'arctic.library',
+ u'unique': True,
+ u'v': 1},
+ u'symbol_hashed': {u'background': True,
+ u'key': [(u'symbol', u'hashed')],
+ u'ns': u'arctic.library',
+ u'v': 1}}
+ snapshots = c.arctic.library.snapshots.index_information()
+ assert snapshots == {u'_id_': {u'key': [(u'_id', 1)],
+ u'ns': u'arctic.library.snapshots',
+ u'v': 1},
+ u'name_1': {u'background': True,
+ u'key': [(u'name', 1)],
+ u'ns': u'arctic.library.snapshots',
+ u'unique': True,
+ u'v': 1}}
+ versions = c.arctic.library.versions.index_information()
+ assert versions == {u'_id_': {u'key': [(u'_id', 1)],
+ u'ns': u'arctic.library.versions',
+ u'v': 1},
+ u'symbol_1__id_-1': {u'background': True,
+ u'key': [(u'symbol', 1), (u'_id', -1)],
+ u'ns': u'arctic.library.versions',
+ u'v': 1},
+ u'symbol_1_version_-1': {u'background': True,
+ u'key': [(u'symbol', 1), (u'version', -1)],
+ u'ns': u'arctic.library.versions',
+ u'unique': True,
+ u'v': 1}}
+ version_nums = c.arctic.library.version_nums.index_information()
+ assert version_nums == {u'_id_': {u'key': [(u'_id', 1)],
+ u'ns': u'arctic.library.version_nums',
+ u'v': 1},
+ u'symbol_1': {u'background': True,
+ u'key': [(u'symbol', 1)],
+ u'ns': u'arctic.library.version_nums',
+ u'unique': True,
+ u'v': 1}}
+
+
+def test_delete_library(arctic, library, library_name):
+ mongo = arctic._conn
+    # Create a second library ('user.library2') too - ensure that it isn't deleted
+ arctic.initialize_library('user.library2', VERSION_STORE, segment='month')
+ library.write('asdf', get_large_ts(1))
+ assert 'TEST' in mongo.arctic_test.collection_names()
+ assert 'TEST.versions' in mongo.arctic_test.collection_names()
+ assert 'library2' in mongo.arctic_user.collection_names()
+ assert 'library2.versions' in mongo.arctic_user.collection_names()
+
+ arctic.delete_library(library_name)
+ assert 'TEST' not in mongo.arctic_user.collection_names()
+ assert 'TEST.versions' not in mongo.arctic_user.collection_names()
+ with pytest.raises(LibraryNotFoundException):
+ arctic[library_name]
+ with pytest.raises(LibraryNotFoundException):
+ arctic['arctic_{}'.format(library_name)]
+ assert 'library2' in mongo.arctic_user.collection_names()
+ assert 'library2.versions' in mongo.arctic_user.collection_names()
+
+
+def test_quota(arctic, library, library_name):
+ thing = list(range(100))
+ library._arctic_lib.set_quota(10)
+ assert arctic.get_quota(library_name) == 10
+ assert library._arctic_lib.get_quota() == 10
+ library.write('thing', thing)
+ with pytest.raises(QuotaExceededException):
+ library.write('ts', thing)
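+    # The quota check is throttled rather than run on every write, so these writes still slip through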
+ library.write('ts', thing)
+ library.write('ts', thing)
+ library.write('ts', thing)
+ with pytest.raises(QuotaExceededException):
+ arctic.check_quota(library_name)
+
+
+def test_check_quota(arctic, library, library_name):
+ with patch('arctic.logging.logger.info') as info:
+ arctic.check_quota(library_name)
+ assert info.call_count == 1
+
+
+def test_default_mongo_retry_timeout():
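+    # With serverSelectionTimeoutMS=0 the lookup against an unresolvable host should fail fast rather than block on retries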
+ now = time.time()
+ with pytest.raises(LibraryNotFoundException):
+ Arctic('unresolved-host', serverSelectionTimeoutMS=0)['some.lib']
+ assert time.time() - now < 1.
diff --git a/tests/integration/test_compress_integration.py b/tests/integration/test_compress_integration.py
new file mode 100644
index 000000000..f084506ba
--- /dev/null
+++ b/tests/integration/test_compress_integration.py
@@ -0,0 +1,35 @@
+import random
+import lz4
+import string
+import pytest
+from datetime import datetime as dt
+
+import arctic._compress as c
+
+
+@pytest.mark.parametrize("n, length", [(300, 5e4), # micro TS
+ (5, 2e6), # Futures TS
+ (10, 2e6), # Futures TS
+ (100, 2e6), # Large TS
+ (250, 2e6)]) # Even Bigger TS
+def test_performance_sequential(n, length):
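+    # Compare round-trip compress/decompress timings: arctic's Cython LZ4, its parallel array variant, and the plain lz4 bindings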
+ _str = random_string(length)
+ _strarr = [_str for _ in range(n)]
+ now = dt.now()
+ [c.decompress(y) for y in [c.compressHC(x) for x in _strarr]]
+ clz4_time = (dt.now() - now).total_seconds()
+ now = dt.now()
+ c.decompressarr(c.compressarrHC(_strarr))
+ clz4_time_p = (dt.now() - now).total_seconds()
+ now = dt.now()
+ [lz4.decompress(y) for y in [lz4.compressHC(x) for x in _strarr]]
+ lz4_time = (dt.now() - now).total_seconds()
+ print
+ print "LZ4 Test %sx len:%s" % (n, length)
+ print " Cython LZ4 %s s" % clz4_time
+ print " Cython LZ4 Parallel %s s" % clz4_time_p
+ print " LZ4 %s s" % lz4_time
+
+
+def random_string(N):
+ return ''.join(random.choice(list(string.printable) + ['hello', 'world', 'hellworld', 'Hello', 'w0rld']) for _ in xrange(int(N)))
diff --git a/tests/integration/test_decorators.py b/tests/integration/test_decorators.py
new file mode 100644
index 000000000..fd3e3f487
--- /dev/null
+++ b/tests/integration/test_decorators.py
@@ -0,0 +1,7 @@
+from arctic.decorators import _get_host
+
+
+def test_get_host_VersionStore(library, mongo_host):
+ assert _get_host(library) == {'mnodes': [mongo_host],
+ 'mhost': mongo_host,
+ 'l': u'arctic_test.TEST'}
diff --git a/tests/integration/test_howtos.py b/tests/integration/test_howtos.py
new file mode 100644
index 000000000..59477c464
--- /dev/null
+++ b/tests/integration/test_howtos.py
@@ -0,0 +1,13 @@
+import glob
+import fcntl
+import os
+import pytest
+import subprocess
+
+HOWTO_DIR = os.path.realpath(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'howtos'))
+
+
+@pytest.mark.parametrize('howto', sorted([x.split('/')[-1]
+ for x in glob.glob(os.path.join(HOWTO_DIR, 'how_to_*.py'))]))
+def test_howto(howto, mongo_host):
+ execfile(HOWTO_DIR + "/" + howto, {'mongo_host': mongo_host})
diff --git a/tests/integration/tickstore/__init__.py b/tests/integration/tickstore/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/integration/tickstore/conftest.py b/tests/integration/tickstore/conftest.py
new file mode 100644
index 000000000..cfc4a8edc
--- /dev/null
+++ b/tests/integration/tickstore/conftest.py
@@ -0,0 +1,23 @@
+import pytest
+
+from arctic.tickstore import toplevel
+from arctic.tickstore import tickstore
+
+
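+# Any test requesting the 'tickstore_lib' fixture is parametrized over the available tickstore backends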
+def pytest_generate_tests(metafunc):
+ if 'tickstore_lib' in metafunc.fixturenames:
+ metafunc.parametrize("tickstore_lib", ['tickstore'], indirect=True)
+
+
+@pytest.fixture(scope='function')
+def tickstore_lib(arctic, request):
+ if request.param == "tickstore":
+ store = tickstore
+ arctic.initialize_library('test.tickstore', store.TICK_STORE_TYPE)
+ return arctic['test.tickstore']
+
+
+@pytest.fixture(scope='function')
+def toplevel_tickstore(arctic):
+ arctic.initialize_library('test.toplevel_tickstore', toplevel.TICK_STORE_TYPE)
+ return arctic['test.toplevel_tickstore']
diff --git a/tests/integration/tickstore/test_toplevel.py b/tests/integration/tickstore/test_toplevel.py
new file mode 100644
index 000000000..b66703dea
--- /dev/null
+++ b/tests/integration/tickstore/test_toplevel.py
@@ -0,0 +1,170 @@
+from datetime import datetime as dt, timedelta as dtd
+from dateutil.rrule import rrule, DAILY
+import pytest
+import pandas as pd
+from pandas.util.testing import assert_frame_equal
+import numpy as np
+
+from arctic.date import DateRange, mktz
+from arctic.tickstore import toplevel
+from arctic.tickstore import tickstore
+from arctic.exceptions import NoDataFoundException, LibraryNotFoundException, OverlappingDataException
+
+
+FEED_2010_LEVEL1 = toplevel.TickStoreLibrary('FEED_2010.LEVEL1', DateRange(dt(2010, 1, 1), dt(2010, 12, 31, 23, 59, 59)))
+FEED_2011_LEVEL1 = toplevel.TickStoreLibrary('FEED_2011.LEVEL1', DateRange(dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59)))
+FEED_2012_LEVEL1 = toplevel.TickStoreLibrary('FEED_2012.LEVEL1', DateRange(dt(2012, 1, 1), dt(2012, 12, 31, 23, 59, 59)))
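+# One underlying tickstore library per calendar year; the top-level store routes reads and writes to them by date range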
+
+@pytest.mark.parametrize(('start', 'end', 'expected'),
+ [(dt(2010, 2, 1), dt(2010, 4, 1), [FEED_2010_LEVEL1]),
+ (dt(2011, 2, 1), dt(2011, 4, 1), [FEED_2011_LEVEL1]),
+ (dt(2010, 2, 1), dt(2011, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1]),
+ (dt(2011, 2, 1), dt(2012, 4, 1), [FEED_2011_LEVEL1, FEED_2012_LEVEL1]),
+ (dt(2010, 2, 1), dt(2012, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1, FEED_2012_LEVEL1]),
+ (dt(2009, 2, 1), dt(2010, 12, 31), [FEED_2010_LEVEL1]),
+ (dt(2012, 2, 1), dt(2013, 12, 31), [FEED_2012_LEVEL1]),
+ (dt(2009, 2, 1), dt(2009, 12, 31), []),
+ (dt(2013, 2, 1), dt(2013, 12, 31), []),
+ ])
+def should_return_libraries_for_the_given_daterange(toplevel_tickstore, start, end, expected):
+ toplevel_tickstore._collection.insert_one({'start': dt(2010, 1, 1),
+ 'end': dt(2010, 12, 31, 23, 59, 59),
+ 'library_name': 'FEED_2010.LEVEL1'})
+ toplevel_tickstore._collection.insert_one({'start': dt(2011, 1, 1),
+ 'end': dt(2011, 12, 31, 23, 59, 59),
+ 'library_name': 'FEED_2011.LEVEL1'})
+ toplevel_tickstore._collection.insert_one({'start': dt(2012, 1, 1),
+ 'end': dt(2012, 12, 31, 23, 59, 59),
+ 'library_name': 'FEED_2012.LEVEL1'})
+ libraries = toplevel_tickstore._get_library_metadata(DateRange(start=start, end=end))
+ assert libraries == expected
+
+
+def should_raise_exceptions_if_no_libraries_are_found_in_the_date_range_when_reading_data(toplevel_tickstore):
+ toplevel_tickstore._collection.insert_one({'start': dt(2010, 1, 1),
+ 'end': dt(2010, 12, 31, 23, 59, 59),
+ 'library_name': 'FEED_2010.LEVEL1'})
+ with pytest.raises(NoDataFoundException) as e:
+ toplevel_tickstore.read('blah', DateRange(start=dt(2012, 1, 1), end=dt(2012, 3, 1)))
+ assert "No underlying libraries exist for the given date range" in str(e)
+
+
+def should_return_data_when_date_range_falls_in_a_single_underlying_library(toplevel_tickstore, arctic):
+ arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
+    tickstore_2010 = arctic['FEED_2010.LEVEL1']  # don't shadow the tickstore module imported above
+ arctic.initialize_library('test_current.toplevel_tickstore', tickstore.TICK_STORE_TYPE)
+ tickstore_current = arctic['test_current.toplevel_tickstore']
+ toplevel_tickstore._collection.insert_one({'start': dt(2010, 1, 1),
+ 'end': dt(2010, 12, 31, 23, 59, 59),
+ 'library_name': 'FEED_2010.LEVEL1'})
+ dates = pd.date_range('20100101', periods=6, tz=mktz('Europe/London'))
+ df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
+    tickstore_2010.write('blah', df)
+ tickstore_current.write('blah', df)
+ res = toplevel_tickstore.read('blah', DateRange(start=dt(2010, 1, 1), end=dt(2010, 1, 6)), list('ABCD'))
+
+ assert_frame_equal(df, res.tz_localize(mktz('Europe/London')))
+
+
+def should_return_data_when_date_range_spans_libraries(toplevel_tickstore, arctic):
+ arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
+ arctic.initialize_library('FEED_2011.LEVEL1', tickstore.TICK_STORE_TYPE)
+ tickstore_2010 = arctic['FEED_2010.LEVEL1']
+ tickstore_2011 = arctic['FEED_2011.LEVEL1']
+ toplevel_tickstore._collection.insert_one({'start': dt(2010, 1, 1),
+ 'end': dt(2010, 12, 31, 23, 59, 59),
+ 'library_name': 'FEED_2010.LEVEL1'})
+ toplevel_tickstore._collection.insert_one({'start': dt(2011, 1, 1),
+ 'end': dt(2011, 12, 31, 23, 59, 59),
+ 'library_name': 'FEED_2011.LEVEL1'})
+ dates = pd.date_range('20100101', periods=6, tz=mktz('Europe/London'))
+ df_10 = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
+ tickstore_2010.write('blah', df_10)
+ dates = pd.date_range('20110101', periods=6, tz=mktz('Europe/London'))
+ df_11 = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
+ tickstore_2011.write('blah', df_11)
+ res = toplevel_tickstore.read('blah', DateRange(start=dt(2010, 1, 2), end=dt(2011, 1, 4)), list('ABCD'))
+ expected_df = pd.concat([df_10[1:], df_11[:4]])
+ assert_frame_equal(expected_df, res.tz_localize(mktz('Europe/London')))
+
+
+def should_add_underlying_library_where_none_exists(toplevel_tickstore, arctic):
+ arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
+ toplevel_tickstore.add(DateRange(start=dt(2010, 1, 1), end=dt(2010, 12, 31, 23, 59, 59, 999000)), 'FEED_2010.LEVEL1')
+ assert toplevel_tickstore._collection.find_one({'library_name': 'FEED_2010.LEVEL1'})
+
+
+def should_add_underlying_library_where_another_library_exists_in_a_non_overlapping_daterange(toplevel_tickstore, arctic):
+ toplevel_tickstore._collection.insert_one({'library_name': 'FEED_2011.LEVEL1', 'start': dt(2011, 1, 1), 'end': dt(2011, 12, 31)})
+ arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
+ toplevel_tickstore.add(DateRange(start=dt(2010, 1, 1), end=dt(2010, 12, 31, 23, 59, 59, 999000)), 'FEED_2010.LEVEL1')
+ assert set([ res['library_name'] for res in toplevel_tickstore._collection.find()]) == set(['FEED_2010.LEVEL1', 'FEED_2011.LEVEL1'])
+
+
+def should_raise_exception_if_library_does_not_exist(toplevel_tickstore):
+ with pytest.raises(LibraryNotFoundException) as e:
+ toplevel_tickstore.add(DateRange(start=dt(2010, 1, 1), end=dt(2010, 12, 31, 23, 59, 59, 999000)), 'FEED_2010.LEVEL1')
+ assert toplevel_tickstore._collection.find_one({'library_name': 'FEED_2010.LEVEL1'})
+ assert "Library FEED_2010.LEVEL1 was not correctly initialized" in str(e)
+
+
+def should_raise_exception_if_date_range_for_library_overlaps_with_existing_libraries(toplevel_tickstore, arctic):
+ toplevel_tickstore._collection.insert_one({'library_name': 'FEED_2010.LEVEL1', 'start': dt(2010, 1, 1), 'end': dt(2010, 6, 30)})
+ arctic.initialize_library('FEED_2010a.LEVEL1', tickstore.TICK_STORE_TYPE)
+ with pytest.raises(OverlappingDataException) as e:
+ toplevel_tickstore.add(DateRange(start=dt(2010, 6, 1), end=dt(2010, 12, 31, 23, 59, 59, 999000)), 'FEED_2010a.LEVEL1')
+ assert toplevel_tickstore._collection.find_one({'library_name': 'FEED_2010.LEVEL1'})
+ assert "There are libraries that overlap with the date range:" in str(e)
+
+
+def should_successfully_do_a_roundtrip_write_and_read_spanning_multiple_underlying_libraries(toplevel_tickstore, arctic):
+ arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
+ arctic.initialize_library('FEED_2011.LEVEL1', tickstore.TICK_STORE_TYPE)
+ arctic.initialize_library('test_current.toplevel_tickstore', tickstore.TICK_STORE_TYPE)
+ toplevel_tickstore.add(DateRange(start=dt(2010, 1, 1), end=dt(2010, 12, 31, 23, 59, 59, 999000)), 'FEED_2010.LEVEL1')
+ toplevel_tickstore.add(DateRange(start=dt(2011, 1, 1), end=dt(2011, 12, 31, 23, 59, 59, 999000)), 'FEED_2011.LEVEL1')
+ tickstore_current = arctic['test_current.toplevel_tickstore']
+ dates = pd.date_range('20101201', periods=57, tz=mktz('Europe/London'))
+ data = pd.DataFrame(np.random.randn(57, 4), index=dates, columns=list('ABCD'))
+ toplevel_tickstore.write('blah', data)
+ tickstore_current.write('blah', data)
+ res = toplevel_tickstore.read('blah', DateRange(start=dt(2010, 12, 1), end=dt(2011, 2, 1)), columns=list('ABCD'))
+ assert_frame_equal(data, res.tz_localize(mktz('Europe/London')))
+ lib2010 = arctic['FEED_2010.LEVEL1']
+ res = lib2010.read('blah', DateRange(start=dt(2010, 12, 1), end=dt(2011, 1, 1)), columns=list('ABCD'))
+ assert_frame_equal(data[dt(2010, 12, 1): dt(2010, 12, 31)], res.tz_localize(mktz('Europe/London')))
+ lib2011 = arctic['FEED_2011.LEVEL1']
+ res = lib2011.read('blah', DateRange(start=dt(2011, 1, 1), end=dt(2011, 2, 1)), columns=list('ABCD'))
+ assert_frame_equal(data[dt(2011, 1, 1): dt(2011, 2, 1)], res.tz_localize(mktz('Europe/London')))
+
+
+@pytest.mark.parametrize(('start', 'end', 'startr', 'endr'),
+ [(dt(2010, 1, 1), dt(2011, 12, 31), 0, 10),
+ (dt(2010, 1, 1), dt(2010, 12, 31), 0, 8),
+ (dt(2011, 1, 1), dt(2011, 12, 31), 7, 10),
+ ])
+def should_list_symbols_from_the_underlying_library(toplevel_tickstore, arctic, start, end, startr, endr):
+ arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
+ arctic.initialize_library('FEED_2011.LEVEL1', tickstore.TICK_STORE_TYPE)
+ toplevel_tickstore.add(DateRange(start=dt(2010, 1, 1), end=dt(2010, 12, 31, 23, 59, 59, 999000)), 'FEED_2010.LEVEL1')
+ toplevel_tickstore.add(DateRange(start=dt(2011, 1, 1), end=dt(2011, 12, 31, 23, 59, 59, 999000)), 'FEED_2011.LEVEL1')
+ dtstart = dt(2010, 1, 1, tzinfo=mktz('Europe/London'))
+ for i in range(10):
+ dates = pd.date_range(dtstart, periods=50, tz=mktz('Europe/London'))
+ df = pd.DataFrame(np.random.randn(50, 4), index=dates, columns=list('ABCD'))
+ dtstart = dates[-1] + dtd(days=1)
+ toplevel_tickstore.write('sym' + str(i), df)
+ expected_symbols = ['sym' + str(i) for i in range(startr, endr)]
+ assert expected_symbols == toplevel_tickstore.list_symbols(DateRange(start=start, end=end))
+
+
+def should_add_underlying_libraries_when_initialized(arctic):
+ arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
+ arctic.initialize_library('FEED_2011.LEVEL1', tickstore.TICK_STORE_TYPE)
+ arctic.initialize_library('FEED.LEVEL1', toplevel.TICK_STORE_TYPE)
+ toplevel_tickstore = arctic['FEED.LEVEL1']
+ cur = toplevel_tickstore._collection.find(projection={'_id': 0})
+ results = {result['library_name']: {'start': result['start'], 'end': result['end']} for result in cur}
+ expected_results = {'FEED_2010.LEVEL1': {'start': dt(2010, 1, 1), 'end': dt(2010, 12, 31, 23, 59, 59, 999000)},
+ 'FEED_2011.LEVEL1': {'start': dt(2011, 1, 1), 'end': dt(2011, 12, 31, 23, 59, 59, 999000)}}
+ assert expected_results == results
diff --git a/tests/integration/tickstore/test_ts_delete.py b/tests/integration/tickstore/test_ts_delete.py
new file mode 100644
index 000000000..dd8ecc5f9
--- /dev/null
+++ b/tests/integration/tickstore/test_ts_delete.py
@@ -0,0 +1,54 @@
+from datetime import datetime as dt
+from mock import patch
+import numpy as np
+from pandas.util.testing import assert_frame_equal
+import pytest
+
+from arctic import arctic as m
+from arctic.date import DateRange, CLOSED_OPEN, mktz
+from arctic.exceptions import OverlappingDataException, \
+ NoDataFoundException
+
+
+def test_delete(tickstore_lib):
+ DUMMY_DATA = [
+ {'a': 1.,
+ 'b': 2.,
+ 'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
+ },
+ {'a': 3.,
+ 'b': 4.,
+ 'index': dt(2013, 1, 30, tzinfo=mktz('Europe/London'))
+ },
+ ]
+ tickstore_lib.chunk_size = 1
+ tickstore_lib.write('SYM', DUMMY_DATA)
+ tickstore_lib.delete('SYM')
+ with pytest.raises(NoDataFoundException):
+ tickstore_lib.read('SYM', date_range=DateRange(20130102), columns=None)
+
+ # Delete with a date-range
+ tickstore_lib.write('SYM', DUMMY_DATA)
+ tickstore_lib.delete('SYM', DateRange(dt(2013, 1, 1, tzinfo=mktz('Europe/London')), dt(2013, 1, 2, tzinfo=mktz('Europe/London'))))
+ df = tickstore_lib.read('SYM', columns=None)
+ assert np.allclose(df['b'].values, np.array([4.]))
+
+
+def test_delete_daterange(tickstore_lib):
+ DUMMY_DATA = [
+ {'a': 1.,
+ 'b': 2.,
+ 'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
+ },
+ {'a': 3.,
+ 'b': 4.,
+ 'index': dt(2013, 2, 1, tzinfo=mktz('Europe/London'))
+ },
+ ]
+ tickstore_lib.chunk_size = 1
+ tickstore_lib.write('SYM', DUMMY_DATA)
+
+ # Delete with a date-range
+ tickstore_lib.delete('SYM', DateRange(dt(2013, 1, 1, tzinfo=mktz('Europe/London')), dt(2013, 2, 1, tzinfo=mktz('Europe/London')), CLOSED_OPEN))
+ df = tickstore_lib.read('SYM', columns=None)
+ assert np.allclose(df['b'].values, np.array([4.]))
diff --git a/tests/integration/tickstore/test_ts_read.py b/tests/integration/tickstore/test_ts_read.py
new file mode 100644
index 000000000..e16dc30f8
--- /dev/null
+++ b/tests/integration/tickstore/test_ts_read.py
@@ -0,0 +1,391 @@
+from datetime import datetime as dt
+from mock import patch
+import numpy as np
+from numpy.testing.utils import assert_array_equal
+from pandas.util.testing import assert_frame_equal
+import pandas as pd
+import pytest
+import pytz
+
+from arctic import arctic as m
+from arctic.date import DateRange, mktz, CLOSED_CLOSED, CLOSED_OPEN, OPEN_CLOSED, OPEN_OPEN
+from arctic.exceptions import OverlappingDataException, NoDataFoundException
+
+
+def test_read(tickstore_lib):
+ data = [{'ASK': 1545.25,
+ 'ASKSIZE': 1002.0,
+ 'BID': 1545.0,
+ 'BIDSIZE': 55.0,
+ 'CUMVOL': 2187387.0,
+ 'DELETED_TIME': 0,
+ 'INSTRTYPE': 'FUT',
+ 'PRICE': 1545.0,
+ 'SIZE': 1.0,
+ 'TICK_STATUS': 0,
+ 'TRADEHIGH': 1561.75,
+ 'TRADELOW': 1537.25,
+ 'index': 1185076787070},
+ {'CUMVOL': 354.0,
+ 'DELETED_TIME': 0,
+ 'PRICE': 1543.75,
+ 'SIZE': 354.0,
+ 'TRADEHIGH': 1543.75,
+ 'TRADELOW': 1543.75,
+ 'index': 1185141600600}]
+ tickstore_lib.write('FEED::SYMBOL', data)
+
+ df = tickstore_lib.read('FEED::SYMBOL', columns=['BID', 'ASK', 'PRICE'])
+
+ assert_array_equal(df['ASK'].values, np.array([1545.25, np.nan]))
+ assert_array_equal(df['BID'].values, np.array([1545, np.nan]))
+ assert_array_equal(df['PRICE'].values, np.array([1545, 1543.75]))
+ assert_array_equal(df.index.values, np.array(['2007-07-22T04:59:47.070000000+0100',
+ '2007-07-22T23:00:00.600000000+0100'], dtype='datetime64[ns]'))
+ assert tickstore_lib._collection.find_one()['c'] == 2
+
+
+def test_read_symbol_as_column(tickstore_lib):
+ data = [{'ASK': 1545.25,
+ 'index': 1185076787070},
+ {'CUMVOL': 354.0,
+ 'index': 1185141600600}]
+ tickstore_lib.write('FEED::SYMBOL', data)
+
+ df = tickstore_lib.read('FEED::SYMBOL', columns=['SYMBOL'])
+ assert all(df['SYMBOL'].values == ['FEED::SYMBOL'])
+
+
+def test_read_multiple_symbols(tickstore_lib):
+ data1 = [{'ASK': 1545.25,
+ 'ASKSIZE': 1002.0,
+ 'BID': 1545.0,
+ 'BIDSIZE': 55.0,
+ 'CUMVOL': 2187387.0,
+ 'DELETED_TIME': 0,
+ 'INSTRTYPE': 'FUT',
+ 'PRICE': 1545.0,
+ 'SIZE': 1.0,
+ 'TICK_STATUS': 0,
+ 'TRADEHIGH': 1561.75,
+ 'TRADELOW': 1537.25,
+ 'index': 1185076787070}, ]
+ data2 = [{'CUMVOL': 354.0,
+ 'DELETED_TIME': 0,
+ 'PRICE': 1543.75,
+ 'SIZE': 354.0,
+ 'TRADEHIGH': 1543.75,
+ 'TRADELOW': 1543.75,
+ 'index': 1185141600600}]
+
+ tickstore_lib.write('BAR', data2)
+ tickstore_lib.write('FOO', data1)
+
+ df = tickstore_lib.read(['FOO', 'BAR'], columns=['BID', 'ASK', 'PRICE'])
+
+ assert all(df['SYMBOL'].values == ['FOO', 'BAR'])
+ assert_array_equal(df['ASK'].values, np.array([1545.25, np.nan]))
+ assert_array_equal(df['BID'].values, np.array([1545, np.nan]))
+ assert_array_equal(df['PRICE'].values, np.array([1545, 1543.75]))
+ assert_array_equal(df.index.values, np.array(['2007-07-22T04:59:47.070000000+0100',
+ '2007-07-22T23:00:00.600000000+0100'], dtype='datetime64[ns]'))
+ assert tickstore_lib._collection.find_one()['c'] == 1
+
+
+
+@pytest.mark.parametrize('chunk_size', [1, 100])
+def test_read_all_cols_all_dtypes(tickstore_lib, chunk_size):
+ data = [{'f': 0.1,
+ 'of': 0.2,
+ 's': 's',
+ 'os': 'os',
+ 'l': 1,
+ 'ol': 2,
+ 'index': dt(1970, 1, 1, tzinfo=mktz('UTC')),
+ },
+ {'f': 0.3,
+ 'nf': 0.4,
+ 's': 't',
+ 'ns': 'ns',
+ 'l': 3,
+ 'nl': 4,
+ 'index': dt(1970, 1, 1, 0, 0, 1, tzinfo=mktz('UTC')),
+ },
+ ]
+ tickstore_lib.chunk_size = 3
+ tickstore_lib.write('sym', data)
+ df = tickstore_lib.read('sym', columns=None)
+
+ # The below is probably more trouble than it's worth, but we *should*
+ # be able to roundtrip data and get the same answer...
+
+ # Ints become floats
+ data[0]['l'] = float(data[0]['l'])
+ # Treat missing strings as None
+ data[0]['ns'] = None
+ data[1]['os'] = None
+ # Strip TZ from the data for the moment
+ data[0]['index'] = dt(1970, 1, 1)
+ data[1]['index'] = dt(1970, 1, 1, 0, 0, 1)
+ expected = pd.DataFrame(data)
+ expected = expected.set_index('index')
+ expected = expected[df.columns]
+ assert_frame_equal(expected, df, check_names=False)
+
+
+DUMMY_DATA = [
+ {'a': 1.,
+ 'b': 2.,
+ 'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 3.,
+ 'c': 4.,
+ 'index': dt(2013, 1, 2, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 5.,
+ 'c': 6.,
+ 'index': dt(2013, 1, 3, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 7.,
+ 'c': 8.,
+ 'index': dt(2013, 1, 4, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 9.,
+ 'c': 10.,
+ 'index': dt(2013, 1, 5, tzinfo=mktz('Europe/London'))
+ },
+ ]
+
+
+def test_date_range(tickstore_lib):
+ tickstore_lib.write('SYM', DUMMY_DATA)
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130101, 20130103), columns=None)
+ assert_array_equal(df['a'].values, np.array([1, np.nan, np.nan]))
+ assert_array_equal(df['b'].values, np.array([2., 3., 5.]))
+ assert_array_equal(df['c'].values, np.array([np.nan, 4., 6.]))
+
+ tickstore_lib.delete('SYM')
+
+    # Chunk every 3 ticks and let's have some fun
+ tickstore_lib.chunk_size = 3
+ tickstore_lib.write('SYM', DUMMY_DATA)
+
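+    # Spy on collection.find so each captured query can be re-run to count how many chunks it touches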
+ with patch.object(tickstore_lib._collection, 'find', side_effect=tickstore_lib._collection.find) as f:
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130101, 20130103), columns=None)
+ assert_array_equal(df['b'].values, np.array([2., 3., 5.]))
+ assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 1
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130103), columns=None)
+ assert_array_equal(df['b'].values, np.array([3., 5.]))
+ assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 1
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130103), columns=None)
+ assert_array_equal(df['b'].values, np.array([5.]))
+ assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 1
+
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130104), columns=None)
+ assert_array_equal(df['b'].values, np.array([3., 5., 7.]))
+ assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 2
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130105), columns=None)
+ assert_array_equal(df['b'].values, np.array([3., 5., 7., 9.]))
+ assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 2
+
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130104), columns=None)
+ assert_array_equal(df['b'].values, np.array([5., 7.]))
+ assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 2
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130105), columns=None)
+ assert_array_equal(df['b'].values, np.array([5., 7., 9.]))
+ assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 2
+
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105), columns=None)
+ assert_array_equal(df['b'].values, np.array([7., 9.]))
+ assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 1
+
+ # Test the different open-closed behaviours
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, CLOSED_CLOSED), columns=None)
+ assert_array_equal(df['b'].values, np.array([7., 9.]))
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, CLOSED_OPEN), columns=None)
+ assert_array_equal(df['b'].values, np.array([7.]))
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, OPEN_CLOSED), columns=None)
+ assert_array_equal(df['b'].values, np.array([9.]))
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, OPEN_OPEN), columns=None)
+ assert_array_equal(df['b'].values, np.array([]))
+
+
+def test_date_range_end_not_in_range(tickstore_lib):
+ DUMMY_DATA = [
+ {'a': 1.,
+ 'b': 2.,
+ 'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 3.,
+ 'c': 4.,
+ 'index': dt(2013, 1, 2, 10, 1, tzinfo=mktz('Europe/London'))
+ },
+ ]
+
+ tickstore_lib.chunk_size = 1
+ tickstore_lib.write('SYM', DUMMY_DATA)
+ with patch.object(tickstore_lib._collection, 'find', side_effect=tickstore_lib._collection.find) as f:
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130101, dt(2013, 1, 2, 9, 0)), columns=None)
+ assert_array_equal(df['b'].values, np.array([2.]))
+ assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 1
+
+
+def test_date_range_no_bounds(tickstore_lib):
+ DUMMY_DATA = [
+ {'a': 1.,
+ 'b': 2.,
+ 'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
+ },
+ {'a': 3.,
+ 'b': 4.,
+ 'index': dt(2013, 1, 30, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 5.,
+ 'c': 6.,
+ 'index': dt(2013, 2, 2, 10, 1, tzinfo=mktz('Europe/London'))
+ },
+ ]
+
+ tickstore_lib.chunk_size = 1
+ tickstore_lib.write('SYM', DUMMY_DATA)
+
+ # 1) No start, no end
+ df = tickstore_lib.read('SYM', columns=None)
+ assert_array_equal(df['b'].values, np.array([2., 4.]))
+ # 1.2) Start before the real start
+ df = tickstore_lib.read('SYM', date_range=DateRange(20121231), columns=None)
+ assert_array_equal(df['b'].values, np.array([2., 4.]))
+ # 2.1) Only go one month out
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130101), columns=None)
+ assert_array_equal(df['b'].values, np.array([2., 4.]))
+ # 2.2) Only go one month out
+ df = tickstore_lib.read('SYM', date_range=DateRange(20130102), columns=None)
+ assert_array_equal(df['b'].values, np.array([4.]))
+ # 3) No start
+ df = tickstore_lib.read('SYM', date_range=DateRange(end=20130102), columns=None)
+ assert_array_equal(df['b'].values, np.array([2.]))
+ # 4) Outside bounds
+ df = tickstore_lib.read('SYM', date_range=DateRange(end=20131212), columns=None)
+ assert_array_equal(df['b'].values, np.array([2., 4., 5.]))
+
+
+def test_date_range_BST(tickstore_lib):
+ DUMMY_DATA = [
+ {'a': 1.,
+ 'b': 2.,
+ 'index': dt(2013, 6, 1, 12, 00, tzinfo=mktz('Europe/London'))
+ },
+ {'a': 3.,
+ 'b': 4.,
+ 'index': dt(2013, 6, 1, 13, 00, tzinfo=mktz('Europe/London'))
+ },
+ ]
+ tickstore_lib.chunk_size = 1
+ tickstore_lib.write('SYM', DUMMY_DATA)
+
+ df = tickstore_lib.read('SYM', columns=None)
+ assert_array_equal(df['b'].values, np.array([2., 4.]))
+
+# df = tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1, 12),
+# dt(2013, 6, 1, 13)))
+# assert_array_equal(df['b'].values, np.array([2., 4.]))
+ df = tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1, 12, tzinfo=mktz('Europe/London')),
+ dt(2013, 6, 1, 13, tzinfo=mktz('Europe/London'))))
+ assert_array_equal(df['b'].values, np.array([2., 4.]))
+
+ df = tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1, 12, tzinfo=mktz('UTC')),
+ dt(2013, 6, 1, 13, tzinfo=mktz('UTC'))))
+ assert_array_equal(df['b'].values, np.array([4., ]))
+
+
+def test_read_no_data(tickstore_lib):
+ with pytest.raises(NoDataFoundException):
+ tickstore_lib.read('missing_sym', DateRange(20131212, 20131212))
+
+
+def test_write_no_tz(tickstore_lib):
+ DUMMY_DATA = [
+ {'a': 1.,
+ 'b': 2.,
+ 'index': dt(2013, 6, 1, 12, 00)
+ }]
+ with pytest.raises(ValueError):
+ tickstore_lib.write('SYM', DUMMY_DATA)
+
+
+def test_read_out_of_order(tickstore_lib):
+ DUMMY_DATA = [
+ {'a': 1.,
+ 'b': 2.,
+ 'index': dt(2013, 6, 1, 12, 00, tzinfo=mktz('UTC'))
+ },
+ {'a': 3.,
+ 'b': 4.,
+ 'index': dt(2013, 6, 1, 11, 00, tzinfo=mktz('UTC')) # Out-of-order
+ },
+ {'a': 3.,
+ 'b': 4.,
+ 'index': dt(2013, 6, 1, 13, 00, tzinfo=mktz('UTC'))
+ },
+ ]
+ tickstore_lib.chunk_size = 3
+ tickstore_lib.write('SYM', DUMMY_DATA)
+ tickstore_lib.read('SYM', columns=None)
+ assert len(tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1, tzinfo=mktz('UTC')), dt(2013, 6, 2, tzinfo=mktz('UTC'))))) == 3
+ assert len(tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1, tzinfo=mktz('UTC')), dt(2013, 6, 1, 12, tzinfo=mktz('UTC'))))) == 2
+
+
+def test_read_longs(tickstore_lib):
+ DUMMY_DATA = [
+ {'a': 1,
+ 'index': dt(2013, 6, 1, 12, 00, tzinfo=mktz('Europe/London'))
+ },
+ {
+ 'b': 4,
+ 'index': dt(2013, 6, 1, 13, 00, tzinfo=mktz('Europe/London'))
+ },
+ ]
+ tickstore_lib.chunk_size = 3
+ tickstore_lib.write('SYM', DUMMY_DATA)
+ tickstore_lib.read('SYM', columns=None)
+ read = tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1), dt(2013, 6, 2)))
+ assert read['a'][0] == 1
+ assert np.isnan(read['b'][0])
+
+
+def test_read_with_image(tickstore_lib):
+ DUMMY_DATA = [
+ {'a': 1.,
+ 'index': dt(2013, 6, 1, 12, 00, tzinfo=mktz('Europe/London'))
+ },
+ {
+ 'b': 4.,
+ 'index': dt(2013, 6, 1, 13, 00, tzinfo=mktz('Europe/London'))
+ },
+ ]
+ # Add an image
+ tickstore_lib.write('SYM', DUMMY_DATA)
+ tickstore_lib._collection.update_one({},
+ {'$set':
+ {'im': {'i':
+ {'a': 37.,
+ 'c': 2.,
+ },
+ 't': dt(2013, 6, 1, 11, 0)
+ }
+ }
+ }
+ )
+
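+    # Without include_images the snapshot taken at 11:00 is ignored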
+ tickstore_lib.read('SYM', columns=None)
+ read = tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1), dt(2013, 6, 2)))
+ assert read['a'][0] == 1
+
+ # Read with the image as well
+ read = tickstore_lib.read('SYM', columns=None, date_range=DateRange(dt(2013, 6, 1), dt(2013, 6, 2)),
+ include_images=True)
+ assert read['a'][0] == 37
+ assert read['a'][1] == 1
+ assert np.isnan(read['b'][0])
+ assert read['b'][2] == 4
+ assert read.index[0] == dt(2013, 6, 1, 11)
diff --git a/tests/integration/tickstore/test_ts_write.py b/tests/integration/tickstore/test_ts_write.py
new file mode 100644
index 000000000..5484f2d42
--- /dev/null
+++ b/tests/integration/tickstore/test_ts_write.py
@@ -0,0 +1,77 @@
+from datetime import datetime as dt
+from mock import patch
+import numpy as np
+from pandas.util.testing import assert_frame_equal
+import pytest
+
+from arctic import arctic as m
+from arctic.date import mktz
+from arctic.exceptions import OverlappingDataException, \
+ NoDataFoundException
+
+
+DUMMY_DATA = [
+ {'a': 1.,
+ 'b': 2.,
+ 'index': dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 3.,
+ 'c': 4.,
+ 'index': dt(2013, 1, 2, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 5.,
+ 'c': 6.,
+ 'index': dt(2013, 1, 3, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 7.,
+ 'c': 8.,
+ 'index': dt(2013, 1, 4, tzinfo=mktz('Europe/London'))
+ },
+ {'b': 9.,
+ 'c': 10.,
+ 'index': dt(2013, 1, 5, tzinfo=mktz('Europe/London'))
+ },
+ ]
+
+
+def test_ts_write_simple(tickstore_lib):
+ assert tickstore_lib.stats()['chunks']['count'] == 0
+ tickstore_lib.write('SYM', DUMMY_DATA)
+ assert tickstore_lib.stats()['chunks']['count'] == 1
+ assert len(tickstore_lib.read('SYM')) == 5
+ assert tickstore_lib.list_symbols() == ['SYM']
+
+
+def test_overlapping_load(tickstore_lib):
+ data = DUMMY_DATA
+ tickstore_lib.write('SYM', DUMMY_DATA)
+ with pytest.raises(OverlappingDataException):
+ tickstore_lib.write('SYM', data)
+
+ data = DUMMY_DATA[2:]
+ with pytest.raises(OverlappingDataException):
+ tickstore_lib.write('SYM', data)
+
+ data = DUMMY_DATA[2:3]
+ with pytest.raises(OverlappingDataException):
+ tickstore_lib.write('SYM', data)
+
+ # overlapping at the beginning is ok
+ data = [DUMMY_DATA[0]]
+ tickstore_lib.write('SYM', data)
+
+ # overlapping at the end is ok
+ data = [DUMMY_DATA[-1]]
+ tickstore_lib.write('SYM', data)
+
+
+def test_ts_write_pandas(tickstore_lib):
+ data = DUMMY_DATA
+ tickstore_lib.write('SYM', data)
+
+ data = tickstore_lib.read('SYM', columns=None).tz_localize(mktz('Europe/London'))
+ tickstore_lib.delete('SYM')
+ tickstore_lib.write('SYM', data)
+
+ read = tickstore_lib.read('SYM', columns=None).tz_localize(mktz('Europe/London'))
+ assert_frame_equal(read, data, check_names=False)
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/date/__init__.py b/tests/unit/date/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/date/test_daterange.py b/tests/unit/date/test_daterange.py
new file mode 100644
index 000000000..54835a95f
--- /dev/null
+++ b/tests/unit/date/test_daterange.py
@@ -0,0 +1,240 @@
+from datetime import datetime as dt
+import operator
+import pytest
+import itertools
+
+from arctic.date import DateRange, string_to_daterange, CLOSED_CLOSED, CLOSED_OPEN, OPEN_CLOSED, OPEN_OPEN
+
+
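+# Each entry: name -> (range, expected start, expected end, unbounded?, start in range?, end in range?)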
+test_ranges_for_bounding = {
+ "unbounded": (DateRange(),
+ None, None, True, None, None),
+ "unbounded_right": (DateRange('20110101'),
+ dt(2011, 1, 1), None, True, True, None),
+ "unbounded_left": (DateRange(None, '20111231'),
+ None, dt(2011, 12, 31), True, None, True),
+ "closed_by_default": (DateRange('20110101', '20111231'),
+ dt(2011, 1, 1), dt(2011, 12, 31), False, True, True),
+ "closed_explicitly": (DateRange('20110101', '20111231', CLOSED_CLOSED),
+ dt(2011, 1, 1), dt(2011, 12, 31), False, True, True),
+ "closed_open": (DateRange('20110101', '20111231', CLOSED_OPEN),
+ dt(2011, 1, 1), dt(2011, 12, 31), False, True, False),
+ "open_closed": (DateRange('20110101', '20111231', OPEN_CLOSED),
+ dt(2011, 1, 1), dt(2011, 12, 31), False, False, True),
+ "open_open": (DateRange('20110101', '20111231', OPEN_OPEN),
+ dt(2011, 1, 1), dt(2011, 12, 31), False, False, False),
+}
+test_ranges_for_bounding = sorted(test_ranges_for_bounding.iteritems(), key=operator.itemgetter(1))
+
+
+def eq_nan(*args):
+ if all(arg is None for arg in args):
+ return True
+ return all(arg == args[0] for arg in args[1:])
+
+
+@pytest.mark.parametrize(("dt_range", "start", "end", "is_unbounded", "start_in_range", "end_in_range"),
+ [i[1] for i in test_ranges_for_bounding],
+ ids=[i[0] for i in test_ranges_for_bounding])
+def test_daterange_bounding(dt_range, start, end, is_unbounded, start_in_range, end_in_range):
+ assert eq_nan(start, dt_range.start)
+ assert eq_nan(end, dt_range.end)
+ assert dt_range.unbounded is is_unbounded
+ assert dt_range.start is None or (start_in_range is (dt_range.start in dt_range))
+ assert dt_range.end is None or (end_in_range is (dt_range.end in dt_range))
+
+
+test_ranges_for_parse = [
+ [20110102, 20111231],
+ ['20110102', '20111231'],
+ ['2011-01-02', '2011-12-31'],
+ [dt(2011, 1, 2), dt(2011, 12, 31)],
+]
+
+@pytest.mark.parametrize("date_range", test_ranges_for_parse)
+def test_daterange_arg_parsing(date_range):
+ d1 = DateRange(date_range[0], date_range[1])
+ assert d1.start == dt(2011, 1, 2)
+ assert d1.end == dt(2011, 12, 31)
+ assert d1.unbounded is False
+
+
+def test_ambiguous_parse():
+ with pytest.raises(ValueError):
+ DateRange('02/01/2011')
+
+
+def test_daterange_eq():
+ d1 = DateRange('20110101', '20111231')
+ d2 = DateRange('20110101', '20111231')
+ assert d1 == d2
+ d1 = DateRange(None, '20111231')
+ d2 = DateRange(None, '20111231')
+ assert d1 == d2
+ d1 = DateRange('20111231', None)
+ d2 = DateRange('20111231', None)
+ assert d1 == d2
+ d1 = DateRange(None, None)
+ d2 = DateRange(None, None)
+ assert d1 == d2
+ d1 = DateRange('20110102', '20111231')
+ d2 = DateRange('20110101', '20111231')
+ assert not d1 == d2
+
+
+def test_daterange_hash():
+ d1 = DateRange('20110101', '20111231')
+ d2 = DateRange('20110101', '20111231')
+ assert hash(d1) == hash(d2)
+ d1 = DateRange(None, '20111231')
+ d2 = DateRange(None, '20111231')
+ assert hash(d1) == hash(d2)
+ d1 = DateRange('20111231', None)
+ d2 = DateRange('20111231', None)
+ assert hash(d1) == hash(d2)
+ d1 = DateRange(None, None)
+ d2 = DateRange(None, None)
+ assert hash(d1) == hash(d2)
+ d1 = DateRange('20110102', '20111231')
+ d2 = DateRange('20110101', '20111231')
+ assert not hash(d1) == hash(d2)
+
+
+def test_daterange_invalid_start():
+ with pytest.raises(TypeError) as ex:
+ DateRange(1.1, None)
+ assert "unsupported type for start" in str(ex.value)
+
+
+def test_daterange_invalid_end():
+ with pytest.raises(TypeError) as ex:
+ DateRange(None, object())
+ assert "unsupported type for end" in str(ex.value)
+
+
+def test_daterange_index():
+ start, end = dt(2000, 1, 1), dt(3000, 1, 1)
+ dr = DateRange(start, end)
+ assert dr[0] == start
+ assert dr[1] == end
+
+
+def test_daterange_index_error():
+ start, end = dt(2000, 1, 1), dt(3000, 1, 1)
+ dr = DateRange(start, end)
+ with pytest.raises(IndexError):
+ dr[None]
+ with pytest.raises(IndexError):
+ dr[3]
+
+
+def test_as_dates():
+ """Various permutations of datetime/None, and date/None values."""
+ dtime = dt(2010, 12, 13, 10, 30)
+ for testdt in [dtime, dtime.date()]:
+ vals = [testdt, None]
+ for start, end in itertools.product(vals, vals):
+ dr = DateRange(start, end)
+ dad = dr.as_dates()
+ if dr.start:
+                assert dad.start == (dr.start.date() if isinstance(dr.start, dt) else dr.start)
+ else:
+ assert not dad.start
+ if dr.end:
+                assert dad.end == (dr.end.date() if isinstance(dr.end, dt) else dr.end)
+ else:
+ assert not dad.end
+
+
+DR1 = DateRange('20110101', '20110102')
+DR2 = DateRange('201101011030', '201101021030')
+DR3 = DateRange('201101011030')
+DR4 = DateRange(None, '201101011030')
+DR5 = DateRange('201101011030')
+DR6 = DateRange('20110101', '20110102', OPEN_OPEN)
+DR7 = DateRange('20110101', '20110102', OPEN_CLOSED)
+DR8 = DateRange('20110101', '20110102', CLOSED_OPEN)
+
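+# Each case: input string, the expected parsed DateRange, and the expected result with as_dates=True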
+STRING_DR_TESTS = [('20110101', DR1, DateRange(DR1.start.date(), DR1.end.date())),
+ ('20110101-20110102', DR1, DateRange(DR1.start.date(), DR1.end.date())),
+ ('201101011030', DR2, DateRange(DR2.start.date(), DR2.end.date())),
+ ('-201101011030', DR4, DateRange(None, DR2.start.date())),
+ ('201101011030-', DR5, DateRange(DR2.start.date())),
+ ('(20110101-20110102)', DR6, DateRange(DR6.start.date(), DR6.end.date(), DR6.interval)),
+ ('(20110101-20110102]', DR6, DateRange(DR6.start.date(), DR6.end.date(), DR6.interval)),
+ ('[20110101-20110102)', DR6, DateRange(DR6.start.date(), DR6.end.date(), DR6.interval)),
+ ('[20110101-20110102]', DR1, DateRange(DR1.start.date(), DR1.end.date(), DR1.interval)),
+ ]
+
+
+@pytest.mark.parametrize(['instr', 'expected_ts', 'expected_dt'], STRING_DR_TESTS)
+def test_string_to_daterange(instr, expected_ts, expected_dt):
+ assert string_to_daterange(instr) == expected_ts
+ assert string_to_daterange(instr, as_dates=True) == expected_dt
+
+
+def test_string_to_daterange_raises():
+ with pytest.raises(ValueError) as e:
+ string_to_daterange('20120101-20130101-20140101')
+ assert str(e.value) == "Too many dates in input string [20120101-20130101-20140101] with delimiter (-)"
+
+QUERY_TESTS = [(DateRange('20110101', '20110102'), {'$gte': dt(2011, 1, 1), '$lte': dt(2011, 1, 2)}),
+ (DateRange('20110101', '20110102', OPEN_OPEN), {'$gt': dt(2011, 1, 1), '$lt': dt(2011, 1, 2)}),
+ (DateRange('20110101', '20110102', OPEN_CLOSED), {'$gt': dt(2011, 1, 1), '$lte': dt(2011, 1, 2)}),
+ (DateRange('20110101', '20110102', CLOSED_OPEN), {'$gte': dt(2011, 1, 1), '$lt': dt(2011, 1, 2)}),
+ (DateRange('20110101', '20110102'), {'$gte': dt(2011, 1, 1), '$lte': dt(2011, 1, 2)}),
+ (DateRange('20110101', None), {'$gte': dt(2011, 1, 1)}),
+ (DateRange(None, '20110102'), {'$lte': dt(2011, 1, 2)}),
+ (DateRange(), {})]
+
+
+@pytest.mark.parametrize(['date_range', 'expected'], QUERY_TESTS)
+def test_mongo_query(date_range, expected):
+ assert date_range.mongo_query() == expected
+
+
+QUERY_TESTS_DB = [(DateRange('20110101', '20110102'), ('>=', dt(2011, 1, 1), '<=', dt(2011, 1, 2))),
+ (DateRange('20110101', '20110102', OPEN_OPEN), ('>', dt(2011, 1, 1), '<', dt(2011, 1, 2))),
+ (DateRange('20110101', '20110102', OPEN_CLOSED), ('>', dt(2011, 1, 1), '<=', dt(2011, 1, 2))),
+ (DateRange('20110101', '20110102', CLOSED_OPEN), ('>=', dt(2011, 1, 1), '<', dt(2011, 1, 2))),
+ (DateRange('20110101', '20110102'), ('>=', dt(2011, 1, 1), '<=', dt(2011, 1, 2))),
+                  (DateRange('20110101', None), ('>=', dt(2011, 1, 1), '<=', None)),
+                  (DateRange(None, '20110102'), ('>=', None, '<=', dt(2011, 1, 2))),
+                  (DateRange(), ('>=', None, '<=', None))]
+@pytest.mark.parametrize(['date_range', 'expected'], QUERY_TESTS_DB)
+def test_get_date_bounds(date_range, expected):
+ assert date_range.get_date_bounds() == expected
+
+
+@pytest.mark.parametrize(["dr"], [(DR1,), (DR2,), (DR3,), (DR4,), (DR5,), (DR6,), (DR7,)])
+def test_intersection_with_self(dr):
+ assert dr == dr.intersection(dr)
+
+
+def test_intersection_returns_inner_boundaries():
+    # start:
+ assert DateRange('20110103',).intersection(DateRange('20110102')).start == dt(2011, 1, 3)
+ assert DateRange('20110102',).intersection(DateRange('20110103')).start == dt(2011, 1, 3)
+ assert DateRange(None,).intersection(DateRange('20110103')).start == dt(2011, 1, 3)
+ assert DateRange('20110103').intersection(DateRange(None)).start == dt(2011, 1, 3)
+
+    # end:
+ assert DateRange(None, '20110103',).intersection(DateRange(None, '20110102')).end == dt(2011, 1, 2)
+ assert DateRange(None, '20110102',).intersection(DateRange(None, '20110103')).end == dt(2011, 1, 2)
+ assert DateRange(None, None,).intersection(DateRange(None, '20110103')).end == dt(2011, 1, 3)
+ assert DateRange(None, '20110103').intersection(DateRange(None, None)).end == dt(2011, 1, 3)
+
+
+def test_intersection_preserves_boundaries():
+ # Non-matching boundaries
+ assert DateRange('20110101', '20110102', OPEN_OPEN) == DateRange('20110101', '20110103', OPEN_CLOSED).intersection(DateRange('20110101', '20110102', OPEN_OPEN))
+ assert DateRange('20110101', '20110102', OPEN_OPEN) == DateRange('20110101', '20110102', OPEN_OPEN).intersection(DateRange('20110101', '20110103', OPEN_CLOSED))
+ assert DateRange('20110102', '20110103', OPEN_OPEN) == DateRange('20110102', '20110103', OPEN_OPEN).intersection(DateRange('20110101', '20110103', CLOSED_OPEN))
+
+ assert DateRange('20110102', '20110103', CLOSED_OPEN) == DateRange('20110102', '20110103', CLOSED_OPEN).intersection(DateRange('20110101', '20110103', CLOSED_OPEN))
+ assert DateRange('20110102', '20110103', CLOSED_OPEN) == DateRange('20110101', '20110103', CLOSED_OPEN).intersection(DateRange('20110102', '20110103', CLOSED_OPEN))
+
+ # Matching boundaries
+ assert DateRange('20110101', '20110102', OPEN_OPEN) == DateRange('20110101', '20110102', CLOSED_OPEN).intersection(DateRange('20110101', '20110102', OPEN_OPEN))
+ assert DateRange('20110101', '20110102', OPEN_OPEN) == DateRange('20110101', '20110102', OPEN_OPEN).intersection(DateRange('20110101', '20110102', OPEN_CLOSED))
+
diff --git a/tests/unit/date/test_datetime_to_ms_roundtrip.py b/tests/unit/date/test_datetime_to_ms_roundtrip.py
new file mode 100644
index 000000000..e0358b657
--- /dev/null
+++ b/tests/unit/date/test_datetime_to_ms_roundtrip.py
@@ -0,0 +1,80 @@
+import pytest
+import datetime
+from datetime import datetime as dt
+import pytz
+from arctic.date import mktz, datetime_to_ms, ms_to_datetime
+
+
+def assert_roundtrip(tz):
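+    # A datetime in the given tz should survive conversion to ms (via UTC) and back through ms_to_datetime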
+ ts = datetime.datetime(1982, 7, 1, 16, 5)
+
+ ts1 = ts.replace(tzinfo=tz)
+ ts2 = ms_to_datetime(datetime_to_ms(ts1.astimezone(mktz("UTC"))), tz)
+ ts1 = ts1.replace(tzinfo=None) if tz == mktz() else ts1
+ #logger.info(ts2.tzinfo)
+
+ assert(ts2.hour == ts1.hour)
+# assert(ts2.tzinfo == ts1.tzinfo)
+ assert ts2 == ts1
+
+
+def get_tz():
+ #tz = mktz("Europe/London")
+ #tz = pytz.timezone("Europe/London")
+ #tz = pytz.timezone("UTC")
+ tz = pytz.timezone("Europe/London")
+ tmp = ms_to_datetime(0, tz)
+ tz = tmp.tzinfo
+ return tz
+
+
+def test_UTC_roundtrip():
+ tz = pytz.timezone("UTC")
+ assert_roundtrip(tz)
+
+
+def test_weird_get_tz_London():
+ tz = get_tz()
+ assert_roundtrip(tz)
+
+
+@pytest.mark.xfail
+def test_pytz_London():
+ # Don't use pytz
+ tz = pytz.timezone("Europe/London")
+ assert_roundtrip(tz)
+
+
+def test_mktz_London():
+ tz = mktz("Europe/London")
+ assert_roundtrip(tz)
+
+
+def test_datetime_roundtrip_lon_no_tz():
+ pdt = datetime.datetime(2012, 6, 12, 12, 12, 12, 123000)
+ pdt2 = ms_to_datetime(datetime_to_ms(pdt))
+ assert pdt2 == pdt
+
+ pdt = datetime.datetime(2012, 1, 12, 12, 12, 12, 123000)
+ pdt2 = ms_to_datetime(datetime_to_ms(pdt))
+ assert pdt2 == pdt
+
+
+def test_datetime_roundtrip_lon_tz():
+ pdt = datetime.datetime(2012, 6, 12, 12, 12, 12, 123000, tzinfo=mktz('Europe/London'))
+ pdt2 = ms_to_datetime(datetime_to_ms(pdt))
+ assert pdt2 == pdt.replace(tzinfo=None)
+
+ pdt = datetime.datetime(2012, 1, 12, 12, 12, 12, 123000, tzinfo=mktz('Europe/London'))
+ pdt2 = ms_to_datetime(datetime_to_ms(pdt))
+ assert pdt2 == pdt.replace(tzinfo=None)
+
+
+def test_datetime_roundtrip_est_tz():
+ pdt = datetime.datetime(2012, 6, 12, 12, 12, 12, 123000, tzinfo=mktz('EST'))
+ pdt2 = ms_to_datetime(datetime_to_ms(pdt))
+ assert pdt2.replace(tzinfo=mktz('Europe/London')) == pdt
+
+ pdt = datetime.datetime(2012, 1, 12, 12, 12, 12, 123000, tzinfo=mktz('EST'))
+ pdt2 = ms_to_datetime(datetime_to_ms(pdt))
+ assert pdt2.replace(tzinfo=mktz('Europe/London')) == pdt
diff --git a/tests/unit/date/test_mktz.py b/tests/unit/date/test_mktz.py
new file mode 100644
index 000000000..fc53918c2
--- /dev/null
+++ b/tests/unit/date/test_mktz.py
@@ -0,0 +1,33 @@
+from datetime import datetime as dt
+from mock import patch
+from pytest import raises
+
+from arctic.date import mktz, TimezoneError
+
+
+def test_mktz():
+ tz = mktz()
+ d = dt(2012, 2, 2, tzinfo=tz)
+ assert d.tzname() == 'GMT'
+ d = dt(2012, 7, 2, tzinfo=tz)
+ assert d.tzname() == 'BST'
+
+ tz = mktz('UTC')
+ d = dt(2012, 2, 2, tzinfo=tz)
+ assert d.tzname() == 'UTC'
+ d = dt(2012, 7, 2, tzinfo=tz)
+ assert d.tzname() == 'UTC'
+
+
+def test_mktz_zone():
+ tz = mktz('UTC')
+ assert tz.zone == "UTC"
+ tz = mktz('/usr/share/zoneinfo/UTC')
+ assert tz.zone == "UTC"
+
+
+def test_mktz_fails_if_invalid_timezone():
+ with patch('os.path.exists') as file_exists:
+ file_exists.return_value = False
+ with raises(TimezoneError):
+ mktz('junk')
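A short usage sketch of the behaviour asserted above; it assumes, as these tests do, that the default timezone resolves to Europe/London:

```python
from datetime import datetime
from arctic.date import mktz, TimezoneError

tz = mktz()  # default timezone; DST-aware, unlike a fixed-offset tzinfo
assert datetime(2012, 2, 2, tzinfo=tz).tzname() == 'GMT'   # winter
assert datetime(2012, 7, 2, tzinfo=tz).tzname() == 'BST'   # summer (DST)

try:
    mktz('junk')   # unknown zone names raise rather than silently falling back
except TimezoneError:
    pass
```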
diff --git a/tests/unit/date/test_util.py b/tests/unit/date/test_util.py
new file mode 100644
index 000000000..236fb201b
--- /dev/null
+++ b/tests/unit/date/test_util.py
@@ -0,0 +1,54 @@
+import pytest
+import pytz
+
+from datetime import datetime as dt
+from arctic.date import datetime_to_ms, ms_to_datetime, mktz, to_pandas_closed_closed, DateRange, OPEN_OPEN, CLOSED_CLOSED
+
+
+@pytest.mark.parametrize('pdt', [
+ dt(2007, 3, 25, 1, tzinfo=mktz('Europe/London')),
+ dt(2004, 10, 31, 23, 3, tzinfo=mktz('Europe/London')),
+ dt(1990, 4, 5, 0, 0, tzinfo=mktz('Europe/London')),
+ dt(2007, 3, 25, 1, tzinfo=mktz('EST')),
+ dt(2004, 10, 31, 23, 3, tzinfo=mktz('EST')),
+ dt(1990, 4, 5, 0, 0, tzinfo=mktz('EST')),
+ ]
+)
+def test_datetime_to_ms_and_back(pdt):
+ i = datetime_to_ms(pdt)
+ pdt = pdt.astimezone(mktz())
+ pdt = pdt.replace(tzinfo=None)
+ pdt2 = ms_to_datetime(i)
+ assert pdt == pdt2
+
+
+def test_datetime_to_ms_and_back_microseconds():
+ pdt = dt(2012, 8, 1, 12, 34, 56, 999999, tzinfo=mktz('Europe/London'))
+ i = datetime_to_ms(pdt)
+ pdt = pdt.replace(tzinfo=None)
+ pdt2 = ms_to_datetime(i)
+
+ assert pdt != pdt2
+ assert pdt.year == pdt2.year
+ assert pdt.month == pdt2.month
+ assert pdt.day == pdt2.day
+ assert pdt.hour == pdt2.hour
+ assert pdt.minute == pdt2.minute
+ assert pdt.second == pdt2.second
+ # Microsecond precision loss inevitable.
+ assert pdt.microsecond // 1000 == pdt2.microsecond // 1000
+ assert pdt.tzinfo is None
+
+
+def test_daterange_closedclosed_None():
+ assert to_pandas_closed_closed(None) is None
+
+
+def test_daterange_closedclosed():
+ date_range = DateRange(dt(2013, 1, 1, tzinfo=mktz('Europe/London')),
+ dt(2014, 2, 1, tzinfo=mktz('Europe/London')), OPEN_OPEN)
+ expected = DateRange(dt(2013, 1, 1, 0, 0, 0, 1000, tzinfo=mktz('Europe/London')),
+ dt(2014, 1, 31, 23, 59, 59, 999000, tzinfo=mktz('Europe/London')),
+ CLOSED_CLOSED)
+ act = to_pandas_closed_closed(date_range)
+ assert act == expected
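The expected values above encode the conversion rule: each OPEN bound is nudged inwards by one millisecond and the interval becomes CLOSED_CLOSED, which is what pandas-style inclusive slicing expects. A compact sketch of the same call:

```python
from datetime import datetime as dt
from arctic.date import DateRange, OPEN_OPEN, to_pandas_closed_closed, mktz

rng = DateRange(dt(2013, 1, 1, tzinfo=mktz('Europe/London')),
                dt(2014, 2, 1, tzinfo=mktz('Europe/London')), OPEN_OPEN)
closed = to_pandas_closed_closed(rng)
# Both bounds are now inclusive: 2013-01-01 00:00:00.001 through 2014-01-31 23:59:59.999
```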
diff --git a/tests/unit/scripts/__init__.py b/tests/unit/scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/scripts/test_arctic_fsck.py b/tests/unit/scripts/test_arctic_fsck.py
new file mode 100644
index 000000000..5f45569a0
--- /dev/null
+++ b/tests/unit/scripts/test_arctic_fsck.py
@@ -0,0 +1,36 @@
+from mock import patch, sentinel, call
+
+from arctic.scripts.arctic_fsck import main
+
+from ...util import run_as_main
+
+
+def test_main():
+ with patch('arctic.scripts.arctic_fsck.Arctic') as Arctic, \
+ patch('arctic.scripts.arctic_fsck.get_mongodb_uri') as get_mongodb_uri, \
+ patch('arctic.scripts.arctic_fsck.do_db_auth') as do_db_auth:
+ run_as_main(main, '--host', '%s:%s' % (sentinel.host, sentinel.port),
+ '-v', '--library', 'sentinel.library', 'lib2', '-f')
+ get_mongodb_uri.assert_called_once_with('sentinel.host:sentinel.port')
+ Arctic.assert_called_once_with(get_mongodb_uri.return_value)
+ assert do_db_auth.call_args_list == [call('%s:%s' % (sentinel.host, sentinel.port),
+ Arctic.return_value._conn,
+ 'arctic_sentinel'),
+ call('%s:%s' % (sentinel.host, sentinel.port),
+ Arctic.return_value._conn,
+ 'arctic')]
+ assert Arctic.return_value.__getitem__.return_value._fsck.call_args_list == [call(False),
+ call(False), ]
+
+
+def test_main_dry_run():
+ with patch('arctic.scripts.arctic_fsck.Arctic') as Arctic, \
+ patch('arctic.scripts.arctic_fsck.get_mongodb_uri') as get_mongodb_uri, \
+ patch('arctic.scripts.arctic_fsck.do_db_auth') as do_db_auth:
+ run_as_main(main, '--host', '%s:%s' % (sentinel.host, sentinel.port),
+ '-v', '--library', 'sentinel.library', 'sentinel.lib2')
+ get_mongodb_uri.assert_called_once_with('sentinel.host:sentinel.port')
+ Arctic.assert_called_once_with(get_mongodb_uri.return_value)
+ assert do_db_auth.call_count == 0
+ assert Arctic.return_value.__getitem__.return_value._fsck.call_args_list == [call(True),
+ call(True), ]
diff --git a/tests/unit/scripts/test_initialize_library.py b/tests/unit/scripts/test_initialize_library.py
new file mode 100644
index 000000000..76866efc7
--- /dev/null
+++ b/tests/unit/scripts/test_initialize_library.py
@@ -0,0 +1,87 @@
+from mock import patch
+import pytest
+
+from arctic.scripts import arctic_init_library as mil
+
+from ...util import run_as_main
+
+
+def test_init_library():
+ # Create the user against the current mongo database
+ with patch('pymongo.MongoClient') as MongoClient, \
+ patch('arctic.scripts.arctic_init_library.logger', autospec=True) as logger, \
+ patch('arctic.scripts.arctic_init_library.Arctic', autospec=True) as Arctic, \
+ patch('arctic.scripts.arctic_init_library.get_mongodb_uri', autospec=True) as get_mongodb_uri, \
+ patch('arctic.scripts.arctic_init_library.do_db_auth', autospec=True) as do_db_auth:
+ run_as_main(mil.main, '--host', 'hostname', '--library', 'arctic_user.library', '--type', 'VersionStore')
+
+ get_mongodb_uri.assert_called_once_with('hostname')
+ MongoClient.assert_called_once_with(get_mongodb_uri.return_value)
+ do_db_auth.assert_called_once_with('hostname', MongoClient.return_value, 'arctic_user')
+ Arctic.assert_called_once_with(MongoClient.return_value)
+ Arctic.return_value.initialize_library.assert_called_once_with('arctic_user.library', 'VersionStore', hashed=False)
+ assert logger.warn.call_count == 0
+
+
+def test_init_library_no_admin():
+ # Create the user against the current mongo database
+ with patch('pymongo.MongoClient') as MongoClient, \
+ patch('arctic.scripts.arctic_init_library.logger', autospec=True), \
+ patch('arctic.scripts.arctic_init_library.Arctic', autospec=True) as Arctic, \
+ patch('arctic.scripts.arctic_init_library.get_mongodb_uri', autospec=True) as get_mongodb_uri, \
+ patch('arctic.scripts.arctic_init_library.do_db_auth', autospec=True) as do_db_auth:
+ run_as_main(mil.main, '--host', 'hostname', '--library', 'arctic_user.library', '--type', 'VersionStore')
+
+ get_mongodb_uri.assert_called_once_with('hostname')
+ MongoClient.assert_called_once_with(get_mongodb_uri.return_value)
+ Arctic.assert_called_once_with(MongoClient.return_value)
+ Arctic.return_value.initialize_library.assert_called_once_with('arctic_user.library', 'VersionStore', hashed=False)
+
+
+def test_init_library_hashed():
+ # Create the user against the current mongo database
+ with patch('pymongo.MongoClient') as MongoClient, \
+ patch('arctic.scripts.arctic_init_library.logger', autospec=True) as logger, \
+ patch('arctic.scripts.arctic_init_library.Arctic', autospec=True) as Arctic, \
+ patch('arctic.scripts.arctic_init_library.get_mongodb_uri', autospec=True) as get_mongodb_uri, \
+ patch('arctic.scripts.arctic_init_library.do_db_auth', autospec=True) as do_db_auth:
+ run_as_main(mil.main, '--host', 'hostname', '--library', 'arctic_user.library', '--type', 'VersionStore', '--hashed')
+
+ get_mongodb_uri.assert_called_once_with('hostname')
+ MongoClient.assert_called_once_with(get_mongodb_uri.return_value)
+ do_db_auth.assert_called_once_with('hostname', MongoClient.return_value, 'arctic_user')
+ Arctic.assert_called_once_with(MongoClient.return_value)
+ Arctic.return_value.initialize_library.assert_called_once_with('arctic_user.library', 'VersionStore', hashed=True)
+ assert logger.warn.call_count == 0
+
+
+def test_init_library_no_admin_no_user_creds():
+ with patch('pymongo.MongoClient') as MongoClient, \
+ patch('arctic.scripts.arctic_init_library.logger', autospec=True) as logger, \
+ patch('arctic.scripts.arctic_init_library.Arctic', autospec=True) as Arctic, \
+ patch('arctic.scripts.arctic_init_library.get_mongodb_uri', autospec=True) as get_mongodb_uri, \
+ patch('arctic.scripts.arctic_init_library.do_db_auth', autospec=True, return_value=False) as do_db_auth:
+
+ MongoClient.return_value['arctic_user'].authenticate.return_value = False
+ run_as_main(mil.main, '--host', 'hostname', '--library', 'arctic_user.library', '--type', 'VersionStore')
+
+ get_mongodb_uri.assert_called_once_with('hostname')
+ MongoClient.assert_called_once_with(get_mongodb_uri.return_value)
+ assert Arctic.call_count == 0
+
+
+def test_bad_library_name():
+ with pytest.raises(Exception):
+ with patch('argparse.ArgumentParser.error', side_effect=Exception) as error:
+ run_as_main(mil.main, '--library', 'user.library')
+ error.assert_called_once_with('Must specify the full path of the library e.g. arctic_jblackburn.library!')
+
+ with pytest.raises(Exception):
+ with patch('argparse.ArgumentParser.error', side_effect=Exception) as error:
+ run_as_main(mil.main, '--library', 'arctic_jblackburn')
+ error.assert_called_once_with('Must specify the full path of the library e.g. arctic_jblackburn.library!')
+
+ with pytest.raises(Exception):
+ with patch('argparse.ArgumentParser.error', side_effect=Exception) as error:
+ run_as_main(mil.main)
+ error.assert_called_once_with('Must specify the full path of the library e.g. arctic_jblackburn.library!')
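These CLI tests only assert against mocked collaborators. For orientation, the same operation through the API the script drives looks roughly like this; a sketch only, assuming a reachable MongoDB, appropriate credentials, and illustrative names:

```python
from arctic.arctic import Arctic

store = Arctic('hostname')                 # lazy: nothing connects until first use
store.initialize_library('arctic_user.library', 'VersionStore', hashed=False)
library = store['arctic_user.library']     # the freshly created VersionStore
```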
diff --git a/tests/unit/scripts/test_utils.py b/tests/unit/scripts/test_utils.py
new file mode 100644
index 000000000..d377d117d
--- /dev/null
+++ b/tests/unit/scripts/test_utils.py
@@ -0,0 +1,105 @@
+from mock import patch, Mock, call, sentinel, MagicMock
+import pytest
+
+from arctic.scripts import arctic_init_library as mil
+from arctic.scripts.utils import do_db_auth
+from ...util import run_as_main
+
+
+def test_do_db_auth():
+ # Create the user against the current mongo database
+ admin_creds = Mock()
+ user_creds = Mock()
+ connection = MagicMock()
+ with patch('arctic.scripts.utils.logger', autospec=True) as logger, \
+ patch('arctic.scripts.utils.get_auth', autospec=True, side_effect=[admin_creds, user_creds]) as get_auth:
+ assert do_db_auth('hostname', connection, 'arctic_user')
+
+ assert get_auth.call_args_list == [call('hostname', 'admin', 'admin'),
+ call('hostname', 'arctic', 'arctic_user')]
+ connection.admin.authenticate.assert_called_once_with(admin_creds.user,
+ admin_creds.password)
+ # Must also ensure that we auth against the user's db too; the user
+ # may well have read-only access to the admin database, but not to their user_db!
+ connection.__getitem__.assert_called_once_with('arctic_user')
+ connection.__getitem__.return_value.authenticate.assert_called_once_with(user_creds.user, user_creds.password)
+ assert logger.error.call_count == 0
+
+
+def test_do_db_auth_no_admin():
+ user_creds = Mock()
+ connection = MagicMock()
+ # Create the user against the current mongo database
+ with patch('arctic.scripts.utils.logger', autospec=True) as logger, \
+ patch('arctic.scripts.utils.get_auth', side_effect=[None, user_creds],
+ autospec=True) as get_auth:
+
+ connection.admin.authenticate.return_value = False
+ assert do_db_auth('hostname', connection, 'arctic_user')
+
+ assert logger.call_count == 0
+ assert get_auth.call_args_list == [call('hostname', 'admin', 'admin'),
+ call('hostname', 'arctic', 'arctic_user')]
+ connection['arctic_user'].authenticate.assert_called_once_with(user_creds.user, user_creds.password)
+
+
+def test_do_db_auth_no_user_creds():
+ user_creds = Mock()
+ connection = MagicMock()
+ with patch('arctic.scripts.utils.logger', autospec=True) as logger, \
+ patch('arctic.scripts.utils.get_auth', side_effect=[None, user_creds],
+ autospec=True) as get_auth:
+ connection['arctic_user'].authenticate.return_value = False
+ assert not do_db_auth('hostname', connection, 'arctic_user')
+
+ assert get_auth.call_args_list == [call('hostname', 'admin', 'admin'),
+ call('hostname', 'arctic', 'arctic_user')]
+ logger.error.assert_called_once_with("Failed to authenticate to db 'arctic_user' on 'hostname',"
+ " using user credentials")
+
+
+def test_do_db_auth_no_admin_user_creds_fails():
+ connection = MagicMock()
+ with patch('arctic.scripts.utils.logger', autospec=True) as logger, \
+ patch('arctic.scripts.utils.get_auth', side_effect=[None, None],
+ autospec=True) as get_auth:
+ connection.admin.authenticate.return_value = False
+ assert not do_db_auth('hostname', connection, 'arctic_user')
+
+ assert get_auth.call_args_list == [call('hostname', 'admin', 'admin'),
+ call('hostname', 'arctic', 'arctic_user')]
+ logger.error.assert_called_once_with("You need credentials for db 'arctic_user' on 'hostname',"
+ " or admin credentials")
+
+
+def test_do_db_auth_admin_user_creds_fails():
+ connection = MagicMock()
+ with patch('arctic.scripts.utils.logger', autospec=True) as logger, \
+ patch('arctic.scripts.utils.get_auth', side_effect=[Mock(), None],
+ autospec=True) as get_auth:
+ connection.admin.authenticate.return_value = False
+ assert not do_db_auth('hostname', connection, 'arctic_user')
+
+ assert get_auth.call_args_list == [call('hostname', 'admin', 'admin'),
+ call('hostname', 'arctic', 'arctic_user')]
+ logger.error.assert_called_once_with("Failed to authenticate to '%s' as Admin. Giving up." % ('hostname'))
+
+
+def test_do_db_auth_role():
+ # Create the user against the current mongo database
+ admin_creds = Mock()
+ user_creds = Mock()
+ connection = MagicMock()
+ with patch('arctic.scripts.utils.logger', autospec=True) as logger, \
+ patch('arctic.scripts.utils.get_auth', autospec=True, side_effect=[admin_creds, user_creds]) as get_auth:
+ assert do_db_auth('hostname', connection, 'arctic_user')
+
+ assert get_auth.call_args_list == [call('hostname', 'admin', 'admin'),
+ call('hostname', 'arctic', 'arctic_user')]
+ connection.admin.authenticate.assert_called_once_with(admin_creds.user,
+ admin_creds.password)
+ # Must also ensure that we auth against the user's db too; the user
+ # may well have read-only access to the admin database, but not to their user_db!
+ connection.__getitem__.assert_called_once_with('arctic_user')
+ connection.__getitem__.return_value.authenticate.assert_called_once_with(user_creds.user, user_creds.password)
+ assert logger.error.call_count == 0
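A minimal sketch of how the helper under test is called; it assumes a reachable mongod, and credentials are resolved internally via get_auth:

```python
from pymongo import MongoClient
from arctic.scripts.utils import do_db_auth

connection = MongoClient('hostname')
# Tries admin credentials first, then falls back to per-database user credentials.
if not do_db_auth('hostname', connection, 'arctic_user'):
    raise SystemExit("could not authenticate against 'admin' or 'arctic_user'")
```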
diff --git a/tests/unit/store/__init__.py b/tests/unit/store/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/store/test_ndarray_store.py b/tests/unit/store/test_ndarray_store.py
new file mode 100644
index 000000000..89636b75a
--- /dev/null
+++ b/tests/unit/store/test_ndarray_store.py
@@ -0,0 +1,66 @@
+import numpy as np
+from pytest import raises
+from arctic.store._ndarray_store import NdarrayStore, _promote_struct_dtypes
+
+
+def test_dtype_parsing():
+ store = NdarrayStore()
+ dtypes = []
+
+ dtypes.append(np.dtype(np.object_))
+ dtypes.append(np.dtype(np.float128))
+ dtypes.append(np.dtype('int64'))
+ dtypes.append(np.dtype([('A', 'int64')]))
+ dtypes.append(np.dtype([('A', 'int64'), ('B', '<f8')]))
+ store.to_records.assert_called_once_with(sentinel.df)
+
+
+def test_can_convert_to_records_without_objects_returns_true_otherwise():
+ store = PandasStore()
+ store.to_records = Mock(return_value=np.rec.array([(1356998400000000000L, 'a')],
+ dtype=[('index', '<M8[ns]'), ('values', 'S2')]))
+ assert str(item) == expected
+ assert repr(item) == expected
+
+
+def test_versioned_item_str_handles_none():
+ item = VersionedItem(symbol=None,
+ library=None,
+ data=None,
+ version=None,
+ metadata=None)
+
+ assert str(item)
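VersionedItem is the lightweight result record handed back by VersionStore reads and writes; a small construction sketch with illustrative field values:

```python
from arctic.store.version_store import VersionedItem

item = VersionedItem(symbol='EURUSD', library='arctic_user.library',
                     data=None, version=1.0, metadata={'metadata': 'foo'})
assert str(item) == repr(item)   # both render the same one-line summary
```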
diff --git a/tests/unit/store/test_version_store.py b/tests/unit/store/test_version_store.py
new file mode 100644
index 000000000..1caa22da2
--- /dev/null
+++ b/tests/unit/store/test_version_store.py
@@ -0,0 +1,175 @@
+import bson
+import datetime
+from datetime import datetime as dt, timedelta as dtd
+from mock import patch, MagicMock, sentinel, create_autospec, Mock, call, ANY
+import pytest
+
+import pymongo
+from pymongo import ReadPreference
+
+from arctic.date import mktz
+from arctic.store import version_store
+from arctic.store.version_store import VersionStore, VersionedItem
+from arctic.arctic import ArcticLibraryBinding, Arctic
+from arctic.exceptions import ConcurrentModificationException
+from pymongo.errors import OperationFailure
+from pymongo.collection import Collection
+
+
+def test_delete_version_version_not_found():
+ with patch('arctic.store.version_store.VersionStore.__init__', return_value=None, autospec=True):
+ with patch('arctic.store.version_store.logger') as logger:
+ vs = version_store.VersionStore(sentinel.connection)
+ vs._versions = MagicMock()
+ with patch.object(vs._versions, 'find_one', return_value=None, autospec=True):
+ vs._delete_version(sentinel.symbol, sentinel.version)
+ logger.error.assert_called_once_with("Can't delete sentinel.symbol:sentinel.version as not found in DB")
+
+
+def test_list_versions_LondonTime():
+ # Object ID's are stored in UTC. We need to ensure that the returned times
+ # for versions are in the local London TimeZone
+ vs = create_autospec(VersionStore, instance=True,
+ _versions=Mock())
+ vs._find_snapshots.return_value = 'snap'
+ vs._versions.find.return_value = [{'_id': bson.ObjectId.from_datetime(dt(2013, 4, 1, 9, 0)),
+ 'symbol': 's', 'version': 10}]
+
+ version = list(VersionStore.list_versions(vs, "symbol"))[0]
+ assert version == {'symbol': version['symbol'], 'version': version['version'],
+ # We return naive datetimes in 'default' time, which is London for us
+ 'date': dt(2013, 4, 1, 10, 0),
+ 'snapshots': 'snap'}
+
+
+def test_read_as_of_LondonTime():
+ # When we do a read, with naive as_of, that as_of is treated in London Time.
+ vs = create_autospec(VersionStore, instance=True,
+ _versions=Mock(), _allow_secondary=False)
+ VersionStore._read_metadata(vs, 'symbol', dt(2013, 4, 1, 9, 0))
+ versions = vs._versions.with_options.return_value
+ versions.find_one.assert_called_once_with({'symbol':'symbol', '_id':
+ {'$lt': bson.ObjectId.from_datetime(dt(2013, 4, 1, 9, 0, tzinfo=mktz()) + dtd(seconds=1))}},
+ sort=[('_id', pymongo.DESCENDING)])
+
+
+def test_read_as_of_NotNaive():
+ # When we do a read with a tz-aware as_of, the supplied timezone is respected as-is.
+ vs = create_autospec(VersionStore, instance=True,
+ _versions=Mock(), _allow_secondary=False)
+ VersionStore._read_metadata(vs, 'symbol', dt(2013, 4, 1, 9, 0, tzinfo=mktz('Europe/Paris')))
+ versions = vs._versions.with_options.return_value
+ versions.find_one.assert_called_once_with({'symbol':'symbol', '_id':
+ {'$lt': bson.ObjectId.from_datetime(dt(2013, 4, 1, 9, 0, tzinfo=mktz('Europe/Paris')) + dtd(seconds=1))}},
+ sort=[('_id', pymongo.DESCENDING)])
+
+
+def test_read_metadata_no_asof():
+ # With no as_of we simply look up the latest version, sorted by version number.
+ vs = create_autospec(VersionStore, instance=True,
+ _versions=Mock(), _allow_secondary=False)
+ VersionStore._read_metadata(vs, sentinel.symbol)
+ versions = vs._versions.with_options.return_value
+ assert versions.find_one.call_args_list == [call({'symbol': sentinel.symbol},
+ sort=[('version', pymongo.DESCENDING)])]
+
+
+def test_write_ensure_index():
+ write_handler = Mock(write=Mock(__name__=""))
+ vs = create_autospec(VersionStore, instance=True,
+ _collection=Mock(),
+ _version_nums=Mock(find_one_and_update=Mock(return_value={'version':1})),
+ _versions=Mock(insert_one=lambda x:None),
+ _arctic_lib=Mock(),
+ _publish_changes=False)
+ vs._collection.database.connection.nodes = []
+ vs._write_handler.return_value = write_handler
+ VersionStore.write(vs, 'sym', sentinel.data, prune_previous_version=False)
+ vs._ensure_index.assert_called_once_with()
+
+
+def test_write_check_quota():
+ write_handler = Mock(write=Mock(__name__=""))
+ vs = create_autospec(VersionStore, instance=True,
+ _collection=Mock(),
+ _version_nums=Mock(find_one_and_update=Mock(return_value={'version':1})),
+ _versions=Mock(insert_one=lambda x:None),
+ _arctic_lib=create_autospec(ArcticLibraryBinding),
+ _publish_changes=False)
+ vs._collection.database.connection.nodes = []
+ vs._write_handler.return_value = write_handler
+ VersionStore.write(vs, 'sym', sentinel.data, prune_previous_version=False)
+ assert vs._arctic_lib.check_quota.call_count == 1
+
+
+def test_initialize_library():
+ arctic_lib = create_autospec(ArcticLibraryBinding)
+ arctic_lib.arctic = create_autospec(Arctic, _allow_secondary=False)
+ with patch('arctic.store.version_store.enable_powerof2sizes', autospec=True) as enable_powerof2sizes, \
+ patch('arctic.store.version_store.enable_sharding', autospec=True) as enable_sharding:
+ arctic_lib.get_top_level_collection.return_value.database.create_collection.__name__ = 'some_name'
+ arctic_lib.get_top_level_collection.return_value.database.collection_names.__name__ = 'some_name'
+ VersionStore.initialize_library(arctic_lib, hashed=sentinel.hashed)
+ assert enable_powerof2sizes.call_args_list == [call(arctic_lib.arctic, arctic_lib.get_name())]
+ assert enable_sharding.call_args_list == [call(arctic_lib.arctic, arctic_lib.get_name(), hashed=sentinel.hashed)]
+
+
+def test_ensure_index():
+ th = Mock()
+ vs = create_autospec(VersionStore, _collection=Mock())
+ with patch('arctic.store.version_store._TYPE_HANDLERS', [th]):
+ VersionStore._ensure_index(vs)
+ assert vs._collection.snapshots.create_index.call_args_list == [call([('name', 1)], unique=True, background=True)]
+ assert vs._collection.versions.create_index.call_args_list == [call([('symbol', 1), ('_id', -1)], background=True),
+ call([('symbol', 1), ('version', -1)], unique=True, background=True)]
+ assert vs._collection.version_nums.create_index.call_args_list == [call('symbol', unique=True, background=True)]
+ th._ensure_index.assert_called_once_with(vs._collection)
+
+
+def test_prune_previous_versions_0_timeout():
+ self = create_autospec(VersionStore, _versions=Mock())
+ self.name = sentinel.name
+ self._versions = create_autospec(Collection)
+ self._versions.with_options.return_value.find.__name__ = 'find'
+ self._versions.with_options.return_value.find.return_value = []
+ with patch('arctic.store.version_store.dt') as dt:
+ dt.utcnow.return_value = datetime.datetime(2013, 10, 1)
+ VersionStore._prune_previous_versions(self, sentinel.symbol, keep_mins=0)
+ assert self._versions.with_options.call_args_list == [call(read_preference=ReadPreference.PRIMARY)]
+ assert self._versions.with_options.return_value.find.call_args_list == [
+ call({'$or': [{'parent': {'$exists': False}},
+ {'parent': {'$size': 0}}],
+ 'symbol': sentinel.symbol,
+ '_id': {'$lt': bson.ObjectId('524a10810000000000000000')}},
+ sort=[('version', -1)],
+ skip=1,
+ projection=['_id', 'type'])]
+
+
+def test_read_handles_operation_failure():
+ self = create_autospec(VersionStore, _versions=Mock(), _arctic_lib=Mock(),
+ _allow_secondary=True)
+ self._collection = create_autospec(Collection)
+ self._read_metadata.side_effect = [sentinel.meta1, sentinel.meta2]
+ self._read_metadata.__name__ = 'name'
+ self._do_read.__name__ = 'name' # feh: mongo_retry decorator cares about this
+ self._do_read.side_effect = [OperationFailure('error'), sentinel.read]
+ VersionStore.read(self, sentinel.symbol, sentinel.as_of, sentinel.from_version)
+ # Assert that, for the two read calls, the second uses the new metadata
+ assert self._do_read.call_args_list == [call(sentinel.symbol, sentinel.meta1, sentinel.from_version,
+ read_preference=ReadPreference.NEAREST)]
+ assert self._do_read_retry.call_args_list == [call(sentinel.symbol, sentinel.meta2, sentinel.from_version,
+ read_preference=ReadPreference.PRIMARY)]
+
+
+def test_read_reports_random_errors():
+ self = create_autospec(VersionStore, _versions=Mock(), _arctic_lib=Mock(),
+ _allow_secondary=True)
+ self._collection = create_autospec(Collection)
+ self._do_read.__name__ = 'name' # feh: mongo_retry decorator cares about this
+ self._do_read.side_effect = Exception('bad')
+ with pytest.raises(Exception) as e:
+ with patch('arctic.store.version_store.log_exception') as le:
+ VersionStore.read(self, sentinel.symbol, sentinel.as_of, sentinel.from_version)
+ assert 'bad' in str(e)
+ assert le.call_count == 1
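The unit tests above drive VersionStore methods against mocks; end to end, the workflow they model is roughly the following. This is a sketch only: it assumes a reachable MongoDB and an already-initialized VersionStore library.

```python
from datetime import datetime as dt
import pandas as pd
from arctic.arctic import Arctic

store = Arctic('hostname')
library = store['arctic_user.library']

library.write('sym', pd.DataFrame({'a': [1.0, 2.0]}), prune_previous_version=False)
latest = library.read('sym')                          # most recent version
as_of = library.read('sym', dt(2013, 4, 1, 9, 0))     # naive as_of is treated as local (London) time
print(list(library.list_versions('sym')))
```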
diff --git a/tests/unit/store/test_version_store_audit.py b/tests/unit/store/test_version_store_audit.py
new file mode 100644
index 000000000..34977446f
--- /dev/null
+++ b/tests/unit/store/test_version_store_audit.py
@@ -0,0 +1,186 @@
+from mock import create_autospec, Mock, sentinel, ANY, call
+from pymongo.errors import OperationFailure
+import pytest
+import pandas as pd
+
+from arctic.store.audit import ArcticTransaction
+from arctic.store.version_store import VersionedItem, VersionStore
+from arctic.exceptions import ConcurrentModificationException, NoDataFoundException
+
+
+def test_ConcurrentWriteBlock_simple():
+ vs = create_autospec(VersionStore, _collection=Mock())
+ ts1 = pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]})
+ vs.read.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=1, metadata=None, data=ts1)
+ vs.write.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=2,
+ metadata=None, data=None)
+ vs.list_versions.return_value = [{'version': 2}, {'version': 1}]
+
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ cwb.write(sentinel.symbol, pd.DataFrame(index=[3, 4], data={'a': [1.0, 2.0]}), metadata=sentinel.meta)
+
+ assert not vs._delete_version.called
+ vs.write.assert_called_once_with(sentinel.symbol, ANY, prune_previous_version=True, metadata=sentinel.meta)
+ vs.list_versions.assert_called_once_with(sentinel.symbol)
+
+
+def test_ConcurrentWriteBlock_writes_if_metadata_changed():
+ vs = create_autospec(VersionStore, _collection=Mock())
+ ts1 = pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]})
+ vs.read.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=1, metadata=None, data=ts1)
+ vs.write.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=2, metadata=None, data=None)
+ vs.list_versions.return_value = [{'version': 2},
+ {'version': 1}]
+
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ assert cwb._do_write is False
+ cwb.write(sentinel.symbol, ts1, metadata={1: 2})
+ assert cwb._do_write is True
+
+ assert not vs._delete_version.called
+ vs.write.assert_called_once_with(sentinel.symbol, ANY, prune_previous_version=True, metadata={1: 2})
+ vs.list_versions.assert_called_once_with(sentinel.symbol)
+
+ # Won't write on exit with same data and metadata
+ vs.read.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=2, metadata={1: 2}, data=ts1)
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ assert cwb._do_write is False
+ cwb.write(sentinel.symbol, ts1, metadata={1: 2})
+ assert cwb._do_write is False
+
+
+def test_ConcurrentWriteBlock_writes_if_base_data_corrupted():
+
+ vs = create_autospec(VersionStore, _collection=Mock())
+ ts1 = pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]})
+ vs.read.side_effect = OperationFailure('some failure')
+ vs.write.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=2,
+ metadata=None, data=None)
+ vs.read_metadata.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=1,
+ metadata=None, data=None)
+ vs.list_versions.return_value = [{'version': 2}, {'version': 1}]
+
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ cwb.write(sentinel.symbol, ts1, metadata={1: 2})
+
+ vs.write.assert_called_once_with(sentinel.symbol, ANY, prune_previous_version=True, metadata={1: 2})
+ assert vs.list_versions.call_args_list == [call(sentinel.symbol)]
+
+
+def test_ConcurrentWriteBlock_writes_no_data_found():
+ vs = create_autospec(VersionStore, _collection=Mock())
+ ts1 = pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]})
+ vs.read.side_effect = NoDataFoundException('no data')
+ vs.write.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=1,
+ metadata=None, data=None)
+ vs.list_versions.side_effect = [[],
+ [{'version': 1}],
+ ]
+
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ cwb.write(sentinel.symbol, ts1, metadata={1: 2})
+
+ assert vs.write.call_args_list == [call(sentinel.symbol, ANY, prune_previous_version=True, metadata={1: 2})]
+ assert vs.list_versions.call_args_list == [call(sentinel.symbol, latest_only=True),
+ call(sentinel.symbol)]
+
+
+def test_ConcurrentWriteBlock_writes_no_data_found_deleted():
+ vs = create_autospec(VersionStore, _collection=Mock())
+ ts1 = pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]})
+ vs.read.side_effect = NoDataFoundException('no data')
+ vs.write.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=3,
+ metadata=None, data=None)
+ vs.list_versions.side_effect = [[{'version': 2}, {'version': 1}],
+ [{'version': 3}, {'version': 2}],
+ ]
+
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ cwb.write(sentinel.symbol, ts1, metadata={1: 2})
+
+ assert vs.write.call_args_list == [call(sentinel.symbol, ANY, prune_previous_version=True, metadata={1: 2})]
+ assert vs.list_versions.call_args_list == [call(sentinel.symbol, latest_only=True),
+ call(sentinel.symbol)]
+
+
+def test_ConcurrentWriteBlock_does_nothing_when_data_not_modified():
+ vs = create_autospec(VersionStore, _collection=Mock())
+ ts1 = pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]})
+ vs.read.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=1, metadata=None, data=ts1)
+ vs.write.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=2, metadata=None, data=None)
+ vs.list_versions.side_effect = [{'version': 2}, {'version': 1}]
+
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ cwb.write(sentinel.symbol, pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]}))
+
+ assert not vs._delete_version.called
+ assert not vs.write.called
+
+
+def test_ConcurrentWriteBlock_does_nothing_when_data_is_None():
+ vs = create_autospec(VersionStore, _collection=Mock())
+ ts1 = pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]})
+ vs.read.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=1, metadata=None, data=ts1)
+ vs.write.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=2,
+ metadata=None, data=None)
+ vs.list_versions.return_value = [{'version': 1}, {'version': 2}]
+
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ pass
+ assert not vs._delete_version.called
+ assert not vs.write.called
+
+
+def test_ConcurrentWriteBlock_guards_against_inconsistent_ts():
+ vs = create_autospec(VersionStore, _collection=Mock())
+ ts1 = pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]})
+ vs.read.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=1, metadata=None, data=ts1)
+ vs.write.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=2, metadata=None, data=None)
+ vs.list_versions.side_effect = [{'version': 2}, {'version': 1}]
+
+ ts1 = pd.DataFrame(index=[1, 2], data={'a': [2.0, 3.0]})
+ with pytest.raises(ConcurrentModificationException):
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log, modify_timeseries=ts1) as cwb:
+ pass
+
+
+def test_ConcurrentWriteBlock_detects_concurrent_writes():
+ vs = create_autospec(VersionStore, _collection=Mock())
+ ts1 = pd.DataFrame(index=[1, 2], data={'a':[1.0, 2.0]})
+ vs.read.return_value = VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=1, metadata=None, data=ts1)
+ vs.write.side_effect = [VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=2, metadata=None, data=None),
+ VersionedItem(symbol=sentinel.symbol, library=sentinel.library, version=3, metadata=None, data=None)]
+ # Note that we return an extra version 5: a write from another process may land after our own write and get picked up
+ vs.list_versions.side_effect = [[{'version': 5}, {'version': 2}, {'version': 1}, ],
+ [{'version': 5}, {'version': 3}, {'version': 2}, {'version': 1}, ]]
+ from threading import Event, Thread
+ e1 = Event()
+ e2 = Event()
+
+ def losing_writer():
+ # Will attempt to write version 2, find that version 2 already exists, and end up writing version 3
+ with pytest.raises(ConcurrentModificationException):
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ cwb.write(sentinel.symbol, pd.DataFrame([1.0, 2.0], [3, 4]))
+ e1.wait()
+
+ def winning_writer():
+ # Will attempt to write version 2 as well
+ with ArcticTransaction(vs, sentinel.symbol, sentinel.user, sentinel.log) as cwb:
+ cwb.write(sentinel.symbol, pd.DataFrame([1.0, 2.0], [5, 6]))
+ e2.wait()
+
+ t1 = Thread(target=losing_writer)
+ t2 = Thread(target=winning_writer)
+ t1.start()
+ t2.start()
+
+ # both read the same timeseries and are locked doing some 'work'
+ e2.set()
+ # t2 should now be able to finish
+ t2.join()
+ e1.set()
+ t1.join()
+
+ # we're expecting the losing_writer to undo its write once it realises that it wrote v3 instead of v2
+ vs._delete_version.assert_called_once_with(sentinel.symbol, 3)
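For reference, the context manager exercised above is used roughly like this; a sketch that assumes an existing VersionStore library behind a reachable MongoDB. The transaction only writes on exit if the data or metadata actually changed, and rolls back its own version if another writer got in first.

```python
import pandas as pd
from arctic.arctic import Arctic
from arctic.store.audit import ArcticTransaction
from arctic.exceptions import ConcurrentModificationException

library = Arctic('hostname')['arctic_user.library']
try:
    with ArcticTransaction(library, 'sym', 'user', 'audit log entry') as txn:
        txn.write('sym', pd.DataFrame({'a': [1.0, 2.0]}), metadata={'source': 'example'})
except ConcurrentModificationException:
    pass  # another writer won the race; the transaction deleted its own version
```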
diff --git a/tests/unit/store/test_version_store_utils.py b/tests/unit/store/test_version_store_utils.py
new file mode 100644
index 000000000..7834e56d2
--- /dev/null
+++ b/tests/unit/store/test_version_store_utils.py
@@ -0,0 +1,17 @@
+import pytest
+import numpy as np
+
+from arctic.store._version_store_utils import _split_arrs
+
+
+def test_split_arrs_empty():
+ split = _split_arrs(np.empty(0), [])
+ assert np.all(split == np.empty(0, dtype=np.object))
+
+
+def test_split_arrs():
+ to_split = np.ones(10)
+ split = _split_arrs(to_split, [3])
+ assert len(split) == 2
+ assert np.all(split[0] == np.ones(3))
+ assert np.all(split[1] == np.ones(7))
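A tiny sketch of the helper's contract as exercised above:

```python
import numpy as np
from arctic.store._version_store_utils import _split_arrs

chunks = _split_arrs(np.ones(10), [3])   # split a flat array at index 3
assert len(chunks) == 2
assert len(chunks[0]) == 3 and len(chunks[1]) == 7
```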
diff --git a/tests/unit/test_arctic.py b/tests/unit/test_arctic.py
new file mode 100644
index 000000000..8e8ca8425
--- /dev/null
+++ b/tests/unit/test_arctic.py
@@ -0,0 +1,333 @@
+import cPickle as pickle
+from mock import patch, MagicMock, sentinel, create_autospec, Mock, call
+import pytest
+from pymongo.errors import OperationFailure
+from pymongo.mongo_client import MongoClient
+
+from arctic.auth import Credential
+from arctic.arctic import Arctic, ArcticLibraryBinding, \
+ register_library_type, LIBRARY_TYPES
+from arctic.exceptions import LibraryNotFoundException, \
+ ArcticException, QuotaExceededException
+
+
+def test_arctic_lazy_init():
+ with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True) as mc, \
+ patch('arctic.arctic.mongo_retry', side_effect=lambda x:x, autospec=True), \
+ patch('arctic.arctic.get_auth', autospec=True) as ga:
+ store = Arctic('cluster')
+ assert not mc.called
+ # do something to trigger lazy arctic init
+ store.list_libraries()
+ assert mc.called
+
+
+def test_arctic_auth():
+ with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True), \
+ patch('arctic.arctic.mongo_retry', autospec=True), \
+ patch('arctic.arctic.get_auth', autospec=True) as ga:
+ ga.return_value = Credential('db', 'admin_user', 'admin_pass')
+ store = Arctic('cluster')
+ # do something to trigger lazy arctic init
+ store.list_libraries()
+ ga.assert_called_once_with('cluster', 'arctic', 'admin')
+ store._adminDB.authenticate.assert_called_once_with('admin_user', 'admin_pass')
+ ga.reset_mock()
+
+ # Get a 'missing' library
+ with pytest.raises(LibraryNotFoundException):
+ with patch('arctic.arctic.ArcticLibraryBinding.get_library_type', return_value=None, autospec=True):
+ ga.return_value = Credential('db', 'user', 'pass')
+ store._conn['arctic_jblackburn'].name = 'arctic_jblackburn'
+ store['jblackburn.library']
+
+ # Creating the library will have attempted to auth against it
+ ga.assert_called_once_with('cluster', 'arctic', 'arctic_jblackburn')
+ store._conn['arctic_jblackburn'].authenticate.assert_called_once_with('user', 'pass')
+
+
+def test_arctic_auth_custom_app_name():
+ with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True), \
+ patch('arctic.arctic.mongo_retry', autospec=True), \
+ patch('arctic.arctic.get_auth', autospec=True) as ga:
+ ga.return_value = Credential('db', 'admin_user', 'admin_pass')
+ store = Arctic('cluster', app_name=sentinel.app_name)
+ # do something to trigger lazy arctic init
+ store.list_libraries()
+ assert ga.call_args_list == [call('cluster', sentinel.app_name, 'admin')]
+ ga.reset_mock()
+
+ # Get a 'missing' library
+ with pytest.raises(LibraryNotFoundException):
+ with patch('arctic.arctic.ArcticLibraryBinding.get_library_type', return_value=None, autospec=True):
+ ga.return_value = Credential('db', 'user', 'pass')
+ store._conn['arctic_jblackburn'].name = 'arctic_jblackburn'
+ store['jblackburn.library']
+
+ # Creating the library will have attempted to auth against it
+ assert ga.call_args_list == [call('cluster', sentinel.app_name, 'arctic_jblackburn')]
+
+
+def test_arctic_connect_hostname():
+ with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True) as mc, \
+ patch('arctic.arctic.mongo_retry', autospec=True) as ar, \
+ patch('arctic.arctic.get_mongodb_uri', autospec=True) as gmu:
+ store = Arctic('hostname', socketTimeoutMS=sentinel.socket_timeout,
+ connectTimeoutMS=sentinel.connect_timeout,
+ serverSelectionTimeoutMS=sentinel.select_timeout)
+ # do something to trigger lazy arctic init
+ store.list_libraries()
+ ar(mc).assert_called_once_with(host=gmu('hostname'), maxPoolSize=4,
+ socketTimeoutMS=sentinel.socket_timeout,
+ connectTimeoutMS=sentinel.connect_timeout,
+ serverSelectionTimeoutMS=sentinel.select_timeout)
+
+
+def test_arctic_connect_with_environment_name():
+ with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True) as mc, \
+ patch('arctic.arctic.mongo_retry', autospec=True) as ar, \
+ patch('arctic.arctic.get_auth', autospec=True), \
+ patch('arctic.arctic.get_mongodb_uri') as gmfe:
+ store = Arctic('live', socketTimeoutMS=sentinel.socket_timeout,
+ connectTimeoutMS=sentinel.connect_timeout,
+ serverSelectionTimeoutMS=sentinel.select_timeout)
+ # do something to trigger lazy arctic init
+ store.list_libraries()
+ assert gmfe.call_args_list == [call('live')]
+ assert ar(mc).call_args_list == [call(host=gmfe.return_value, maxPoolSize=4,
+ socketTimeoutMS=sentinel.socket_timeout,
+ connectTimeoutMS=sentinel.connect_timeout,
+ serverSelectionTimeoutMS=sentinel.select_timeout)]
+
+
+@pytest.mark.parametrize(
+ ["library", "expected_library", "expected_database"], [
+ ('library', 'library', 'arctic'),
+ ('user.library', 'library', 'arctic_user'),
+ ])
+def test_database_library_specifier(library, expected_library, expected_database):
+ mongo = MagicMock()
+ with patch('arctic.arctic.ArcticLibraryBinding._auth'):
+ ml = ArcticLibraryBinding(mongo, library)
+
+ assert ml.library == expected_library
+ mongo._conn.__getitem__.assert_called_with(expected_database)
+
+
+def test_arctic_repr():
+ with patch('pymongo.MongoClient', return_value=MagicMock(), autospec=True):
+ with patch('arctic.arctic.mongo_retry', autospec=True):
+ with patch('arctic.arctic.get_auth', autospec=True) as ga:
+ ga.return_value = Credential('db', 'admin_user', 'admin_pass')
+ store = Arctic('cluster')
+ assert str(store) == repr(store)
+
+
+def test_lib_repr():
+ mongo = MagicMock()
+ with patch('arctic.arctic.ArcticLibraryBinding._auth'):
+ ml = ArcticLibraryBinding(mongo, 'asdf')
+ assert str(ml) == repr(ml)
+
+
+def test_register_library_type():
+ class DummyType(object):
+ pass
+ register_library_type("new_dummy_type", DummyType)
+ assert LIBRARY_TYPES['new_dummy_type'] == DummyType
+
+ with pytest.raises(ArcticException) as e:
+ register_library_type("new_dummy_type", DummyType)
+ assert "ArcticException: Library new_dummy_type already registered as " in str(e)
+
+
+def test_set_quota():
+ self = create_autospec(ArcticLibraryBinding)
+ ArcticLibraryBinding.set_quota(self, 10000)
+ self.set_library_metadata.assert_called_once_with('QUOTA', 10000)
+ assert self.quota_countdown == 0
+ assert self.quota == 10000
+
+
+def test_get_quota():
+ self = create_autospec(ArcticLibraryBinding)
+ self.get_library_metadata.return_value = 42
+ assert ArcticLibraryBinding.get_quota(self) == 42
+ self.get_library_metadata.assert_called_once_with('QUOTA')
+
+
+def test_check_quota_Zero():
+ self = create_autospec(ArcticLibraryBinding)
+ self.quota = 0
+ ArcticLibraryBinding.check_quota(self)
+
+
+def test_check_quota_None():
+ self = create_autospec(ArcticLibraryBinding)
+ self.quota = None
+ self.get_library_metadata.return_value = None
+ ArcticLibraryBinding.check_quota(self)
+ self.get_library_metadata.assert_called_once_with('QUOTA')
+ assert self.quota == 0
+
+
+def test_check_quota_Zero2():
+ self = create_autospec(ArcticLibraryBinding)
+ self.quota = None
+ self.get_library_metadata.return_value = 0
+ ArcticLibraryBinding.check_quota(self)
+ self.get_library_metadata.assert_called_once_with('QUOTA')
+ assert self.quota == 0
+
+
+def test_check_quota_countdown():
+ self = create_autospec(ArcticLibraryBinding)
+ self.quota = 10
+ self.quota_countdown = 10
+ ArcticLibraryBinding.check_quota(self)
+ assert self.quota_countdown == 9
+
+
+def test_check_quota():
+ self = create_autospec(ArcticLibraryBinding)
+ self.arctic = create_autospec(Arctic)
+ self.quota = 1024 * 1024 * 1024
+ self.quota_countdown = 0
+ self.arctic.__getitem__.return_value = Mock(stats=Mock(return_value={'totals':
+ {'size': 900 * 1024 * 1024,
+ 'count': 100,
+ }
+ }))
+ with patch('arctic.logging.logger.warn') as warn:
+ ArcticLibraryBinding.check_quota(self)
+ self.arctic.__getitem__.assert_called_once_with(self.get_name.return_value)
+ warn.assert_called_once_with('Mongo Quota: 0.879 / 1 GB used')
+ assert self.quota_countdown == 6
+
+
+def test_check_quota_info():
+ self = create_autospec(ArcticLibraryBinding)
+ self.arctic = create_autospec(Arctic)
+ self.quota = 1024 * 1024 * 1024
+ self.quota_countdown = 0
+ self.arctic.__getitem__.return_value = Mock(stats=Mock(return_value={'totals':
+ {'size': 1 * 1024 * 1024,
+ 'count': 100,
+ }
+ }))
+ with patch('arctic.logging.logger.info') as info:
+ ArcticLibraryBinding.check_quota(self)
+ self.arctic.__getitem__.assert_called_once_with(self.get_name.return_value)
+ info.assert_called_once_with('Mongo Quota: 0.001 / 1 GB used')
+ assert self.quota_countdown == 51153
+
+
+def test_check_quota_exceeded():
+ self = create_autospec(ArcticLibraryBinding)
+ self.arctic = create_autospec(Arctic)
+ self.quota = 1024 * 1024 * 1024
+ self.quota_countdown = 0
+ self.arctic.__getitem__.return_value = Mock(stats=Mock(return_value={'totals':
+ {'size': 1024 * 1024 * 1024,
+ 'count': 100,
+ }
+ }))
+ with pytest.raises(QuotaExceededException) as e:
+ ArcticLibraryBinding.check_quota(self)
+ assert "Quota Exceeded: 1.000 / 1 GB used" in str(e)
+
+
+def test_initialize_library():
+ self = create_autospec(Arctic)
+ self._conn = create_autospec(MongoClient)
+ lib = create_autospec(ArcticLibraryBinding)
+ lib.database_name = sentinel.db_name
+ lib.get_quota.return_value = None
+ lib_type = Mock()
+ with patch.dict('arctic.arctic.LIBRARY_TYPES', {sentinel.lib_type: lib_type}), \
+ patch('arctic.arctic.ArcticLibraryBinding', return_value=lib, autospec=True) as ML:
+ Arctic.initialize_library(self, sentinel.lib_name, sentinel.lib_type, thing=sentinel.thing)
+ assert ML.call_args_list == [call(self, sentinel.lib_name)]
+ assert ML.return_value.set_library_type.call_args_list == [call(sentinel.lib_type)]
+ assert ML.return_value.set_quota.call_args_list == [call(10 * 1024 * 1024 * 1024)]
+ assert lib_type.initialize_library.call_args_list == [call(ML.return_value, thing=sentinel.thing)]
+
+
+def test_initialize_library_too_many_ns():
+ self = create_autospec(Arctic)
+ self._conn = create_autospec(MongoClient)
+ lib = create_autospec(ArcticLibraryBinding)
+ lib.database_name = sentinel.db_name
+ self._conn.__getitem__.return_value.collection_names.return_value = [x for x in xrange(3001)]
+ lib_type = Mock()
+ with pytest.raises(ArcticException) as e:
+ with patch.dict('arctic.arctic.LIBRARY_TYPES', {sentinel.lib_type: lib_type}), \
+ patch('arctic.arctic.ArcticLibraryBinding', return_value=lib, autospec=True) as ML:
+ Arctic.initialize_library(self, sentinel.lib_name, sentinel.lib_type, thing=sentinel.thing)
+ assert self._conn.__getitem__.call_args_list == [call(sentinel.db_name),
+ call(sentinel.db_name)]
+ assert lib_type.initialize_library.call_count == 0
+ assert 'Too many namespaces 3001, not creating: sentinel.lib_name' in str(e)
+
+
+def test_get_library():
+ self = create_autospec(Arctic)
+ self._library_cache = {}
+ library_type = Mock()
+ register_library_type(sentinel.lib_type, library_type)
+ with patch('arctic.arctic.ArcticLibraryBinding', autospec=True) as ML:
+ ML.return_value.get_library_type.return_value = sentinel.lib_type
+ library = Arctic.get_library(self, sentinel.lib_name)
+ del LIBRARY_TYPES[sentinel.lib_type]
+ assert ML.call_args_list == [call(self, sentinel.lib_name)]
+ assert library_type.call_args_list == [call(ML.return_value)]
+ assert library == library_type.return_value
+
+
+def test_get_library_not_initialized():
+ self = create_autospec(Arctic,
+ mongo_host=sentinel.host)
+ self._library_cache = {}
+ with pytest.raises(LibraryNotFoundException) as e, \
+ patch('arctic.arctic.ArcticLibraryBinding', autospec=True) as ML:
+ ML.return_value.get_library_type.return_value = None
+ Arctic.get_library(self, sentinel.lib_name)
+ assert "Library %s was not correctly initialized in %s." % (sentinel.lib_name, self) in str(e)
+
+
+def test_get_library_auth_issue():
+ self = create_autospec(Arctic,
+ mongo_host=sentinel.host)
+ self._library_cache = {}
+ with pytest.raises(LibraryNotFoundException) as e, \
+ patch('arctic.arctic.ArcticLibraryBinding', autospec=True) as ML:
+ ML.return_value.get_library_type.side_effect = OperationFailure('database error: not authorized for query on arctic_marketdata.index.ARCTIC')
+ Arctic.get_library(self, sentinel.lib_name)
+ assert "Library %s was not correctly initialized in %s." % (sentinel.lib_name, self) in str(e)
+
+
+def test_get_library_not_registered():
+ self = create_autospec(Arctic)
+ self._library_cache = {}
+ with pytest.raises(LibraryNotFoundException) as e, \
+ patch('arctic.arctic.ArcticLibraryBinding', autospec=True) as ML:
+ ML.return_value.get_library_type.return_value = sentinel.lib_type
+ Arctic.get_library(self, sentinel.lib_name)
+ assert ("Couldn't load LibraryType '%s' for '%s' (has the class been registered?)" %
+ (sentinel.lib_type, sentinel.lib_name)
+ ) in str(e)
+
+
+def test_mongo_host_get_set():
+ sentinel.mongo_host = Mock(nodes={("host", "port")})
+ arctic = Arctic(sentinel.mongo_host)
+ assert arctic.mongo_host == "host:port"
+
+
+def test_arctic_set_get_state():
+ sentinel.mongo_host = Mock(nodes={("host", "port")})
+ store = Arctic(sentinel.mongo_host, allow_secondary="allow_secondary")
+ buff = pickle.dumps(store)
+ mnew = pickle.loads(buff)
+ assert mnew.mongo_host == "host:port"
+ assert mnew._allow_secondary == "allow_secondary"
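Taken together, these tests pin down the Arctic handle's behaviour: construction is cheap and lazy, credentials are looked up per database, and handles survive pickling. A brief sketch with illustrative names:

```python
import cPickle as pickle
from arctic.arctic import Arctic

store = Arctic('hostname', app_name='example-app', allow_secondary=True)
libraries = store.list_libraries()        # first call that actually touches MongoDB

# Handles can be pickled (e.g. shipped to worker processes) and reconnect lazily.
store2 = pickle.loads(pickle.dumps(store))
```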
diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py
new file mode 100644
index 000000000..fe23d2659
--- /dev/null
+++ b/tests/unit/test_auth.py
@@ -0,0 +1,17 @@
+from mock import create_autospec, sentinel
+from pymongo.database import Database
+from pymongo.errors import PyMongoError
+
+from arctic import auth
+
+
+def test_authenticate():
+ db = create_autospec(Database)
+ db.authenticate.return_value = sentinel.ret
+ assert auth.authenticate(db, sentinel.user, sentinel.password) == sentinel.ret
+
+
+def test_authenticate_fails():
+ db = create_autospec(Database)
+ db.authenticate.side_effect = PyMongoError("error")
+ assert auth.authenticate(db, sentinel.user, sentinel.password) is False
diff --git a/tests/unit/test_compress.py b/tests/unit/test_compress.py
new file mode 100644
index 000000000..69aebbf51
--- /dev/null
+++ b/tests/unit/test_compress.py
@@ -0,0 +1,72 @@
+import lz4
+import pytest
+import random
+import string
+
+import arctic._compress as c
+
+
+def test_roundtrip():
+ _str = "hello world"
+ cstr = c.compress(_str)
+ assert _str == c.decompress(cstr)
+
+
+@pytest.mark.parametrize("n", [1, 1e2, 1e3, 1e6])
+def test_roundtrip_multi(n):
+ _str = random_string(n)
+ cstr = c.compress(_str)
+ assert _str == c.decompress(cstr)
+
+
+def test_roundtripHC():
+ _str = "hello world"
+ cstr = c.compressHC(_str)
+ assert _str == c.decompress(cstr)
+
+
+def test_roundtripLZ4():
+ _str = "hello world"
+ cstr = lz4.compress(_str)
+ assert _str == c.decompress(cstr)
+
+
+def test_roundtripLZ4Back():
+ _str = "hello world"
+ cstr = c.compress(_str)
+ assert _str == lz4.decompress(cstr)
+
+
+def test_roundtripLZ4HC():
+ _str = "hello world"
+ cstr = lz4.compressHC(_str)
+ assert _str == c.decompress(cstr)
+
+
+def test_roundtripLZ4HCBack():
+ _str = "hello world"
+ cstr = c.compressHC(_str)
+ assert _str == lz4.decompress(cstr)
+
+
+@pytest.mark.parametrize("n, length", [(1, 10), (100, 10), (1000, 10)])
+def test_roundtrip_arr(n, length):
+ _strarr = [random_string(length) for _ in range(n)]
+ cstr = c.compressarr(_strarr)
+ assert _strarr == c.decompressarr(cstr)
+
+
+@pytest.mark.parametrize("n, length", [(1, 10), (100, 10), (1000, 10)])
+def test_roundtrip_arrHC(n, length):
+ _strarr = [random_string(length) for _ in range(n)]
+ cstr = c.compressarrHC(_strarr)
+ assert _strarr == c.decompressarr(cstr)
+
+
+def test_arr_zero():
+ assert [] == c.compressarrHC([])
+ assert [] == c.decompressarr([])
+
+
+def random_string(N):
+ return ''.join(random.choice(string.printable) for _ in range(int(N)))
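A short sketch of the module's surface as exercised above; the HC and non-HC paths share the same wire format, which is why they interoperate with the lz4 package:

```python
import arctic._compress as c

data = "hello world" * 100
assert c.decompress(c.compress(data)) == data       # fast LZ4 path
assert c.decompress(c.compressHC(data)) == data     # high-compression path, same format

chunks = ['chunk-%d' % i for i in range(10)]
assert c.decompressarr(c.compressarr(chunks)) == chunks
```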
diff --git a/tests/unit/test_compression.py b/tests/unit/test_compression.py
new file mode 100644
index 000000000..8aef0bb25
--- /dev/null
+++ b/tests/unit/test_compression.py
@@ -0,0 +1,101 @@
+from mock import patch, Mock
+
+from arctic._compression import use_lz4hc, _should_use_lz4hc, _is_interactive_mode, compress, compress_array, decompress, decompress_array
+from arctic import _compression
+
+
+def teardown_function(function):
+ _compression.USE_LZ4HC = True
+
+
+def test_use_lz4hc():
+ use_lz4hc(True)
+ assert _compression.USE_LZ4HC is True
+ use_lz4hc(False)
+ assert _compression.USE_LZ4HC is False
+
+
+def test_use_lz4hc_True():
+ use_lz4hc(True)
+ assert _should_use_lz4hc() is True
+
+
+def test_use_lz4hc_False():
+ use_lz4hc(False)
+ assert _should_use_lz4hc() is False
+
+
+def test__is_interactive_mode():
+ assert _is_interactive_mode() is False # in a test!
+
+
+def test_compress():
+ assert len(compress("foobar")) > 0
+
+
+def test_compress_LZ4HC():
+ use_lz4hc(True)
+ cfn = Mock()
+ with patch('arctic._compression.clz4.compressHC', cfn):
+ compress("foo")
+ assert cfn.call_count == 1
+
+
+def test_compress_LZ4():
+ use_lz4hc(False)
+ cfn = Mock()
+ with patch('arctic._compression.clz4.compress', cfn):
+ compress("foo")
+ assert cfn.call_count == 1
+
+
+def test_compressarr():
+ assert len(compress_array(["foobar"*10])) > 0
+ assert isinstance(compress_array(["foobar"*10]), list)
+
+
+def test_compressarr_LZ4HC():
+ assert len(compress_array(["foobar"*10])) > 0
+ assert isinstance(compress_array(["foobar"*10]), list)
+
+
+def test_compress_array_usesLZ4HC():
+ use_lz4hc(True)
+ cfn = Mock()
+ with patch('arctic._compression.clz4.compressarrHC', cfn):
+ compress_array(["foo"] * 100)
+ assert cfn.call_count == 1
+
+
+def test_compress_array_usesLZ4():
+ use_lz4hc(False)
+ cfn = Mock()
+ with patch('arctic._compression.clz4.compressarr', cfn):
+ compress_array(["foo"] * 100)
+ assert cfn.call_count == 1
+
+
+def test_compress_array_LZ4HC_sequential():
+ use_lz4hc(True)
+ cfn = Mock()
+ with patch('arctic._compression.clz4.compressHC', cfn):
+ compress_array(["foo"] * 4)
+ assert cfn.call_count == 4
+
+
+def test_compress_array_LZ4_sequential():
+ use_lz4hc(False)
+ cfn = Mock()
+ with patch('arctic._compression.clz4.compress', cfn):
+ compress_array(["foo"] * 49)
+ assert cfn.call_count == 49
+
+
+def test_decompress():
+ assert decompress(compress("foo")) == "foo"
+
+
+def test_decompress_array():
+ ll = ['foo%s' % i for i in range(100)]
+ assert decompress_array(compress_array(ll)) == ll
+
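A short sketch of the toggling behaviour these tests cover; USE_LZ4HC defaults to True, as the teardown above restores:

```python
from arctic._compression import use_lz4hc, compress, decompress, compress_array, decompress_array

use_lz4hc(False)                          # prefer the faster, lower-ratio codec
payloads = ['foo%s' % i for i in range(100)]
assert decompress_array(compress_array(payloads)) == payloads

use_lz4hc(True)                           # back to the high-compression default
assert decompress(compress('foo')) == 'foo'
```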
diff --git a/tests/unit/test_decorators_unit.py b/tests/unit/test_decorators_unit.py
new file mode 100644
index 000000000..7085e8cfd
--- /dev/null
+++ b/tests/unit/test_decorators_unit.py
@@ -0,0 +1,162 @@
+from mock import patch, create_autospec, sentinel, Mock, PropertyMock, MagicMock
+import pytest
+from pymongo.errors import AutoReconnect, OperationFailure, DuplicateKeyError, ServerSelectionTimeoutError
+from pymongo.read_preferences import ReadPreference
+
+from arctic import decorators
+from arctic.decorators import mongo_retry, _get_host
+from pymongo.collection import Collection
+
+
+def test_mongo_retry():
+ retries = [2]
+ self = MagicMock()
+ self._arctic_lib.arctic.mongo_host = sentinel.host
+ self._collection.database.client.nodes = set([('a', 12)])
+ self._arctic_lib.get_name.return_value = sentinel.lib_name
+ with patch('arctic.decorators._handle_error', autospec=True) as he:
+ @mongo_retry
+ def foo(self):
+ if retries[0] == 2:
+ retries[0] -= 1
+ raise OperationFailure('error')
+ elif retries[0] == 1:
+ retries[0] -= 1
+ raise AutoReconnect('error')
+ return "success"
+ foo(self)
+ assert he.call_count == 2
+ assert isinstance(he.call_args_list[0][0][1], OperationFailure)
+ assert he.call_args_list[0][0][2] == 1
+ assert he.call_args_list[0][1] == {'mnodes': ['a:12'],
+ 'mhost': 'sentinel.host',
+ 'l': sentinel.lib_name}
+ assert isinstance(he.call_args_list[1][0][1], AutoReconnect)
+ assert he.call_args_list[1][0][2] == 2
+
+
+def test_mongo_retry_fails():
+ error = OperationFailure('error')
+ retries = [16]
+ with patch('arctic.decorators._log_exception', autospec=True) as le:
+ @mongo_retry
+ def foo():
+ if retries[0]:
+ retries[0] -= 1
+ raise error
+ return "success"
+ with pytest.raises(OperationFailure):
+ foo()
+ assert le.call_count == 15
+ assert le.call_args[0][0] == 'foo'
+ assert le.call_args[0][1] == error
+
+
+def test_retry_nested():
+ error = OperationFailure('error')
+ with patch('arctic.decorators._log_exception', autospec=True) as le:
+ @mongo_retry
+ def foo():
+ @mongo_retry
+ def bar():
+ raise error
+ try:
+ bar()
+ except:
+ raise error
+ with pytest.raises(OperationFailure):
+ foo()
+ assert le.call_count == 15
+ assert le.call_args[0][0] == 'bar'
+ assert le.call_args[0][1] == error
+
+
+def test_all_other_exceptions_logged():
+ with patch('arctic.decorators._log_exception', autospec=True) as le:
+ def foo():
+ raise Exception("Unexpected Error")
+ foo.__module__ = 'arctic.foo'
+ foo = mongo_retry(foo)
+ with pytest.raises(Exception) as e:
+ foo()
+ assert "Unexpected Error" in str(e)
+ assert le.call_count == 1
+ assert le.call_args[0][0] == "foo"
+
+
+def test_other_exceptions_not_logged_outside_of_arctic():
+ with patch('arctic.decorators._log_exception', autospec=True) as le:
+ @mongo_retry
+ def foo():
+ raise Exception("Unexpected Error")
+ with pytest.raises(Exception) as e:
+ foo()
+ assert "Unexpected Error" in str(e)
+ assert le.call_count == 0
+
+
+@pytest.mark.xfail(reason="CS-8393 Mongo server reports auth failure when servers flip")
+def test_auth_failure_no_retry():
+ error = OperationFailure('unauthorized for db:arctic_jblackburn')
+ with patch('arctic.decorators._log_exception', autospec=True) as le:
+ @mongo_retry
+ def foo():
+ raise error
+ with pytest.raises(OperationFailure) as e:
+ foo()
+ assert 'OperationFailure: unauthorized for db:arctic_jblackburn' in str(e)
+ assert le.call_count == 1
+
+
+def test_duplicate_key_failure_no_retry():
+ error = DuplicateKeyError('duplicate key')
+ with patch('arctic.decorators._log_exception', autospec=True) as le:
+ @mongo_retry
+ def foo():
+ raise error
+ with pytest.raises(OperationFailure) as e:
+ foo()
+ assert 'duplicate key' in str(e)
+ assert le.call_count == 1
+
+
+def test_ServerSelectionTimeoutError_no_retry():
+ error = ServerSelectionTimeoutError('some error')
+ with patch('arctic.decorators._log_exception', autospec=True) as le:
+ @mongo_retry
+ def foo():
+ raise error
+ with pytest.raises(ServerSelectionTimeoutError) as e:
+ foo()
+ assert 'some error' in str(e)
+ assert le.call_count == 1
+
+
+def test_get_host():
+ store = Mock()
+ store._arctic_lib.arctic.mongo_host = sentinel.host
+ store._collection.database.client.nodes = set([('a', 12)])
+ store._arctic_lib.get_name.return_value = sentinel.lib_name
+ assert _get_host(store) == {'mhost': 'sentinel.host',
+ 'mnodes': ['a:12'],
+ 'l': sentinel.lib_name,
+ }
+
+
+def test_get_host_list():
+ store = Mock()
+ store._arctic_lib.arctic.mongo_host = sentinel.host
+ store._collection.database.client.nodes = set([('a', 12)])
+ store._arctic_lib.get_name.return_value = sentinel.lib_name
+ assert _get_host([store]) == {'mhost': 'sentinel.host',
+ 'mnodes': ['a:12'],
+ 'l': sentinel.lib_name,
+ }
+
+
+def test_get_host_not_a_vs():
+ store = MagicMock()
+ store._arctic_lib.get_name.side_effect = AttributeError("Hello")
+ assert _get_host(store) == {}
+ store._arctic_lib.get_name.side_effect = ValueError("Hello")
+ assert _get_host(store) == {}
diff --git a/tests/unit/test_hosts.py b/tests/unit/test_hosts.py
new file mode 100644
index 000000000..a3435d889
--- /dev/null
+++ b/tests/unit/test_hosts.py
@@ -0,0 +1,36 @@
+from mock import patch, call
+import pytest
+
+from arctic.hosts import get_arctic_lib
+
+
+def test_get_arctic_lib_with_known_host():
+ with patch('arctic.arctic.Arctic') as Arctic:
+ get_arctic_lib("foo@bar")
+ assert Arctic.call_args_list == [call('bar')]
+
+
+def test_get_arctic_lib_with_unknown_host():
+ with patch('arctic.arctic.Arctic') as Arctic:
+ with patch('pymongo.MongoClient') as MongoClient:
+ get_arctic_lib("foo@bar:123")
+ assert Arctic.call_args_list == [call("bar:123")]
+
+
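+# The accepted connection-string format appears to be '<library>@<host>[:port]', with dots
+# allowed in the library name; the malformed examples below (no '@', or a ':' in the
+# library part) are expected to raise ValueError.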
+def test_get_arctic_connection_strings():
+ with patch('arctic.arctic.Arctic') as Arctic:
+ with patch('pymongo.MongoClient') as MongoClient:
+ get_arctic_lib("foo@bar")
+ get_arctic_lib("foo.sheep@bar")
+ get_arctic_lib("foo.sheep@bar:123")
+ get_arctic_lib("foo.sheep@127.0.0.1:123")
+
+
+@pytest.mark.parametrize(
+ ["string"], [('donkey',), ('donkey:ride@blackpool',),
+ ('donkey:ride',)])
+def test_get_arctic_malformed_connection_strings(string):
+ with pytest.raises(ValueError):
+ get_arctic_lib(string)
diff --git a/tests/unit/tickstore/__init__.py b/tests/unit/tickstore/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit/tickstore/test_toplevel.py b/tests/unit/tickstore/test_toplevel.py
new file mode 100644
index 000000000..946f4ecab
--- /dev/null
+++ b/tests/unit/tickstore/test_toplevel.py
@@ -0,0 +1,147 @@
+from mock import Mock, patch, MagicMock, create_autospec, sentinel
+import pytest
+from datetime import datetime as dt
+import pandas as pd
+from pandas.util.testing import assert_frame_equal
+import numpy as np
+from mockextras import when
+
+from arctic.date import DateRange, mktz
+from arctic.exceptions import OverlappingDataException
+from arctic.tickstore.toplevel import TopLevelTickStore, TickStoreLibrary
+from dateutil.rrule import rrule, DAILY
+
+
+def test_raise_exception_if_daterange_is_not_provided():
+ store = TopLevelTickStore(Mock())
+ with pytest.raises(Exception) as e:
+ store._get_library_metadata(None)
+ assert "A date range must be provided" in str(e)
+
+
+def test_raise_exception_if_date_range_does_not_contain_start_date():
+ store = TopLevelTickStore(Mock())
+ dr = DateRange(start=None, end=dt(2011, 1, 1))
+ with pytest.raises(Exception) as e:
+ store._get_library_metadata(dr)
+ assert "The date range {0} must contain a start and end date".format(dr) in str(e)
+
+
+def test_raise_exception_if_date_range_does_not_contain_end_date():
+ store = TopLevelTickStore(Mock())
+ dr = DateRange(start=dt(2011, 1, 1), end=None)
+ with pytest.raises(Exception) as e:
+ store._get_library_metadata(dr)
+ assert "The date range {0} must contain a start and end date".format(dr) in str(e)
+
+
+def test_raise_exception_if_date_range_does_not_contain_start_and_end_date():
+ store = TopLevelTickStore(Mock())
+ dr = DateRange(start=None, end=None)
+ with pytest.raises(Exception) as e:
+ store._get_library_metadata(dr)
+ assert "The date range {0} must contain a start and end date".format(dr) in str(e)
+
+
+def test_raise_exception_and_log_an_error_if_an_invalid_library_name_is_added():
+ arctic_lib = MagicMock()
+ arctic_lib.arctic.__getitem__.side_effect = Exception()
+ store = TopLevelTickStore(arctic_lib)
+ with patch("arctic.tickstore.toplevel.logger") as mock_logger:
+ with pytest.raises(Exception):
+ store.add(None, "blah")
+ mock_logger.error.assert_called_once_with("Could not load library")
+
+
+def test_raise_exception_if_date_range_overlaps():
+ self = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock())
+ self._get_library_metadata.return_value = [TickStoreLibrary('lib1', None), ]
+ with pytest.raises(OverlappingDataException) as e:
+ TopLevelTickStore.add(self, DateRange(start=dt(2010, 1, 1), end=dt(2011, 1, 1, 23, 59, 59, 999000)), "blah")
+ assert "There are libraries that overlap with the date range:" in str(e)
+
+
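+# TopLevelTickStore.add requires the date range to cover whole UTC days.  The cases below
+# check that naive datetimes are treated as UTC and that local-time boundaries are
+# normalised to UTC before being written to the collection; ranges that do not fall on
+# UTC day boundaries are rejected with an AssertionError.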
+@pytest.mark.parametrize(('start', 'end', 'expected_start', 'expected_end'),
+ [(dt(2010, 1, 1, tzinfo=mktz('UTC')), dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=mktz('UTC')),
+ dt(2010, 1, 1, tzinfo=mktz('UTC')), dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=mktz('UTC'))),
+ (dt(2010, 1, 1), dt(2010, 12, 31, 23, 59, 59, 999000), dt(2010, 1, 1, tzinfo=mktz('UTC')),
+ dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=mktz('UTC'))),
+ (dt(2009, 12, 31, 19, tzinfo=mktz('America/New_York')), dt(2010, 12, 31, 18, 59, 59, 999000, tzinfo=mktz('America/New_York')),
+ dt(2010, 1, 1, tzinfo=mktz('UTC')), dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=mktz('UTC')))
+ ])
+def test_add_library_to_collection_if_date_range_is_on_UTC_or_naive_day_boundaries(start, end, expected_start, expected_end):
+ self = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock(), _collection=MagicMock())
+ self._get_library_metadata.return_value = []
+ TopLevelTickStore.add(self, DateRange(start=start, end=end), "blah")
+ self._collection.update_one.assert_called_once_with({'library_name': "blah"},
+ {'$set':
+ {'start': expected_start,
+ 'end': expected_end}}, upsert=True)
+
+
+@pytest.mark.parametrize(('start', 'end'),
+ [(dt(2010, 1, 1, 2, tzinfo=mktz('UTC')), dt(2011, 1, 1, tzinfo=mktz('UTC'))),
+ (dt(2010, 1, 1, tzinfo=mktz('UTC')), dt(2011, 1, 1, 2, tzinfo=mktz('UTC'))),
+ (dt(2010, 1, 1, 2, tzinfo=mktz('UTC')), dt(2011, 1, 1, 2, tzinfo=mktz('UTC'))),
+ (dt(2010, 1, 1, 2), dt(2011, 1, 1)),
+ (dt(2010, 1, 1), dt(2011, 1, 1, 2)),
+ (dt(2010, 1, 1, 2), dt(2011, 1, 1, 2)),
+ (dt(2009, 12, 31, 21, 10, tzinfo=mktz('America/New_York')), dt(2010, 12, 31, tzinfo=mktz('America/New_York'))),
+ (dt(2009, 12, 31, tzinfo=mktz('America/New_York')), dt(2010, 12, 31, tzinfo=mktz('America/New_York'))),
+ (dt(2009, 12, 31, 21, 10, tzinfo=mktz('America/New_York')), dt(2010, 12, 31, 9, 21, tzinfo=mktz('America/New_York')))
+ ])
+def test_raise_error_add_library_is_called_with_a_date_range_not_on_day_boundaries(start, end):
+ with pytest.raises(AssertionError) as e:
+ self = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock(), _collection=MagicMock())
+ self._get_library_metadata.return_value = []
+ TopLevelTickStore.add(self, DateRange(start=start, end=end), "blah")
+ assert "Date range should fall on UTC day boundaries" in str(e)
+
+
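+# Both _slice tests below use rows dated 2010-01-01, -03, -05, -07 and -09 (every other
+# day); the expected positional indices imply that _slice keeps entries whose index falls
+# within [start, end] inclusive.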
+@pytest.mark.parametrize(('start', 'end', 'expected_start_index', 'expected_end_index'),
+ [(dt(2010, 1, 1), dt(2010, 1, 5), 0, 3),
+ (dt(2010, 1, 1), dt(2010, 1, 6), 0, 3),
+ (dt(2010, 1, 1, 1), dt(2010, 1, 6), 1, 3),
+ (dt(2010, 1, 1, 1), dt(2010, 1, 4, 2), 1, 2),
+ (dt(2009, 1, 1), dt(2010, 1, 5), 0, 3),
+ ])
+def test_slice_pandas_dataframe(start, end, expected_start_index, expected_end_index):
+ top_level_tick_store = TopLevelTickStore(Mock())
+ dates = pd.date_range('20100101', periods=5, freq='2D')
+ data = pd.DataFrame(np.random.randn(5, 4), index=dates, columns=list('ABCD'))
+ expected = data.ix[expected_start_index:expected_end_index]
+ result = top_level_tick_store._slice(data, start, end)
+ assert_frame_equal(expected, result)
+
+
+@pytest.mark.parametrize(('start', 'end', 'expected_start_index', 'expected_end_index'),
+ [(dt(2010, 1, 1), dt(2010, 1, 5), 0, 3),
+ (dt(2010, 1, 1), dt(2010, 1, 6), 0, 3),
+ (dt(2010, 1, 1, 1), dt(2010, 1, 6), 1, 3),
+ (dt(2010, 1, 1, 1), dt(2010, 1, 4, 2), 1, 2),
+ (dt(2009, 1, 1), dt(2010, 1, 5), 0, 3),
+ ])
+def test_slice_list_of_dicts(start, end, expected_start_index, expected_end_index):
+ top_level_tick_store = TopLevelTickStore(Mock())
+ dates = list(rrule(DAILY, count=5, dtstart=dt(2010, 1, 1), interval=2))
+ data = [{'index': date, 'A': val} for date, val in zip(dates, range(5))]
+ expected = data[expected_start_index:expected_end_index]
+ result = top_level_tick_store._slice(data, start, end)
+ assert expected == result
+
+
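+# write() is expected to look up each constituent library's start/end metadata, slice the
+# incoming data accordingly and delegate each slice to the matching underlying library.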
+def test_write_pandas_data_to_right_libraries():
+ self = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock(), _collection=MagicMock())
+ self._collection.find.return_value = [{'library_name': sentinel.libname1, 'start': sentinel.st1, 'end': sentinel.end1},
+ {'library_name': sentinel.libname2, 'start': sentinel.st2, 'end': sentinel.end2}]
+ slice1 = range(2)
+ slice2 = range(4)
+ when(self._slice).called_with(sentinel.data, sentinel.st1, sentinel.end1).then(slice1)
+ when(self._slice).called_with(sentinel.data, sentinel.st2, sentinel.end2).then(slice2)
+ mock_lib1 = Mock()
+ mock_lib2 = Mock()
+ when(self._arctic_lib.arctic.__getitem__).called_with(sentinel.libname1).then(mock_lib1)
+ when(self._arctic_lib.arctic.__getitem__).called_with(sentinel.libname2).then(mock_lib2)
+ TopLevelTickStore.write(self, 'blah', sentinel.data)
+ mock_lib1.write.assert_called_once_with('blah', slice1)
+ mock_lib2.write.assert_called_once_with('blah', slice2)
diff --git a/tests/util.py b/tests/util.py
new file mode 100644
index 000000000..c1b25b751
--- /dev/null
+++ b/tests/util.py
@@ -0,0 +1,51 @@
+from contextlib import contextmanager
+from cStringIO import StringIO
+from dateutil.rrule import rrule, DAILY
+import dateutil
+from datetime import datetime as dt
+import pandas
+import numpy as np
+import sys
+
+
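+# Parses a small pipe-separated table into a DataFrame, e.g. (hypothetical data):
+#
+#     index                   | near
+#     2012-09-08 17:06:11.040 |  1.0
+#     2012-10-09 17:06:11.040 |  2.5
+#
+# The first column is parsed as the datetime index and the column names are stripped of
+# whitespace.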
+def read_str_as_pandas(ts_str):
+ labels = [x.strip() for x in ts_str.split('\n')[0].split('|')]
+ pd = pandas.read_csv(StringIO(ts_str), sep='|', index_col=0,
+ date_parser=dateutil.parser.parse)
+ # Trim the whitespace on the column names
+ pd.columns = labels[1:]
+ pd.index.name = labels[0]
+ return pd
+
+
+def get_large_ts(size=2500):
+ timestamps = list(rrule(DAILY, count=size, dtstart=dt(1970, 1, 1), interval=1))
+ pd = pandas.DataFrame(index=timestamps, data={'n' + str(i): np.random.random_sample(size) for i in range(size)})
+ pd.index.name = 'index'
+ return pd
+
+
+@contextmanager
+def _save_argv():
+    args = sys.argv[:]
+    try:
+        yield
+    finally:
+        # restore sys.argv even if the wrapped function raises
+        sys.argv = args
+
+
+def run_as_main(fn, *args):
+ """ Run a given function as if it was the
+ system entry point, eg for testing scripts.
+
+ Eg::
+
+ from scripts.Foo import main
+
+ run_as_main(main, 'foo','bar')
+
+ This is equivalent to ``Foo foo bar``, assuming
+ ``scripts.Foo.main`` is registered as an entry point.
+ """
+ with _save_argv():
+ print("run_as_main: %s" % str(args))
+ sys.argv = ['progname'] + list(args)
+ return fn()