patrick-kidger
diff --git a/‎CHANGELOG.txt
+3-2 b/‎CHANGELOG.txt
+3-2
diff --git a/‎docs/pages/examples/online.rst
+7-2 b/‎docs/pages/examples/online.rst
+7-2
diff --git a/‎src/signatory/signature_module.py
+29-33 b/‎src/signatory/signature_module.py
+29-33
@@ -4,8 +4,9 @@ Added logsignature calculations to Path.
 Added the signature_to_logsignature function.
 Added the multi_signature_combine function.
 Improved speed and stability of the backwards operation through Path.
-Added (fixed) memory profiling.
-Dramatically improved the speed of several calculations, in particular the the forward calculation of signatures and logsignatures, via parallelisation
+Dramatically improved the speed of several calculations, in particular the forward calculation of signatures and logsignatures, via parallelisation.
+Added support for multiple versions of PyTorch via a custom install script.
+The usual documentation improvements.
 
 1.1.3
 -----
 
@@ -21,13 +21,18 @@ In code, this problem can be solved like this:
     # Generate some more data for the path
     Y = torch.rand(1, 7, 5)
     # Calculate the signature of the overall path
-    sig_XY = signatory.signature(Y, 3, basepoint=X[:, -1, :], initial=sig_X)
+    final_X = X[:, -1, :]
+    sig_XY = signatory.signature(Y, 3, basepoint=final_X, initial=sig_X)
 
     # This is equivalent to
     XY = torch.cat([X, Y], dim=1)
     sig_XY = signatory.signature(XY, 3)
 
-As can be seen, two pieces of information need to be provided: the final value of :attr:`X` along the stream dimension, and the signature of :attr:`X`.
+As can be seen, two pieces of information need to be provided: the final value of :attr:`X` along the stream dimension, and the signature of :attr:`X`. The full path :attr:`X` itself is not needed.
+
+The first method (using the :attr:`initial` argument) will be much quicker than the second (simpler) method. The first
+method efficiently uses just the new information :attr:`Y`, whilst the second method unnecessarily iterates over all of
+the old information :attr:`X`.
 
 In particular note that we only needed the last value of :attr:`X`. If memory efficiency is a concern, then by using the first method we can discard the other 999 terms of :attr:`X` without an issue!
 
 
@@ -19,6 +19,7 @@
 from torch import nn
 from torch import autograd
 from torch.autograd import function as autograd_function
+import warnings
 
 from . import backend
 # noinspection PyUnresolvedReferences
@@ -203,65 +204,53 @@ def _signature_batch_trick(path, depth, stream, basepoint, inverse, initial):
 
 def signature(path, depth, stream=False, basepoint=False, inverse=False, initial=None):
     # type: (torch.Tensor, int, bool, Union[bool, torch.Tensor], bool, Union[None, torch.Tensor]) -> torch.Tensor
+
     r"""Applies the signature transform to a stream of data.
 
     The input :attr:`path` is expected to be a three-dimensional tensor, with dimensions :math:`(N, L, C)`, where
     :math:`N` is the batch size, :math:`L` is the length of the input sequence, and :math:`C` denotes the number of
     channels. Thus each batch element is interpreted as a stream of data :math:`(x_1, \ldots, x_L)`, where each
     :math:`x_i \in \mathbb{R}^C`.
 
-    Each path is then lifted to a piecewise linear path :math:`X \colon [0, 1] \to \mathbb{R}^C` and the signature
-    transform of :attr:`path` to depth :attr:`depth`, is computed, defined by
+    Let :math:`f = (f_1, \ldots, f_C) \colon [0, 1] \to \mathbb{R}^C` be the unique continuous piecewise linear path
+    such that :math:`f(\tfrac{i - 1}{L - 1}) = x_i`. Then the signature transform of depth :attr:`depth` is
+    computed, defined by
 
     .. math::
-        \exp(x_2 - x_1) \otimes \exp(x_3 - x_2) \otimes \cdots \otimes \exp(x_L - x_{L - 1}),
+        \mathrm{Sig}(\text{path}) = \left(\left( \,\underset{0 < t_1 < \cdots < t_k < 1}{\int\cdots\int} \prod_{j = 1}^k \frac{\mathrm d f_{i_j}}{\mathrm dt}(t_j) \mathrm dt_1 \cdots \mathrm dt_k \right)_{\!\!1 \leq i_1, \ldots, i_k \leq C}\right)_{\!\!1\leq k \leq \text{depth}}.
 
-    which gives a tensor of shape
+    This gives a tensor of shape
 
     .. math::
         (N, C + C^2 + \cdots + C^\text{depth}).
 
-    If :attr:`basepoint` is True then an additional point :math:`x_0 = 0 \in \mathbb{R}^C` is prepended to the path
-    before the signature transform is applied. Alternatively it can be a :class:`torch.Tensor` of shape :math:`(N, C)`
-    specifying the point to prepend.
-
-    If :attr:`stream` is True then  the signatures of all paths :math:`(x_1, \ldots, x_j)`, for :math:`j=2, \ldots, L`,
-    are computed. (Or :math:`(x_0, \ldots, x_j)`, for :math:`j=1, \ldots, L` if :attr:`basepoint` is provided. In
-    neither case is the signature of the path of a single element computed, as that isn't defined.)
-
     Arguments:
         path (:class:`torch.Tensor`): The batch of input paths to apply the signature transform to.
 
         depth (int): The depth to truncate the signature at.
 
-        stream (bool, optional): Defaults to False. If False then the signature transform of the whole path is computed.
-            If True then the signature of all intermediate paths are also computed.
+        stream (bool, optional): Defaults to False. If False then the usual signature transform of the whole path is
+            computed. If True then the signatures of all paths :math:`(x_1, \ldots, x_j)`, for :math:`j=2, \ldots, L`,
+            are returned.
 
-        basepoint (bool or :class:`torch.Tensor`, optional): Defaults to False. If True, then the input paths will have
-            an additional point at the origin prepended to the start of the sequence. (If this is False then the
-            signature transform is invariant to translations of the path, which may or may not be desirable.)
-            Alternatively it may be a :class:`torch.Tensor` specifying the point to prepend, in which case it should
-            have shape :math:`(N, C)`
+        basepoint (bool or :class:`torch.Tensor`, optional): Defaults to False. If :attr:`basepoint` is True then an
+            additional point :math:`x_0 = 0 \in \mathbb{R}^C` is prepended to the path before the signature transform is
+            applied. (If this is False then the signature transform is invariant to translations of the path, which may
+            or may not be desirable. Setting this to True removes this invariance.)
+            Alternatively it may be a :class:`torch.Tensor` specifying the value of :math:`x_0`, in which case it should
+            have shape :math:`(N, C)`.
 
         inverse (bool, optional): Defaults to False. If True then it is in fact the inverse signature that is computed.
-            That is,
-
-            .. math::
-                \exp(x_{L - 1} - x_L) \otimes \cdots \otimes \exp(x_2 - x_3) \otimes \exp(x_1 - x_2).
-
+            That is, we flip the input path along its stream dimension before computing the signature. (But without the
+            extra computational overhead of actually performing that flip separately.)
             From a machine learning perspective it does not particularly matter whether the signature or the inverse
             signature is computed - both represent essentially the same information as each other.
 
         initial (None or :class:`torch.Tensor`, optional): Defaults to None. If it is a :class:`torch.Tensor` then it
-            must be of size :math:`(N, C + C^2 + ... + C^\text{depth})`, and it will be premultiplied to the signature,
-            so that in fact
-
-            .. math::
-                \text{initial} \otimes \exp(x_2 - x_1) \otimes \exp(x_3 - x_2) \otimes \cdots \otimes \exp(x_L - x_{L - 1})
-
-            is computed. (Or the appropriate modification of this if :attr:`inverse=True` or if :attr:`basepoint` is
-            passed.) If this argument is None then this extra multiplication is not done, and the signature is
-            calculated as previously described.
+            must be of size :math:`(N, C + C^2 + ... + C^\text{depth})`, corresponding to the signature of another path.
+            Then this signature is pre-tensor-multiplied on to the signature of :attr:`path`. For a more thorough
+            explanation, see :ref:`this example<examples-online>`.
+            (The appropriate modifications are made if :attr:`inverse=True` or if :attr:`basepoint` is passed.)
 
     Returns:
         A :class:`torch.Tensor`. Given an input :class:`torch.Tensor` of shape :math:`(N, L, C)`, and input arguments
@@ -281,6 +270,13 @@ def signature(path, depth, stream=False, basepoint=False, inverse=False, initial
         :func:`signatory.signature_channels`.
     """
 
+    if initial is not None and basepoint is False:
+        warnings.warn("Argument 'initial' has been set but argument 'basepoint' has not. This is almost certainly a "
+                      "mistake. Argument 'basepoint' should be set to the final value of the path whose signature is "
+                      "'initial'. See the documentation at\n"
+                      "    https://signatory.readthedocs.io/en/latest/pages/examples/online.html\n"
+                      "for more information.")
+
     _signature_checkargs(path, depth, basepoint, initial)
 
     # Coming up is a somewhat involved set of optimisations via parallelisation.