diff --git a/doc/.DS_Store b/doc/.DS_Store deleted file mode 100644 index 85b2fba..0000000 Binary files a/doc/.DS_Store and /dev/null differ diff --git a/doc/bibliography.bib b/doc/bibliography.bib deleted file mode 100644 index 3c9d1f1..0000000 --- a/doc/bibliography.bib +++ /dev/null @@ -1,6705 +0,0 @@ -@string{AJM = {Amer. J. of Mathematics}} -@string{ANNMATH = {Annals of Mathematics}} -@string{ANNPROB = {Ann. Prob.}} -@string{ANNIP = {Ann. Inst. Henri Poincar\'{e}}} -@string{ACHA = {Applied and Computational Harmonic Analysis}} -@string{AML = {Appl. Math. Lett.}} -@string{AUT = {Automatica}} -@string{BIT = {BIT}} -@string{BSTJ = {Bell Syst. Tech. J.}} -@string{BAMS = {Bull. Amer. Math. Soc.}} -@string{CACM = {Comm. ACM}} -@string{CMP = {Comm. Math. Phys.}} -@string{CPAM = {Comm. Pure Appl. Math.}} -@string{COMPAM = {Comm. Pure Appl. Math.}} -@string{CAGD = {Comput. Aided Geom. Des.}} -@string{CONAP = {Constr. Approx.}} -@string{CR = {C. R. Acad. Sci. Paris S\'er. I Math.}} -@string{EL = {Electronics Letters}} -@string{IEEEASSP = {IEEE Trans. Acoust., Speech, Signal Processing}} -@string{SIEEESP = {submitted to IEEE Trans. Signal Processing}} -@string{AIEEESP = {{{\rm accepted for publication in}} IEEE Trans. Signal Processing}} -@string{IEEEAC = {IEEE Trans. Autom. Contr.}} -@string{IEEEAP = {IEEE Trans. Antennas and Propagation}} -@string{IEEESP = {IEEE Trans. Signal Process.}} -@string{IEEESA = {IEEE Trans. Speech and Audio Processing}} -@string{IEEESPL = {IEEE Signal Processing Letters}} -@string{IEEESPMAG= {IEEE Signal Processing Magazine}} -@string{IEEEUFFC = {IEEE Trans. Ultrason. Ferroelect. Freq. Contr.}} -@string{IEEEIT = {IEEE Trans. Inf. Theory}} -@string{IEEEBME = {IEEE Trans. Biomedical Engineering}} -@string{IEEECAS = {IEEE Trans. Circuits and Systems}} -@string{IEEECSVT = {IEEE Trans. Circ. Syst., Video Techn.}} -@string{IEEECT = {IEEE Trans. Circ. Theory}} -@string{IEEECOM = {IEEE Trans. Comm.}} -@string{IEEECOSY = {IEEE Trans. Comm. 
Syst.}} -@string{IEEECOTE = {IEEE Trans. Comm. Technol.}} -@string{IEEECOMM = {IEEE Comm. Mag.}} -@string{IEEEJSAC = {IEEE J. Sel. Areas Comm.}} -@string{IEEEPAMI = {IEEE Trans. Pattern Anal. Mach. Intell.}} -@string{IEEEAES = {IEEE Trans. Aerospace and Electronic Systems}} -@string{IEEEPAMI = {IEEE Trans. Patt. Anal. Mach. Intell.}} -@string{IEEEIP = {IEEE Trans. Image Process.}} -@string{IEEEGRS = {IEEE Trans. Geosci. Remote Sens.}} -@string{IEEESAP = {IEEE Trans. Speech, Audio Proc.}} -@string{IEEESMC = {IEEE Trans. Systems, Man, Cybernet.}} -@string{IEEEVT = {IEEE Trans. Veh. Technol.}} -@string{IEEEPROC = {Proc. IEEE}} -@string{JASA = {J. Acoust. Soc. Amer.}} -@string{JAMS = {J. Amer. Math. Soc.}} -@string{JAT = {J. Approx. Theory}} -@string{JAES = {J. Audio Eng. Soc.}} -@string{JFI = {J. Franklin Inst.}} -@string{JCAM = {J. Comput. Appl. Math.}} -@string{JCP = {J. Comput. Phys.}} -@string{JFAA = {J. Fourier Anal. Appl.}} -@string{JFA = {J. Funct. Anal.}} -@string{JIEE = {J. IEE}} -@string{JMAA = {J. Math. Anal. Appl.}} -@string{JMIV = {J. Math. Imag. Vision}} -@string{JMP = {J. Math. Phys.}} -@string{JMPA = {J. Math. Pure Appl.}} -@string{JOS = {J. Opt. Soc.}} -@string{JROYSA = {J. Roy. Stat. Soc. Ser. A}} -@string{JROYSB = {J. Roy. Stat. Soc. Ser. B}} -@string{JROYSC = {J. Roy. Stat. Soc. Ser. C}} -@string{JROYSD = {J. Roy. Stat. Soc. Ser. D}} -@string{JSV = {J. Sound Vib.}} -@string{LNM = {Lecture Notes in Math.}} -@string{LINALG = {Linear Algebra and its Applications}} -@string{MC = {Math. Comp.}} -@string{MEDI = {IEEE Trans. Med. Imag.}} -@string{MM = {Monatsh. Math.}} -@string{MN = {Math. Nach.}} -@string{MS = {Math. Scand.}} -@string{NM = {Numer. Math.}} -@string{NA = {Numer. Algor.}} -@string{OE = {Opt. Eng.}} -@string{OPT = {Journal of Optimization Theory Applications}} -@string{PJR = {Philips J. Research}} -@string{PAMS = {Proc. Amer. Math. Soc.}} -@string{PR = {Phys. Rev.}} -@string{PRA = {Phys. Rev. A}} -@string{PRL = {Phys. Rev. 
Lett.}} -@string{PRP = {Phys. Rep.}} -@string{PTP = {Prog. Theor. Phys.}} -@string{RMI = {Rev. Mat. Iberoamericana}} -@string{SIAMAM = {SIAM J. Appl. Math.}} -@string{SIAMMA = {SIAM J. Math. Anal.}} -@string{SIMAT = {SIAM J. Matrix Anal. Appl.}} -@string{SIAMNA = {SIAM J. Numer. Anal.}} -@string{SIJSC = {SIAM J. Sci. Comp.}} -@string{SIJSSC = {SIAM J. Sci. Statist. Comp.}} -@string{SIAMR = {SIAM Rev.}} -@string{EUSP = {Signal Processing}} -@string{TAMS = {Trans. Amer. Math. Soc.}} -@string{ASILOMAR = {Asilomar Conf. Signals, Systems, Computers}} -@string{CISS = {Proc. Conf. Information Sciences and Systems}} -@string{ICASSP = {Proc. IEEE Int. Conf. Acoust., Speech, and Signal Proc. (ICASSP)}} -@string{ICASSP83 = {Proc. IEEE ICASSP-83}} -@string{ICASSP84 = {Proc. IEEE ICASSP-84}} -@string{ICASSP85 = {Proc. IEEE ICASSP-85}} -@string{ICASSP86 = {Proc. IEEE ICASSP-83}} -@string{ICASSP87 = {Proc. IEEE ICASSP-87}} -@string{ICASSP88 = {Proc. IEEE ICASSP-83}} -@string{ICASSP89 = {Proc. IEEE ICASSP-83}} -@string{ICASSP90 = {Proc. IEEE ICASSP-90}} -@string{ICASSP91 = {Proc. IEEE ICASSP-83}} -@string{ICASSP92 = {Proc. IEEE ICASSP-92}} -@string{ICASSP93 = {Proc. IEEE ICASSP-93}} -@string{ICASSP94 = {Proc. IEEE ICASSP-94}} -@string{ICASSP95 = {Proc. IEEE ICASSP-95}} -@string{ICASSP96 = {Proc. IEEE ICASSP-96}} -@string{ICASSP97 = {Proc. IEEE ICASSP-97}} -@string{ICASSP98 = {Proc. IEEE ICASSP-98}} -@string{ICASSP99 = {Proc. IEEE ICASSP-99}} -@string{ICIP = {Proc. IEEE Int. Conf. Image Proc. (ICIP)}} -@string{ISCAS92 = {Proc. IEEE ISCAS-92}} -@string{ISCAS = {Proc. IEEE Int. Conf. Circuits and Systems}} -@string{TFTS = {Proc. IEEE-SP Int. Sympos. Time-Frequency Time-Scale Analysis}} -@string{SSAP = {Proc. IEEE-SP Workshop Stat. 
Signal and Array Processing}} -@string{TU = {Vienna University of Technology}} -@string{INTHFT = {Institute of Communications and Radio-Frequency Engineering, Vienna University of Technology}} -@string{jan = {Jan.}} -@string{feb = {Feb.}} -@string{mar = {March}} -@string{apr = {April}} -@string{may = {May}} -@string{jun = {June}} -@string{jul = {July}} -@string{aug = {Aug.}} -@string{sep = {Sept.}} -@string{oct = {Oct.}} -@string{nov = {Nov.}} -@string{dec = {Dec.}} -@string{academic = {Academic Press}} -@string{addwes = {Addison Wesley}} -@string{artech = {Artech House}} -@string{dover = {Dover Publications}} -@string{elsevier = {Elsevier}} -@string{kluwer = {Kluwer}} -@string{dekker = {Marcel Dekker}} -@string{mcgraw = {McGraw-Hill}} -@string{pergamon = {Pergamon Press}} -@string{prent = {Prentice Hall}} -@string{spring = {Springer}} -@string{wiley = {Wiley}} - -@INPROCEEDINGS{Tadic2011, - author={Tadic, V. B. and Doucet, A.}, - booktitle={Proc. IEEE Decision Decision and Control and European Control Conference (CDC-ECC)}, - title={Asymptotic bias of stochastic gradient search}, - year={2011}, - pages={722-727}, -} - -@article {DelMoral2006, - author = {Del Moral, Pierre and Doucet, Arnaud and Jasra, Ajay}, - title = {Sequential {M}onte {C}arlo samplers}, - journal = JROYSB, - volume = {68}, - number = {3}, - publisher = {Blackwell Publishing Ltd}, - month = may, - year = {2006}, -} - - -@article{Andrieu2007, - author = "C. Andrieu and K. Berthelesen and A. Doucet and G. O. Roberts", - title = "The expected auxiliary variable method for - {M}onte {C}arlo simulation", - journal = "Working Paper. Department of Mathematics, University of Bristol, Bristol", - year = "2007", -} - -@article{Andrieu2006, - author = {C. Andrieu and E. Moulines}, - title = {On the ergodicity properties of some adaptive {MCMC} algorithms}, - journal = {Ann. Appl. 
Probab.}, - vol = {16}, - number = {3}, - year = {2006}, - pages = {1462--1505}, -} - -@ARTICLE{Andrieu2012, - author = {{Andrieu}, C. and {Tadic}, V.~B. and {Vihola}, M.}, - title = "{On the stability of some controlled {M}arkov chains and its applications to stochastic approximation with {M}arkovian dynamic}", - journal = {ArXiv e-prints}, - eprint = {1205.4181}, - year = 2012, - month = may, -} - - -@article {Andrieu2010, - author = {Andrieu, Christophe and Doucet, Arnaud and Holenstein, Roman}, - title = {Particle {M}arkov chain {M}onte {C}arlo methods}, - journal = JROYSB, - volume = {72}, - number = {3}, - publisher = {Blackwell Publishing Ltd}, - month = may, - year = {2010}, -} - -@ARTICLE{Deng2005, - author={Huawu Deng and Clausi, D.A.}, - journal=IEEEGRS, - title={Unsupervised segmentation of synthetic aperture Radar sea ice imagery using a novel {M}arkov random field model}, - year={2005}, - month=mar, - volume={43}, - number={3}, - pages={ 528 - 538}, -} -@ARTICLE{Tison2004, - author={Tison, C. and Nicolas, J.-M. and Tupin, F. and Maitre, H.}, - journal=IEEEGRS, - title={A new statistical model for {M}arkovian classification of urban areas in high-resolution {SAR} images}, - year={2004}, - month=oct, - volume={42}, - number={10}, - pages={ 2046 - 2057}, -} - -@ARTICLE{Geyer1992, - author = {Geyer, Charles J. and Thompson, Elizabeth A.}, - title = {Constrained {M}onte {C}arlo Maximum Likelihood for Dependent Data (with discussions)}, - journal = {J. Roy. Statist. Soc.}, - year={1992}, - month=apr, - volume={B}, - number={54}, - pages={657-699}, -} - -@inproceedings{Gonzalez2011, title = {Parallel {G}ibbs Sampling: From Colored - Fields to Thin Junction Trees}, author = {Joseph Gonzalez and Yucheng Low and - Arthur Gretton and Carlos Guestrin}, booktitle = {Proc. Artifical Intell. Stat. (AISTATS)}, - month = may, year = {2011}, address = {Ft. Lauderdale, FL} -} - -@ARTICLE{Yu2010, - author={Yu Li and Li, J. 
and Chapman, M.A.}, - journal=IEEEGRS, - title={Segmentation of {SAR} Intensity Imagery With a {V}oronoi Tessellation, {B}ayesian Inference, and Reversible Jump {MCMC} Algorithm}, - year={2010}, - month=apr, - volume={48}, - number={4}, -} - -@inproceedings{kantorovich1942transfer, - title={On the transfer of masses (in Russian)}, - author={Kantorovich, L}, - booktitle={Doklady Akademii Nauk}, - volume={37}, - number={2}, - pages={227--229}, - year={1942}, - ADDENDUM={English Tranlation in \textit{J. Math. Sci. 133}, 4 (2006), 1381--1382} -} - -@ARTICLE{Levada2008, - author={Levada, A. and Mascarenhas, N. and Tannus, A.}, - journal=IEEEGRS, - title={Pseudolikelihood Equations for Potts {MRF} Model Parameter Estimation on Higher Order Neighborhood Systems}, - year={2008}, - month=jul, - volume={5}, - number={3}, - pages={522 -526}, -} - -@article{Orieux2010, - author = {Fran\E7ois Orieux and Jean-Fran\E7ois Giovannelli and Thomas Rodet}, - title = {Bayesian estimation of regularization and point spread function parameters for Wiener\96Hunt deconvolution}, - journal = {Journal of The Optical Society of America}, - volume = {27}, - year = {2010}, - issue = {7}, -} - -@ARTICLE{Yongfeng2005, - author={Yongfeng Cao and Hong Sun and Xin Xu}, - journal=IEEEGRS, - title={An unsupervised segmentation method based on {MPM} for {SAR} images}, - year={2005}, - month=jan, - volume={2}, - number={1}, - pages={ 55 - 58}, -} - -@book{LiBook, - author = {Li, Stan Z.}, - address = {Secaucus, NJ, USA}, - isbn = {4-431-70309-8}, - publisher = {Springer-Verlag New York, Inc.}, - title = {{Markov random field modeling in image analysis}}, - year = {2001} -} - - - -@ARTICLE{Picco2011, - author={Picco, M. 
and Palacio, G.}, - journal=IEEEGRS, - title={Unsupervised Classification of {SAR} Images Using {M}arkov Random Fields and ${cal G}_{I}^{0}$ Model}, - year={2011}, - month=mar, - volume={8}, - number={2}, - pages={350 -353}, -} - -@ARTICLE{Yong2006, - author={Yong Yang and Hong Sun and Chu He}, - journal=IEEEGRS, - title={Supervised {SAR} Image {MPM} Segmentation Based on Region-Based Hierarchical Model}, - year={2006}, - month=oct, - volume={3}, - number={4}, - pages={517 -521}, -} - -@ARTICLE{Pelizzari2010, - author={Pelizzari, S. and Bioucas-Dias, J.}, - journal=IEEEGRS, - title={Oil spill segmentation of {SAR} images via graph cuts}, - year={2010}, - note = {submitted}, - url = {http://arxiv.org/abs/1007.4969}, -} - -@article{Green2002, - title = {Hidden Markov Models and Disease Mapping}, - author = {Green, Peter J. and Richardson, Sylvia}, - journal = {Journal of the American Statistical Association}, - volume = {97}, - number = {460}, - pages = {1055--1070}, - year = {2002}, -} - -@ARTICLE{Gelman1998, - author = {Gelman, A. and Meng, X.L.}, - title = {{Simulating normalizing constants: from importance sampling to bridge sampling to path sampling}}, - journal = {Statistical Science}, - year = {1998}, - volume = {13}, - pages = {163--185}, - number = {2}, -} - - -@incollection{Diebolt1996, - author = {J. Diebolt and E. H. S. Ip.}, - title = {Stochastic {EM}: method and Application}, - booktitle = {{M}arkov {C}hain {M}onte {C}arlo in Practice}, - year = {1996}, - editor = {W. R. Gilks and S. Richardson and D. J. 
Spiegelhalter}, - publisher = {Chapman \& Hall}, - address = {London}, - topic = { } -} - -@article{Yu2003, - title = "{MRF} parameter estimation by an accelerated method", - journal = "Pattern Recognition Letters", - volume = "24", - number = "9-10", - pages = "1251 - 1259", - month = aug, - year = "2003", - note = "", - author = "Yihua Yu and Qiansheng Cheng", -} - -@article{Wang2000, - title = "{MRF} parameter estimation by {MCMC} method", - journal = "Pattern Recognition", - volume = "33", - number = "11", - pages = "1919 - 1925", - month = jul, - year = "2000", - note = "", - author = "Lei Wang and Jun Liu and Stan Z. Li", -} - -@article{li2020quantitative, - title={Quantitative stability and error estimates for optimal transport plans}, - author={Li, Wenbo and Nochetto, Ricardo H}, - journal={IMA Journal of Numerical Analysis}, - volume={41}, - issue={3}, - pages={1941--1965}, - year={2021} -} - -@inproceedings{merigot2020quantitative, - title={Quantitative stability of optimal transport maps and linearization of the 2-{W}asserstein space}, - author={M{\'e}rigot, Quentin and Delalande, Alex and Chazal, Fr{\'e}d{\'e}ric}, - booktitle={International Conference on Artificial Intelligence and Statistics}, - pages={3186--3196}, - year={2020}, - organization={PMLR} -} - -@article{schrodinger1931umkehrung, - title={Uber die umkehrung der naturgesetze}, - author={{S}chr{\"o}dinger, E}, - journal={Akad. Wissen., Berlin Phys. Math}, - volume={144}, - year={1931} -} - - -@article{delalande2021quantitative, - title={Quantitative Stability of Optimal Transport Maps under Variations of the Target Measure}, - author={Delalande, Alex and Merigot, Quentin}, - journal={arXiv preprint arXiv:2103.05934}, - year={2021} -} - - -@ARTICLE{Ayasso2010, - author={Ayasso, H. 
and Mohammad-Djafari, A.}, - journal=IEEEIP, - title={Joint {NDT} Image Restoration and Segmentation Using {G}auss {M}arkov {P}otts Prior Models and Variational {B}ayesian Computation}, - year={2010}, - month=sep, - volume={19}, - number={9}, - pages={2265 -2277}, -} - -@ARTICLE{Descombes1998, - author={Descombes, X. and Kruggel, F. and Von Cramon, D.Y.}, - journal=MEDI, - title={Spatio-temporal f{MRI} analysis using {M}arkov random fields}, - year={1998}, - month=dec, - volume={17}, - number={6}, - pages={1028 -1039}, -} - -@INPROCEEDINGS{Morris1996, - author={R.D. Morris and X. Descombes and J. Zerubia}, - booktitle={Proc. IEEE Digital Signal Proc. Workshop 1996}, - title={The {I}sing/{P}otts model is not well suited to segmentation tasks}, - year={1996}, - month=sep, - pages={263 -266}, -} - -@INPROCEEDINGS{Murray2004, - AUTHOR = "Iain Murray and Zoubin Ghahramani", - TITLE = "{B}ayesian Learning in Undirected Graphical Models: Approximate {MCMC} algorithms", - BOOKTITLE = "Proc. Conf. Uncertainty in Artificial Intell. (UAI)", - PUBLISHER = "AUAI Press", - ADDRESS = "Arlington, Virginia", - YEAR = "2004", - PAGES = "392--399" -} - -@techreport{Pereyra_TIP_TechReport_2012, - author = {M. Pereyra and N. Dobigeon and H. Batatia and J.-Y. Tourneret}, - title = {Estimating the Granularity Parameter of a {P}otts-{M}arkov Random field within an {MCMC} algorithm}, - institution = {University of Toulouse, IRIT/INP-ENSEEIHT}, - address = {France}, - month = feb, - year = 2012, - url = {http://pereyra.perso.enseeiht.fr/pdf/PereyraIEEETIPtr2012.pdf}, -} - -@article{Pereyra_TIP_2013, - author = {M. Pereyra and N. Dobigeon and H. Batatia and J.-Y. 
Tourneret}, - title = {Estimating the Granularity Parameter of a {P}otts-{M}arkov Random field within an {MCMC} algorithm}, - journal=IEEEIP, - year={2013}, - month=jun, - volume={22}, - number={6}, - pages={2385-2397}, - - address = {France}, - month = feb, - year = 2012, - url = {http://pereyra.perso.enseeiht.fr/pdf/PereyraIEEETIPtr2012.pdf}, -} - -@INPROCEEDINGS{Risser2009, - AUTHOR = {Laurent Risser and J\E9r\F4me Idier and Philippe Ciuciu - and Thomas Vincent}, - TITLE = {Fast bilinear extrapolation of {3D} {I}sing field - partition function. {A}pplication to f{MRI} image - analysis}, - BOOKTITLE = ICIP, - YEAR = {2009}, - MONTH = nov, - ADDRESS = {Cairo, Egypte}, - PAGES = {833--836}, -} - - -@article{Dempster1977, - author = {A. P. Dempster and N. M. Laird and D. B. Rubin}, - title = {Maximum likelihood from incomplete data via the {EM} algorithm}, - journal = JROYSB, - year = 1977, - volume = 39, - number = {1}, - pages = {1--38}, - topic = { } -} - -@book{Marin2007, - author = {Jean-Michel Marin and Christian P. Robert}, - title = {Bayesian core: a practical approach to computational {B}ayesian statistics}, - year = {2007}, - publisher = spring, - address = {New-York}, -} - -@book{Gelman_book, - author = {Donald B. Rubin and Andrew Gelman and Carlin John and Hal Stern}, - title = {Bayesian Data Analysis (2nd ed.)}, - year = {2003}, - publisher = {Chapman \& Hall}, - address = {Boca Raton}, -} - -@phdthesis{Balasundar, - author = {B. I. 
Raju}, - title = {High frequency ultrasonic characterization of human skin In vivo}, - year = 2002, - month = jun, - address = {Cambridge, MA}, - school = {Massachusetts Institute of Technology}, - available = {y}, - topic = { } -} - -@phdthesis{fort:2001, - author = {Fort, G.}, - title = {Contr\F4le explicite d'ergodicit\E9 de cha\EEnes de {M}arkov : {A}pplications \E0 l'analyse de convergence - de l'algorithme {Monte-Carlo EM}}, - year = 2001, - address = {Paris}, - school = {Université Pierre et Marie Curie, Paris}, - available = {y}, - topic = { } -} - -@inproceedings{myEMBC2010, - author = {M. A. Pereyra and H. Batatia}, - title = {An Alpha-Stable Model for Ultrasound Speckle Statistics in Skin}, - booktitle = {Proc. IEEE Eng. Med. Bio. Soc. (EMBC)}, - year = {2010}, - month = sep, - address = {Buenos Aires, Argentina}, - available = {y}, - topic = { } -} - -@inproceedings{myUltrasonics2010, - author = {M. A. Pereyra and H. Batatia}, - title = {A {L}evy Flight Model for Ultrasound in Skin Tissues}, - booktitle = {Proc. IEEE Ultrason. Symp.}, - year = {2010}, - month = oct, - address = {San Diego (CA), USA}, - available = {y}, - topic = { } -} - -@inproceedings{Seabra2010, - author = {J. Seabra and J. Sanches and F. Ciompi and P. Radeva}, - title = {Ultrasonographic plaque characterization using a rayleigh mixture model}, - booktitle = {Proc. IEEE Symp. Med. Imag.}, - year = {2010}, - month = apr, - address = {Rotterdam, Netherlands}, - available = {y}, - topic = { } -} - -@inproceedings{Oelze, - author = {M. L. Oelze and W. D. O'Brien and J. F. Zachary}, - title = {Quantitative ultrasound assessment of breast cancer using a multiparameter approach}, - booktitle = {Proc. IEEE Ultrason. Symp.}, - year = {2007}, - month = oct, - address = {New York, NY}, - available = {y}, - topic = { } -} - -@article{CriticalReview, - author = {F. Destrempes and G. 
Cloutier}, - title = {A critical review and uniformized representation of statistical distributions modeling the ultrasound echo envelope}, - journal = {Ultrasound Med. Biol.}, - year = 2010, - volume = 36, - number = {7}, - pages = {1037--1051}, - available = {y}, - topic = { } -} - -@article{Destrempes2009, - author = {F. Destrempes and J. Meunier J. and M. F. Giroux and G. Soulez and G. Cloutier}, - title = {Segmentation in ultrasonic {B}-mode images of carotid arteries using mixture of {N}akagami distributions and stochastic optimization}, - journal = MEDI, - month = feb, - year = 2009, - volume = 28, - number = {2}, - pages = {215--229}, - available = {y}, - topic = { } -} - -@book{liptser2013statistics, - title={Statistics of Random Processes: I. General Theory}, - author={Liptser, Robert and Shiryaev, Albert N}, - volume={5}, - year={2013}, - publisher={Springer Science \& Business Media} -} - - -@article{ShotNoise, - author = {M. A. Kutay and A. P. Petropulu and C. W. Piccoli}, - title = {On modeling biomedical ultrasound {RF} echoes using a power-law shot-noise model}, - journal = IEEEUFFC, - month = jul, - year = 2001, - volume = 48, - number = {4}, - pages = { 953--968}, - available = {y}, - topic = { } -} - -@book{MorseIngard, - author = {P. Morse and K. Ingard}, - title = {Theoretical Acoustics}, - year = {1987}, - publisher = {Princeton University Press}, - address = {Princeton (NJ)}, - available = {y}, - topic = { } -} - -@article{Casella2001, - author = {G. Casella}, - title = {rical {B}ayes {G}ibbs sampling}, - journal = {Biostatistics}, - volume = {2}, - number = {4}, - pages = {485--500}, - year = {2001}, -} - -@book{Robert, - author = {C. P. Robert and G. Casella}, - title = {Monte Carlo Statistical Methods (2nd ed.)}, - year = {2004}, - publisher = {Springer-Verlag}, - address = {New York}, - available = {y}, - topic = { } -} - -@article{Cardinal2006, - author = {M. H. R. Cardinal and J. Meunier and G. Soulez and R. L. Maurice and E. 
Therasse and G. Cloutier}, - title = {Intravascular ultrasound image segmentation: a three-dimensional fast-marching method based on gray level distributions}, - journal = MEDI, - year = 2006, - month = may, - volume = 25, - number = {5}, - pages = {590--601}, - available = {y}, - topic = { } -} - - - -@INCOLLECTION{Roberts1996, - author = {G. O. Roberts}, - title = {Markov chain concepts related to sampling algorithms}, - booktitle = {{M}arkov Chain {M}onte {C}arlo in Practice}, - publisher = {Chapman \& Hall}, - year = {1996}, - editor = {W. R. Gilks and S. Richardson and D. J. Spiegelhalter}, - pages = {259--273}, - address = {London}, - topic = { }, -} - -@article{Eches2010tgrs, - author = {Olivier Eches and Nicolas Dobigeon and Jean-Yves Tourneret}, - title = {Enhancing hyperspectral image unmixing with spatial correlations}, - journal = {IEEE Trans. Geoscience and Remote Sensing}, - year = 2011, - month = nov, - volume = {49}, - number = {11}, - pages = {4239--4247}, -} - -@ARTICLE{Woolrich2005, - author={Woolrich, M.W. and Behrens, T.E.J. and Beckmann, C.F. and Smith, S.M.}, - journal=MEDI, - title={Mixture models with adaptive spatial regularization for segmentation with an application to - f{MRI} data}, - year={2005}, - month=jan, - volume={24}, - number={1}, - pages={1-11}, -} - -@ARTICLE{Marroquin2003, - author={Marroquin, J.L. and Santana, E.A. 
and Botello, S.}, - journal=IEEEPAMI, - title={Hidden {M}arkov measure field models for image segmentation}, - year={2003}, - month=nov, - volume={25}, - number={11}, - pages={1380-1387}, -} - -@article{eckstein2021quantitative, - title={Quantitative Stability of Regularized Optimal Transport}, - author={Eckstein, Stephan and Nutz, Marcel}, - journal={arXiv preprint arXiv:2110.06798}, - year={2021} -} - -@article{Robert2011, - author = {Christian Robert, Jean-Marie Cornuet, Jean-Michel Marin and Natesh Pillai}, - title = {Lack of confidence in approximate Bayesian computation model choice}, - journal = {Proceedings of the National Academy of Science}, - volume = 108, - number = 37, - pages = {15112-15117}, - month = jul, - year = 2011, -} - - -@article{Orbanz2008, - author = {P. Orbanz and J.M. Buhmann}, - title = {Nonparametric {B}ayesian Image Segmentation}, - journal = {International Journal of Computer Vision (IJCV)}, - volume = 77, - number = {1--3}, - pages = {25--45}, - month = may, - year = 2008, -} - -@article{Green1995, - author = {P. J. Green}, - title = {Reversible jump {M}arkov chain {M}onte {C}arlo Methods computation and {B}ayesian model determination}, - journal = {Biometrika}, - volume = 82, - number = 4, - pages = {711--732}, - month = dec, - year = 1995, -} - -@article{Green1997, - author = {S. Richardson and P. J. Green}, - title = {On {B}ayesian Analysis of Mixtures with an Unknown Number of Components}, - journal = {J. Roy. Stat. Soc. Ser. B}, - volume = 59, - number = 4, - year = 1997, - pages = {731--792}, -} - -@article{Dobigeon2010boca, - author = {N. Dobigeon and J.-Y. Tourneret}, - title = {{B}ayesian orthogonal component analysis for sparse representation}, - journal = IEEESP, - year = 2010, - month = may, - volume = {58}, - number = {5}, - pages = {2675--2685}, - note = {}, -} - -@article{Zhou1997, - author = {Z. Zhou and R. Leahy and J. 
Qi}, - title = {Approximate maximum likelihood hyperparameter estimation for {G}ibbs prior}, - journal = IEEEIP, - year = 1997, - month = jun, - volume = {6}, - number = {6}, - pages = {844--861}, - note = {}, -} - -@article{Descombes1999, - author = {X. Descombes and R. Morris and J. Zerubia and M. Berthod}, - title = {Estimation of {M}arkov random field prior parameters using {M}arkov chain {M}onte {C}arlo maximum likelihood}, - journal = IEEEIP, - year = 1999, - month = jun, - volume = {8}, - number = {7}, - pages = {945--963}, - note = {}, -} - -@Article{Wu1982, - title = {The {P}otts model}, - author = {Wu, F. Y.}, - journal = {Rev. Mod. Phys.}, - volume = {54}, - number = {1}, - pages = {235--268}, - numpages = {33}, - year = {1982}, - month = jan, - doi = {10.1103/RevModPhys.54.235}, - publisher = {American Physical Society} -} - -@article{Geman1984, - author = {S. Geman and D. Geman}, - title = {{S}tochastic relaxation, {G}ibbs distributions, and the {B}ayesian restoration of images}, - journal = IEEEPAMI, - year = 1984, - month = nov, - volume = {6}, - number = {6}, - pages = {721--741}, - note = {}, -} - -@article{Snoussi2004, - author = {H. Snoussi and A. Mohammad-Djafari}, - title = {Fast Joint Separation And Segmentation Of Mixed Images}, - journal = {J. Electron. Imag.}, - year = {2004}, - volume = {13}, - pages = {349}, - note = {}, -} - -@ARTICLE{Bali2008, - author={Bali, N. and Mohammad-Djafari, A.}, - journal=IEEEIP, - title={Bayesian Approach With Hidden Markov Modeling and Mean Field Approximation for Hyperspectral Data Analysis}, - year={2008}, - month=feb, - volume={17}, - number={2}, - pages={217 -225}, -} - -@book{Kindermann1980, - author = {R. Kindermann and J. L. Snell}, - title = {Markov random fields and their applications}, - year = {1980}, - publisher = {RI: Amer. Math. Soc.}, - address = {Providence}, - available = {y}, - topic = { } -} - -@book{Li2009, - author = {Stan Z. 
Li}, - title = {{M}arkov {R}andom {F}ield {M}odeling in {I}mage {A}nalysis, 3rd ed.}, - year = {2009}, - publisher = {Springer}, - address = {New York, London}, - available = {y}, - topic = { } -} - -@article{Besag1974, - author = {J. Besag}, - title = {{S}patial interaction and the statistical analysis of lattice systems}, - journal = JROYSB, - year = 1974, - volume = {36}, - number = {2}, - pages = {192--236}, - note = {}, -} - -@inproceedings{PereyraICASSP2010, - author = {M. A. Pereyra and N. Dobigeon and H. Batatia and J.-Y. Tourneret}, - title = {Labeling skin tissues in ultrasound images using a generalized {R}ayleigh mixture model}, - booktitle = ICASSP, - year = {2011}, - month = may, - address = {Prague, Czech Republic} -} - -@article{Moller2006, - author = {Moller, J. and Pettitt, A. N. and Reeves, R. and Berthelsen, K. K.}, - title = {{An efficient Markov chain Monte Carlo method for distributions with intractable normalising constants}}, - volume = {93}, - number = {2}, - pages = {451-458}, - month = jun, - year = {2006}, - abstract ={Maximum likelihood parameter estimation and sampling from Bayesian posterior distributions are problematic when the probability density for the parameter of interest involves an intractable normalising constant which is also a function of that parameter. In this paper, an auxiliary variable method is presented which requires only that independent samples can be drawn from the unnormalised density at any particular parameter value. The proposal distribution is constructed so that the normalising constant cancels from the Metropolis-Hastings ratio. The method is illustrated by producing posterior samples for parameters of the Ising model given a particular lattice realisation.}, - journal = {Biometrika} -} - -@article{McGrory2009, - author = {C. {McGrory} and D. Titterington and R. Reeves and A. 
Pettitt}, - title = {Variational {B}ayes for estimating the parameters of a hidden {P}otts model}, - volume = {19}, - number = {3}, - journal = {Statistics and Computing}, - year = {2009}, - month = sep, - pages = {329--340} -} -@article{Reeves2004, - author = {R. Reeves and A. Pettitt}, - title = {Efficient recursions for general factorisable models}, - volume = {91}, - journal = {Biometrika}, - month = dec, - year = {2004}, - pages = {751\96-757} -} - -@INPROCEEDINGS{Zengguo2008, - author={Zengguo Sun and Chongzhao Han}, - booktitle={Proc. (IGARSS 08). IEEE Int. Geosc. and Remote Sensing Symp.}, - title={Heavy-Tailed {R}ayleigh Distribution: A New Tool for the Modeling of {SAR} Amplitude Images}, - year={2008}, - month=jul, - volume={4}, - number={}, - pages={1253-1256}, -} - -@ARTICLE{DobigeonHero2009, - author={N. Dobigeon and A. O. Hero and J.-Y. Tourneret}, - journal=IEEEIP, - title={Hierarchical {B}ayesian Sparse Image Reconstruction With Application to {MRFM}}, - year={2009}, - month=sep , - volume={18}, - number={9}, - pages={2059 -2070}, - keywords={Gibbs sampling strategy;Markov chain Monte Carlo;additive white Gaussian noise;hierarchical Bayesian sparse image reconstruction;linear transformation;magnetic resonance force microscopy imaging;positive exponential distribution;posterior distribution estimation;synthetic data;tobacco virus sample;Markov processes;Monte Carlo methods;biomedical MRI;exponential distribution;image reconstruction;microscopy;Algorithms;Artificial Intelligence;Bayes Theorem;Image Processing, Computer-Assisted;Magnetic Resonance Spectroscopy;Markov Chains;Microscopy, Atomic Force;Monte Carlo Method;Tobacco Mosaic Virus;}, - doi={10.1109/TIP.2009.2024067}, - ISSN={1057-7149},} - -@book{NikiasShao, - author = {C. L. Nikias and M. 
Shao}, - title = {Signal Processing with Alpha-Stable Distribution and Applications}, - year = {1995}, - publisher = {Wiley}, - address = {New York (NJ)}, - available = {n}, - topic = { } -} - - -@ARTICLE{Vincent2010, - author={Vincent, T. and Risser, L. and Ciuciu, P.}, - journal=MEDI, - title={Spatially Adaptive Mixture Modeling for Analysis of f{MRI} Time Series}, - year={2010}, - month=apr , - volume={29}, - number={4}, - pages={1059 -1074}, - keywords={Bayesian formalism;Ising fields;brain;detection-estimation framework;evoked activity;extrapolation technique;fMRI;general linear model;hemodynamic response estimation;independent mixture models;signal-to-noise ratio;spatial mixture models;spatially adaptive mixture modeling;supervised SMM;Bayes methods;bioelectric potentials;biomedical MRI;brain;extrapolation;haemodynamics;physiological models;time series;}, - doi={10.1109/TMI.2010.2042064}, - ISSN={0278-0062}, -} - -@ARTICLE{Kayabol2009, - author={Kayabol, K. and Kuruoglu, E.E. and Sankur, B.}, - journal=IEEEIP, - title={Bayesian Separation of Images Modeled With {MRF}s Using {MCMC}}, - year={2009}, - month=may , - volume={18}, - number={5}, - pages={982 -994}, - keywords={Bayesian framework;ICA;MCMC;Markov random field;a posteriori distribution;astrophysical image;gradient image;iterated conditional mode;joint maximization;modified-Gibbs sampling;numerical method;source estimation;source separation;statistical model;texture image;Bayes methods;Markov processes;Monte Carlo methods;image sampling;independent component analysis;iterative methods;maximum likelihood estimation;random processes;source separation;statistical distributions;}, - doi={10.1109/TIP.2009.2012905}, - ISSN={1057-7149}, -} - -@ARTICLE{Mignotte2007, - author={Mignotte, M.}, - journal=IEEEIP, - title={Image Denoising by Averaging of Piecewise Constant Simulations of Image Partitions}, - year={2007}, - month=feb , - volume={16}, - number={2}, - pages={523 -533}, - keywords={Markov Chain 
Monte-Carlo simulations;additive white Gaussian noise;constant-value regions;image denoising;image partitions;least square sense;mean square sense error;piecewise constant simulations;spatial adaptive denoising method;unsupervised Markovian framework;wavelet-based denoising methods;AWGN;Markov processes;Monte Carlo methods;image denoising;least mean squares methods;wavelet transforms;Algorithms;Artifacts;Artificial Intelligence;Computer Simulation;Image Enhancement;Image Interpretation, Computer-Assisted;Imaging, Three-Dimensional;Information Storage and Retrieval;Markov Chains;Models, Statistical;Pattern Recognition, Automated;}, - doi={10.1109/TIP.2006.887729}, - ISSN={1057-7149}, -} - -@ARTICLE{Noble2006, - author={Noble, J.A. and Boukerroui, D.}, - journal=MEDI, - title={Ultrasound image segmentation: a survey}, - year={2006}, - month=aug , - volume={25}, - number={8}, - pages={987 -1010}, - keywords={image classification;medical B-mode ultrasound images;reviews;ultrasound image segmentation;biomedical ultrasonics;image classification;image segmentation;medical image processing;}, - doi={10.1109/TMI.2006.877092}, - ISSN={0278-0062},} - -@article{Liang2010, - author = {Liang, F.}, - title = {{A double {M}etropolis-{H}astings sampler for spatial models with intractable normalizing constants}}, - volume = {80}, - number = {9}, - pages = {1007-1022}, - year = {2010}, - journal = {J. Stat. Comp. Simulation} -} - -@inproceedings{Murray2006, - author={I. Murray and Z. Ghahramani and D. MacKay}, - booktitle={Proc. 
(UAI 06) 22nd Annual Conference on Uncertainty in Artificial Intelligence}, - title={{MCMC} for doubly-intractable distributions}, - year={2006}, - pages={359-366}, - month = jul, - address = {Cambridge, MA, USA} - -} - -@article{Besag1975, - title = {Statistical Analysis of Non-Lattice Data}, - author = {Besag, Julian}, - journal = JROYSD, - volume = {24}, - number = {3}, - pages = {179-195}, - month = sep, - year = {1975}, -} - -@article{Propp1996, - author = {Propp, James Gary and Wilson, David Bruce}, - title = {Exact sampling with coupled {M}arkov chains and applications to statistical mechanics}, - journal = {Rand. Struct. Algorith.}, - volume = {9}, - issue = {1-2}, - month = aug, - year = {1996}, - pages = {223--252}, - numpages = {30}, - publisher = {John Wiley \& Sons, Inc.}, - address = {New York, USA}, -} - -@Article{Childs2001, - title = {Exact sampling from nonattractive distributions using summary states}, - author = {Childs, Andrew M. and Patterson, Ryan B. and MacKay, David J. C.}, - journal = {Phys. Rev. E}, - volume = {63}, - number = {3}, - pages = {36113--36118}, - month = feb, - year = {2001}, -} - -@Article{Ayed2005, - title = {Multiregion Level Set Partitioning of Synthetic Aperture Radar Images}, - author = {I. Ben Ayed and A. Mitiche and Z. Belhadj}, - journal = IEEEPAMI, - volume = {27}, - number = {5}, - pages = {793-800}, - month = may, - year = {2005}, -} - -@incollection{RobertMCMC, - author = {C. P. Robert and S. Richardson}, - title = {{M}arkov {C}hain {M}onte {C}arlo methods}, - booktitle = {Discretization and {M}{C}{M}{C} Convergence Assessment}, - year = {1998}, - editor = {C. P. Robert}, - publisher = {Springer Verlag}, - address = {New York}, - pages = {1-25}, - topic = { } -} - -@book{Gelman1995, - author = {A. Gelman and J. B. Carlin and H. S. Stern and D. B.
Rubin}, - title = {Bayesian Data Analysis}, - year = 1995, - publisher = {Chapman \& Hall}, - address = {London}, - available = {n}, - topic = { } -} - -@book{Bernardo1994, - author = {J.M. Bernardo and A.F.M. Smith}, - title = {Bayesian Theory}, - year = 1994, - publisher = {Wiley}, - address = {New York}, -} - -@article{Gelman1992, - author = {A. Gelman and DB. Rubin}, - title = {Inference from iterative simulation using - multiple sequences}, - journal = {Stat. Sciences}, - volume = {7}, - number = {4}, - year = {1992}, - pages = {457-511}, -} - -@article{Godsill1998, - author = {S.J. Godsill and P.J.W. Rayner}, - title = {Statistical reconstruction and analysis of autoregressive signals in impulsive noise using the {G}ibbs sampler}, - journal = IEEESAP, - volume = {6}, - number = {4}, - month = jul, - year = {1998}, - pages = {352-372}, -} - -@article{Poulain2011, - author = {V. Poulain and J. Inglada and P. Marthon and M. Spigai and J.-Y. Tourneret}, - title = {High resolution optical and {SAR} image fusion for building database updating}, - journal = IEEEGRS, - volume = {49}, - number = {8}, - month = aug, - year = {2011}, - pages = {2900-2910}, -} - -@article{McKinley2009, - author = {T. McKinley and A. Cook and R. Deardon }, - title = {Inference in epidemic models without likelihoods}, - journal = {Int. Journal of Biostatistics}, - volume = {5}, - number = {1}, - month = sep, - year = {2009}, - pages = {1-38}, -} - -@article{Tourneret2003, - author = {J.-Y. Tourneret and M. Doisy and M. Lavielle}, - title = {Bayesian Retrospective Detection of Multiple Changepoints corrupted by Multiplicative Noise. {A}pplication to {SAR} Image Edge Detection}, - journal = {Signal Processing}, - volume = {83}, - number = {9}, - month = sep, - year = {2003}, - pages = {1871-1887}, -} - -@article{PereyraUFFC2011, - author = {M. A. Pereyra and H. 
Batatia}, - title = {Modeling Ultrasound Echoes in Skin Tissues using Symmetric $\alpha$-Stable Processes}, - journal = IEEEUFFC, - month = jan, - year = 2012, - volume = {59}, - number = {1}, - pages = {60--72}, -} - -@article{Marjoram2003, - author = {Marjoram, Paul and Molitor, John and Plagnol, Vincent and Tavaré, Simon}, - journal = {Proc. Nat. Academy Sci.}, - title = {Markov chain {M}onte {C}arlo without likelihoods}, - volume = {100}, - number = {26}, - pages = {15324-15328}, - month = dec, - year = {2003}, - -} - -@article{Marin2011, - author = {J.-M. Marin and P. Pudlo and C. P. Robert and R. Ryder}, - journal = {Stat. Comput.}, - title = {Approximate {B}ayesian {C}omputational methods}, - volume = {21}, - number = {2}, - pages = {289-291}, - month = oct, - year = {2011}, -} - -@article {Andrieu2003, - author = "Andrieu C. and de Freitas N. and Doucet A. and Jordan M.I.", - title = "An Introduction to MCMC for Machine Learning", - journal = "Machine Learning", - volume = "50", - number = "1-2", - year = "2003", - pages = "5-43", -} - -@article{Grelaud2009, - author = {A. Grelaud and J. M. Marin and C. Robert and F. Rodolphe and F. Tally}, - journal = {Bayesian Analysis}, - title = {Likelihood-free methods for model choice in {G}ibbs random fields}, - volume = {3}, - number = {2}, - pages = {427-442}, - month = jan, - year = {2009}, -} - -@ARTICLE{Galland2009, - author={Galland, F. and Nicolas, J.-M. and Sportouche, H. and Roche, M. and Tupin, F. and Refregier, P.}, - journal=IEEEGRS, - title={Unsupervised Synthetic Aperture Radar Image Segmentation Using {F}isher Distributions}, - year={2009}, - month=aug, - volume={47}, - number={8}, - pages={2966 -2972}, -} - -@article{Forbes2007, - author = {F. Forbes and G. 
Fort}, - journal = IEEEIP, - title = {Combining {M}onte {C}arlo and Mean field like methods for inference in hidden {M}arkov Random Fields}, - volume = {16}, - number = {3}, - pages = {824-837}, - month = mar, - year = {2007}, -} - -@article{Carlin2000, - author = {Carlin, Bradley P. and Louis, Thomas A.}, - title = {Empirical Bayes: Past, Present and Future}, - journal = {Journal of the American Statistical Association}, - volume = {95}, - number = {452}, - pages = {1286--1289}, - year = {2000} -} - -@article{Atchade2011, - author={Atchad\'e, Yves F.}, - year={2011}, - journal={Statistics and Computing}, - volume={21}, - number={4}, - title={A computational framework for empirical {B}ayes inference}, - pages={463--473}, -} - - -@article{Casella1985, - title = {An Introduction to {E}mpirical {B}ayes Data Analysis}, - author = {G. Casella}, - journal = {The American Statistician}, - volume = {39}, - number = {2}, - pages = {83--87}, - year = {1985}, -} - -@article{Celeux2003, - author = "G. Celeux and F. Forbes and N. Peyrard", - title = "{EM} procedures using mean field-like approximations for {M}arkov model-based image segmentation", - journal = "Pattern Recognition", - volume = "36", - number = "1", - pages = "131 - 144", - month = jan, - year = "2003", -} - -@article{Forbes2003, - author = "F. Forbes and N. Peyrard", - title = "Hidden {M}arkov Random Field Selection Criteria based on Mean Field-like approximations", - journal = IEEEPAMI, - volume = "25", - number = "8", - pages = "1089 - 1101", - month = aug, - year = "2003", -} - - - -@article{PereyraTMIC2011, - author = {M. Pereyra and N. Dobigeon and H. Batatia and J.-Y. Tourneret}, - title = {Segmentation of skin lesions in 2{D} and 3{D} ultrasound images using a spatially coherent generalized {R}ayleigh mixture model}, - journal = {IEEE Trans. Med. 
Imag.}, - volume = {31}, - number = {8}, - pages = {1509-1520}, - month = aug, - year = {2012}, -} - -@article{Beaumont2002, - author = {Mark A Beaumont and Wenyang Zhang and David J. Balding}, - title = {Approximate Bayesian Computation in Population Genetics}, - volume = {162}, - number = {4}, - pages = {2025-2035}, - year = {2002}, - journal = {Genetics} -} - -@article{Kennedy1985, - volume = {54}, - journal = {Phys. Rev. Lett.}, - author = {Kennedy, A. D. and Kuti, J.}, - month = jun, - year = {1985}, - title = {Noise without Noise: A New {M}onte {C}arlo Method}, - issue = {23}, - pages = {2473--2476} -} - -@ARTICLE{Everitt2012, - author = {R. G. Everitt}, - title = {Bayesian Parameter Estimation for Latent {M}arkov Random Fields and Social Networks}, - journal = {J. Comput. Graphical Stat.}, - year = 2012, - note = {to appear} -} - -@article{Pritchard1999, - author = {Pritchard, J K and Seielstad, M T and Perez-Lezaun, A and Feldman, M W}, - title = {Population growth of human Y chromosomes: a study of Y chromosome microsatellites.}, - volume = {16}, - number = {12}, - pages = {1791-1798}, - year = {1999}, - journal = {Molecular Biology and Evolution} -} - - -%IP journals - MRF -@ARTICLE{Cordero2012, - author={Cordero-Grande, L. and Vegas-Sanchez-Ferrero, G. and Casaseca-de-la-Higuera, P. and Alberola-Lopez, C.}, - journal=IEEEIP, - title={A {M}arkov Random Field Approach for Topology-Preserving Registration: Application to Object-Based Tomographic Image Interpolation}, - year={2012}, - month=apr, - volume={21}, - number={4}, - pages={2047 -2061}, -} - -@ARTICLE{Mahapatra2012, - author={Mahapatra, D. and Ying Sun}, - journal=IEEEIP, - title={Integrating Segmentation Information for Improved {MRF}-Based Elastic Image Registration}, - year={2012}, - month=jan, - volume={21}, - number={1}, - pages={170 -183}, -} - -@ARTICLE{Katsuki2012, - author={Katsuki, T. and Torii, A. 
and Inoue, M.}, - journal=IEEEIP, - title={Posterior Mean Super-resolution with a Causal {G}aussian {M}arkov Random Field Prior}, - year={2012}, - month=apr, - volume={21}, - number={4}, - pages={2187-2197}, - keywords={}, -} - -@ARTICLE{Jain2012, - author={Jain, S. and Papadakis, M. and Upadhyay, S. and Azencott, R.}, - journal={IEEE Trans. Image Process.}, - title={Rigid Motion Invariant Classification of $3${D}-Textures}, - year={2012}, - month=may, - volume={21}, - number={5}, - pages={2449-2463}, -} - -%IP journals - MCMC -@ARTICLE{Mignotte2010, - author={Mignotte, M.}, - journal=IEEEIP, - title={A Label Field Fusion {B}ayesian Model and Its Penalized Maximum Rand Estimator for Image Segmentation}, - year={2010}, - month=jun, - volume={19}, - number={6}, - pages={1610 -1624}, -} - -@ARTICLE{Kayabol2010, - author={Kayabol, K. and Kuruoglu, E.E. and Sanz, J.L. and Sankur, B. and Salerno, E. and Herranz, D.}, - journal=IEEEIP, - title={Adaptive Langevin Sampler for Separation of t-Distribution Modelled Astrophysical Maps}, - year={2010}, - month=sep, - volume={19}, - number={9}, - pages={2357-2368}, -} - -@ARTICLE{Zhou2012, - author={Xiuzhuang Zhou and Yao Lu and Jiwen Lu and Jie Zhou}, - journal=IEEEIP, - title={Abrupt Motion Tracking Via Intensively Adaptive {M}arkov-Chain {M}onte {C}arlo Sampling}, - year={2012}, - month=feb, - volume={21}, - number={2}, - pages={789 -801}, -} - -@article{Laplace, - title = {Memoir on the Probability of the Causes of Events}, - author = {Pierre Simon Laplace}, - journal = {Statistical Science}, - volume = {1}, - number = {3}, - pages = {364--378}, - year = {1986}, - month = aug, -} - -@ARTICLE{Destrempes2006, - author={Destrempes, F. and Angers, J.-F. and Mignotte, M.}, - journal=IEEEIP, - title={Fusion of Hidden {M}arkov Random Field Models and Its {B}ayesian Estimation}, - year={2006}, - month=oct, - volume={15}, - number={10}, - pages={2920 -2935}, -} - -@ARTICLE{Nikou2010, - author={Nikou, C. and Likas, A.C. 
and Galatsanos, N.P.}, - journal=IEEEIP, - title={A {B}ayesian Framework for Image Segmentation With Spatially Varying Mixtures}, - year={2010}, - month=sep, - volume={19}, - number={9}, - pages={2278 -2289}, -} - -@ARTICLE{Orieux2012, - author={Orieux, F. and Sepulveda, E. and Loriette, V. and Dubertret, B. and Olivo-Marin, J.-C.}, - journal=IEEEIP, - title={Bayesian Estimation for Optimized Structured Illumination Microscopy}, - year={2012}, - month=feb, - volume={21}, - number={2}, - pages={601 - 614}, -} - -%IP journals - Kuruoglu - -@ARTICLE{Kuruoglu2004, - author={Kuruoglu, E.E. and Zerubia, J.}, - journal=IEEEIP, - title={Modeling {SAR} images with a generalization of the Rayleigh distribution}, - year={2004}, - month=apr, - volume={13}, - number={4}, - pages={527-533}, -} - -@ARTICLE{Achim2006, - author={Achim, A. and Kuruoglu, E.E. and Zerubia, J.}, - journal=IEEEIP, - title={{SAR} image filtering based on the heavy-tailed Rayleigh model}, - year={2006}, - month=sep, - volume={15}, - number={9}, - pages={2686-2693}, -} - -@inproceedings{pereyra2014maximum, - title={Maximum marginal likelihood estimation of the granularity coefficient of a Potts-Markov random field within an mcmc algorithm}, - author={Pereyra, Marcelo and Whiteley, Nick and Andrieu, Christophe and Tourneret, Jean-Yves}, - booktitle={Statistical Signal Processing (SSP), 2014 IEEE Workshop on}, - pages={121--124}, - year={2014}, - organization={IEEE} -} - -@article{salsa2010fast, - title={Fast image recovery using variable splitting and constrained optimization}, - author={Afonso, Manya V and Bioucas-Dias, Jos{\'e} M and Figueiredo, M{\'a}rio AT}, - journal={IEEE Transactions on Image Processing}, - volume={19}, - number={9}, - pages={2345--2356}, - year={2010}, - publisher={IEEE} -} - -@article{sugar2014stein, - title={Stein Unbiased GrAdient estimator of the Risk (SUGAR) for multiple parameter selection}, - author={Deledalle, Charles-Alban and Vaiter, Samuel and Fadili, Jalal and Peyr{\'e}, 
Gabriel}, - journal={SIAM Journal on Imaging Sciences}, - volume={7}, - number={4}, - pages={2448--2487}, - year={2014}, - publisher={SIAM} -} - -@article{stein1981estimation, - title={Estimation of the mean of a multivariate normal distribution}, - author={Stein, Charles M}, - journal={The annals of Statistics}, - pages={1135--1151}, - year={1981}, - publisher={JSTOR} -} - -@article{pesquet2009sure, - title={A SURE approach for digital signal/image deconvolution problems}, - author={Pesquet, Jean-Christophe and Benazza-Benyahia, Amel and Chaux, Caroline}, - journal={IEEE Transactions on Signal Processing}, - volume={57}, - number={12}, - pages={4616--4632}, - year={2009}, - publisher={IEEE} -} - -@article{fort2017stochastic, - title={Stochastic Proximal Gradient Algorithms for Penalized Mixed Models}, - author={Fort, Gersende and Ollier, Edouard and Samson, Adeline}, - journal={arXiv preprint arXiv:1704.08891}, - year={2017} -} - -@article{molina1999bayesian, - title={Bayesian and regularization methods for hyperparameter estimation in image restoration}, - author={Molina, Rafael and Katsaggelos, Aggelos K and Mateos, Javier}, - journal={IEEE Transactions on Image Processing}, - volume={8}, - number={2}, - pages={231--246}, - year={1999}, - publisher={IEEE} -} - -@article{thompson1991study, - title={A study of methods of choosing the smoothing parameter in image restoration by regularization}, - author={Thompson, Alan M and Brown, John C and Kay, Jim W and Titterington, D Michael}, - journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, - volume={13}, - number={4}, - pages={326--339}, - year={1991}, - publisher={IEEE Computer Society} -} - -@article{ramani2008suremontecarlo, - title={Monte-Carlo SURE: A black-box optimization of regularization parameters for general denoising algorithms}, - author={Ramani, Sathish and Blu, Thierry and Unser, Michael}, - journal={IEEE Transactions on Image Processing}, - volume={17}, - number={9}, - 
pages={1540--1554}, - year={2008}, - publisher={IEEE} -} - -@article{durmus2016efficient, - title={Efficient Bayesian computation by proximal Markov chain Monte Carlo: when Langevin meets Moreau}, - author={Durmus, Alain and Moulines, Eric and Pereyra, Marcelo}, - journal={SIAM Journal on Imaging Sciences}, - volume={11}, - number={1}, - pages={473--506}, - year={2018}, - publisher={SIAM} -} - -@article {durmus2017nonasymp, - AUTHOR = {Durmus, Alain and Moulines, \'{E}ric}, - TITLE = {Nonasymptotic convergence analysis for the unadjusted - {L}angevin algorithm}, - JOURNAL = {The Annals of Applied Probability}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {27}, - YEAR = {2017}, - NUMBER = {3}, - PAGES = {1551--1587}, - ISSN = {1050-5164}, - MRCLASS = {65C05 (60F05 60J05 65C40 93E35)}, - MRNUMBER = {3678479}, - DOI = {10.1214/16-AAP1238}, - URL = {https://doi.org/10.1214/16-AAP1238}, -} - -@incollection{combettes2011proximalsplitting, - title={Proximal splitting methods in signal processing}, - author={Combettes, Patrick L and Pesquet, Jean-Christophe}, - booktitle={Fixed-point algorithms for inverse problems in science and engineering}, - pages={185--212}, - year={2011}, - publisher={Springer} -} - -@article{green2015bayesian, - title={Bayesian computation: a summary of the current state, and samples backwards and forwards}, - author={Green, Peter J and {\L}atuszy{\'n}ski, Krzysztof and Pereyra, Marcelo and Robert, Christian P}, - journal={Statistics and Computing}, - volume={25}, - number={4}, - pages={835--862}, - year={2015}, - publisher={Springer} -} - -@article{bonettini2013new, - title={A new semiblind deconvolution approach for Fourier-based image restoration: an application in astronomy}, - author={Bonettini, Silvia and Cornelio, Anastasia and Prato, Marco}, - journal={SIAM Journal on Imaging Sciences}, - volume={6}, - number={3}, - pages={1736--1757}, - year={2013}, - publisher={SIAM} -} - - -@article{amizic2013compressive, - 
title={Compressive blind image deconvolution}, - author={Amizic, Bruno and Spinoulas, Leonidas and Molina, Rafael and Katsaggelos, Aggelos K}, - journal={IEEE Transactions on Image Processing}, - volume={22}, - number={10}, - pages={3994--4006}, - year={2013}, - publisher={IEEE} -} - -@article{leonard2012schrodinger, - title={From the {S}chr{\"o}dinger problem to the {M}onge--{K}antorovich problem}, - author={L{\'e}onard, Christian}, - journal={Journal of Functional Analysis}, - volume={262}, - number={4}, - pages={1879--1920}, - year={2012}, - publisher={Elsevier} -} - -@article{cominetti1994asymptotic, - title={Asymptotic analysis of the exponential penalty trajectory in linear programming}, - author={Cominetti, Roberto and San Mart{\'i}n, Jaime}, - journal={Mathematical Programming}, - volume={67}, - number={1}, - pages={169--187}, - year={1994}, - publisher={Springer} -} - -@article{ghosal2021stability, - title={Stability of Entropic Optimal Transport and {S}chr{\"o}dinger Bridges}, - author={Ghosal, Promit and Nutz, Marcel and Bernton, Espen}, - journal={arXiv preprint arXiv:2106.03670}, - year={2021} -} - - -@article{carlier2017convergence, - title={Convergence of entropic schemes for optimal transport and gradient flows}, - author={Carlier, Guillaume and Duval, Vincent and Peyr{\'e}, Gabriel and Schmitzer, Bernhard}, - journal={SIAM Journal on Mathematical Analysis}, - volume={49}, - number={2}, - pages={1385--1418}, - year={2017}, - publisher={SIAM} -} - -@article{donoho2006compressed, - title={Compressed sensing}, - author={Donoho, David L}, - journal={IEEE Transactions on information theory}, - volume={52}, - number={4}, - pages={1289--1306}, - year={2006}, - publisher={IEEE} -} - -@article{candes2006robust, - title={Robust uncertainty principles: Exact signal reconstruction from highly incomplete frequency information}, - author={Cand{\`e}s, Emmanuel J and Romberg, Justin and Tao, Terence}, - journal={IEEE Transactions on information theory}, - 
volume={52}, - number={2}, - pages={489--509}, - year={2006}, - publisher={IEEE} -} - -@article{babacan2011variational, - title={Variational Bayesian super resolution}, - author={Babacan, S Derin and Molina, Rafael and Katsaggelos, Aggelos K}, - journal={IEEE Transactions on Image Processing}, - volume={20}, - number={4}, - pages={984--999}, - year={2011}, - publisher={IEEE} -} - -@article{morgenshtern2016super, - title={Super-resolution of positive sources: The discrete setup}, - author={Morgenshtern, Veniamin I and Candes, Emmanuel J}, - journal={SIAM Journal on Imaging Sciences}, - volume={9}, - number={1}, - pages={412--444}, - year={2016}, - publisher={SIAM} -} - -@article{bioucas2007new, - title={A new TwIST: Two-step iterative shrinkage/thresholding algorithms for image restoration}, - author={Bioucas-Dias, Jos{\'e} M and Figueiredo, M{\'a}rio AT}, - journal={IEEE Transactions on Image processing}, - volume={16}, - number={12}, - pages={2992--3004}, - year={2007}, - publisher={IEEE} -} - -@article{lustig2007sparse, - title={Sparse MRI: The application of compressed sensing for rapid MR imaging}, - author={Lustig, Michael and Donoho, David and Pauly, John M}, - journal={Magnetic resonance in medicine}, - volume={58}, - number={6}, - pages={1182--1195}, - year={2007}, - publisher={Wiley Online Library} -} - -@article{chan2011alternating, - title={Alternating direction method for image inpainting in wavelet domains}, - author={Chan, Raymond H and Yang, Junfeng and Yuan, Xiaoming}, - journal={SIAM Journal on Imaging Sciences}, - volume={4}, - number={3}, - pages={807--826}, - year={2011}, - publisher={SIAM} -} - -@article{mairal2008sparse, - title={Sparse representation for color image restoration}, - author={Mairal, Julien and Elad, Michael and Sapiro, Guillermo}, - journal={IEEE Transactions on image processing}, - volume={17}, - number={1}, - pages={53--69}, - year={2008}, - publisher={IEEE} -} - -@article{nascimento2005vertex, - title={Vertex component 
analysis: A fast algorithm to unmix hyperspectral data}, - author={Nascimento, Jos{\'e} MP and Dias, Jos{\'e} MB}, - journal={IEEE transactions on Geoscience and Remote Sensing}, - volume={43}, - number={4}, - pages={898--910}, - year={2005}, - publisher={IEEE} -} - -@article{xing2012dictionary, - title={Dictionary learning for noisy and incomplete hyperspectral images}, - author={Xing, Zhengming and Zhou, Mingyuan and Castrodad, Alexey and Sapiro, Guillermo and Carin, Lawrence}, - journal={SIAM Journal on Imaging Sciences}, - volume={5}, - number={1}, - pages={33--56}, - year={2012}, - publisher={SIAM} -} - -@article{simoes2015convex, - title={A convex formulation for hyperspectral image superresolution via subspace-based regularization}, - author={Sim{\~o}es, Miguel and Bioucas-Dias, Jos{\'e} and Almeida, Luis B and Chanussot, Jocelyn}, - journal={IEEE Transactions on Geoscience and Remote Sensing}, - volume={53}, - number={6}, - pages={3373--3388}, - year={2015}, - publisher={IEEE} -} - -@article{haro2012photographing, - title={Photographing paintings by image fusion}, - author={Haro, Gloria and Buades, Antoni and Morel, Jean-Michel}, - journal={SIAM Journal on Imaging Sciences}, - volume={5}, - number={3}, - pages={1055--1087}, - year={2012}, - publisher={SIAM} -} - -@article{candes2015phase, - title={Phase retrieval via matrix completion}, - author={Candes, Emmanuel J and Eldar, Yonina C and Strohmer, Thomas and Voroninski, Vladislav}, - journal={SIAM review}, - volume={57}, - number={2}, - pages={225--251}, - year={2015}, - publisher={SIAM} -} - -@article{bioucas2007phase, - title={Phase unwrapping via graph cuts}, - author={Bioucas-Dias, Jos M and Valadao, Gonalo}, - journal={IEEE Transactions on Image processing}, - volume={16}, - number={3}, - pages={698--709}, - year={2007}, - publisher={IEEE} -} - -@book{kaipio2006statistical, - title={Statistical and computational inverse problems}, - author={Kaipio, Jari and Somersalo, Erkki}, - volume={160}, - 
year={2006}, - publisher={Springer Science \& Business Media} -} - -@book{robert2007bayesian, - title={The Bayesian choice: from decision-theoretic foundations to computational implementation}, - author={Robert, Christian}, - year={2007}, - publisher={Springer Science \& Business Media} -} - -@article{pereyra2016proximal, - title={Proximal markov chain monte carlo algorithms}, - author={Pereyra, Marcelo}, - journal={Statistics and Computing}, - volume={26}, - number={4}, - pages={745--760}, - year={2016}, - publisher={Springer} -} - -@article{pereyra2014computing, - title={Computing the Cramer--Rao bound of Markov random field parameters: application to the Ising and the Potts models}, - author={Pereyra, Marcelo and Dobigeon, Nicolas and Batatia, Hadj and Tourneret, Jean-Yves}, - journal={IEEE Signal Processing Letters}, - volume={21}, - number={1}, - pages={47--50}, - year={2014}, - publisher={IEEE} -} - -@article{atchade2017perturbed, - title={On perturbed proximal gradient algorithms}, - author={Atchad{\'e}, Yves F and Fort, Gersende and Moulines, Eric}, - journal={J. Mach. Learn. Res}, - volume={18}, - number={1}, - pages={310--342}, - year={2017} -} - -@InProceedings{EUSIPCO, - title="Maximum-a-posteriori estimation with unknown regularisation parameters", - author="Pereyra, Marcelo and Bioucas-Dias, Jos{\'e} M and Figueiredo, M{\'a}rio AT", - booktitle="Signal Processing Conference (EUSIPCO), 2015 23rd European", - pages="230--234", - year="2015", - organization="IEEE" -} - - -@article{andrieu2006ergodicity, - title="On the ergodicity properties of some adaptive MCMC algorithms", - author="Andrieu, Christophe and Moulines, {\'E}ric", - journal="The Annals of Applied Probability", - volume="16", - number="3", - pages="1462--1505", - year="2006", - publisher="Institute of Mathematical Statistics" -} - -# - - - -@Book{bakry:gentil:ledoux:2014, - Title = {Analysis and Geometry of {M}arkov Diffusion Operators}, - Author = {Bakry, D. and Gentil, I. 
and Ledoux, M.}, - Publisher = {Springer}, - Year = {2014}, - Volume = {348}, - - Doi = {10.1007/978-3-319-00227-9}, - ISBN = {978-3-319-00226-2; 978-3-319-00227-9}, - Mrclass = {60J25 (58J65 60J35 60J60)}, - Mrnumber = {3155209}, - Mrreviewer = {Ming Liao}, - Pages = {xx+552}, - Url = {http://dx.doi.org/10.1007/978-3-319-00227-9} -} - - - -@article{debortoli:durmus:pereyra:vidal:2018, -author = {De Bortoli V. and Durmus A. and Pereyra M. and Vidal A.}, -title = {Supplementary to "...."}, -year = {2018} -} - - - -@article{douc:fort:moulines:soulier:2004, - AUTHOR = {Douc, R. and Fort, G. and Moulines, E. and - Soulier, P.}, - TITLE = {Practical drift conditions for subgeometric rates of - convergence}, - JOURNAL = {Ann. Appl. Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {14}, - YEAR = {2004}, - NUMBER = {3}, - PAGES = {1353--1377}, - ISSN = {1050-5164}, - MRCLASS = {60J10}, - MRNUMBER = {2071426}, -MRREVIEWER = {M. G. Shur}, - DOI = {10.1214/105051604000000323}, - URL = {https://doi.org/10.1214/105051604000000323}, -} - - -@book{douc:moulines:priouret:soulier:2018, - author = {Douc, R. and Moulines, \'E. and Priouret, P. and Soulier, P.}, - editor = {Springer}, - title = {Markov Chains}, - publisher = {Springer}, - year = {2019}, -} - - -@Article{doucet:godsill:robert:2002, -author="Doucet, A. -and Godsill, S. J. -and Robert, C. P.", -title="Marginal maximum a posteriori estimation using Markov chain Monte Carlo", -journal="Statistics and Computing", -year=2002, -month="Jan", -day=01, -volume=12, -number=1, -pages="77--84", -abstract="Markov chain Monte Carlo (MCMC) methods, while facilitating the solution of many complex problems in Bayesian inference, are not currently well adapted to the problem of marginal maximum a posteriori (MMAP) estimation, especially when the number of parameters is large. 
We present here a simple and novel MCMC strategy, called State-Augmentation for Marginal Estimation (SAME), which leads to MMAP estimates for Bayesian models. We illustrate the simplicity and utility of the approach for missing data interpolation in autoregressive time series and blind deconvolution of impulsive processes.", -issn="1573-1375", -doi="10.1023/A:1013172322619", -url="https://doi.org/10.1023/A:1013172322619" -} - - - -@ARTICLE{durmus:moulines:2016, - author = {{Durmus}, A. and {Moulines}, E.}, - title = "{High-dimensional Bayesian inference via the Unadjusted Langevin Algorithm}", - journal = {ArXiv e-prints}, -archivePrefix = "arXiv", - eprint = {1605.01559}, - primaryClass = "math.ST", - keywords = {Mathematics - Statistics Theory, Statistics - Methodology, Statistics - Machine Learning}, - year = 2016, - month = may, - adsurl = {http://adsabs.harvard.edu/abs/2016arXiv160501559D}, - adsnote = {Provided by the SAO/NASA Astrophysics Data System} -} - -@article {meyn1993criteria_iii, - AUTHOR = {Meyn, Sean P. and Tweedie, R. L.}, - TITLE = {Stability of {M}arkovian processes. {III}. {F}oster-{L}yapunov - criteria for continuous-time processes}, - JOURNAL = {Advances in Applied Probability}, - FJOURNAL = {Advances in Applied Probability}, - VOLUME = {25}, - YEAR = {1993}, - NUMBER = {3}, - PAGES = {518--548}, - ISSN = {0001-8678}, - MRCLASS = {60J27}, - MRNUMBER = {1234295}, -MRREVIEWER = {Esa Nummelin}, - DOI = {10.2307/1427522}, - URL = {https://doi.org/10.2307/1427522}, -} - -@article {meyn1993criteria_i, - AUTHOR = {Meyn, Sean P. and Tweedie, R. L.}, - TITLE = {Stability of {M}arkovian processes. {I}. {C}riteria for - discrete-time chains}, - JOURNAL = {Adv. in Appl. 
Probab.}, - FJOURNAL = {Advances in Applied Probability}, - VOLUME = {24}, - YEAR = {1992}, - NUMBER = {3}, - PAGES = {542--574}, - ISSN = {0001-8678}, - MRCLASS = {60J10}, - MRNUMBER = {1174380}, -MRREVIEWER = {Esa Nummelin}, - DOI = {10.2307/1427479}, - URL = {https://doi.org/10.2307/1427479}, -} - -@article {meyn1993criteria_ii, - AUTHOR = {Meyn, Sean P. and Tweedie, R. L.}, - TITLE = {Stability of {M}arkovian processes. {II}. {C}ontinuous-time - processes and sampled chains}, - JOURNAL = {Adv. in Appl. Probab.}, - FJOURNAL = {Advances in Applied Probability}, - VOLUME = {25}, - YEAR = {1993}, - NUMBER = {3}, - PAGES = {487--517}, - ISSN = {0001-8678}, - MRCLASS = {60J27}, - MRNUMBER = {1234294}, -MRREVIEWER = {Esa Nummelin}, - DOI = {10.2307/1427521}, - URL = {https://doi.org/10.2307/1427521}, -} - - -@book {villani2009optimal, - AUTHOR = {Villani, C\'{e}dric}, - TITLE = {Optimal Transport}, - SERIES = {Grundlehren der Mathematischen Wissenschaften [Fundamental - Principles of Mathematical Sciences]}, - VOLUME = {338}, - NOTE = {Old and New}, - PUBLISHER = {Springer-Verlag, Berlin}, - YEAR = {2009}, - PAGES = {xxii+973}, - ISBN = {978-3-540-71049-3}, - MRCLASS = {49-02 (28A75 37J50 49Q20 53C23 58E30)}, - MRNUMBER = {2459454}, -MRREVIEWER = {Dario Cordero-Erausquin}, - DOI = {10.1007/978-3-540-71050-9}, - URL = {https://doi.org/10.1007/978-3-540-71050-9}, -} -@book {meyn1993markov, - AUTHOR = {Meyn, S. P. and Tweedie, R. 
L.}, - TITLE = {Markov chains and Stochastic Stability}, - SERIES = {Communications and Control Engineering Series}, - PUBLISHER = {Springer-Verlag London, Ltd., London}, - YEAR = {1993}, - PAGES = {xvi+ 548}, - ISBN = {3-540-19832-6}, - MRCLASS = {60J05}, - MRNUMBER = {1287609}, -MRREVIEWER = {Esa Nummelin}, - DOI = {10.1007/978-1-4471-3267-7}, - URL = {https://doi.org/10.1007/978-1-4471-3267-7}, -} - - -@book{khasminskii2011stochastic, - title={Stochastic stability of differential equations}, - author={Khasminskii, R.}, - volume={66}, - year={2011}, - publisher={Springer Science \& Business Media} -} - -@unpublished{debortoli2019souk, - TITLE = {{Stochastic Optimization with Unadjusted Kernel: the SOUK Algorithm}}, - AUTHOR = {De Bortoli, Valentin and Durmus, Alain and Pereyra, Marcelo and Fernandez Vidal, Ana}, - URL = {https://hal.archives-ouvertes.fr/hal-01978999}, - NOTE = {working paper or preprint}, - YEAR = {2019}, - MONTH = Jan, - PDF = {https://hal.archives-ouvertes.fr/hal-01978999/file/main.pdf}, - HAL_ID = {hal-01978999}, - HAL_VERSION = {v1}, -} - -@article {nummelin1978geometric, - AUTHOR = {Nummelin, E. and Tweedie, R. L.}, - TITLE = {Geometric ergodicity and {$R$}-positivity for general {M}arkov - chains}, - JOURNAL = {Ann. 
Probability}, - VOLUME = {6}, - YEAR = {1978}, - NUMBER = {3}, - PAGES = {404--420}, - MRCLASS = {60J05 (60J10 60J15)}, - MRNUMBER = {0474504}, -MRREVIEWER = {Dean Isaacson}, -} - -@article{brosse2018tamed, -title = "The tamed unadjusted Langevin algorithm", -journal = "Stochastic Processes and their Applications", -year = "2018", -issn = "0304-4149", -doi = "https://doi.org/10.1016/j.spa.2018.10.002", -url = "http://www.sciencedirect.com/science/article/pii/S0304414918305635", -author = "Nicolas Brosse and Alain Durmus and Éric Moulines and Sotirios Sabanis", -keywords = "Tamed unadjusted Langevin algorithm, Markov chain Monte Carlo, Total variation distance, Wasserstein distance", -abstract = "In this article, we consider the problem of sampling from a probability measure π having a density on Rd proportional to x↦e−U(x). The Euler discretization of the Langevin stochastic differential equation (SDE) is known to be unstable, when the potential U is superlinear. Based on previous works on the taming of superlinear drift coefficients for SDEs, we introduce the Tamed Unadjusted Langevin Algorithm (TULA) and obtain non-asymptotic bounds in V-total variation norm and Wasserstein distance of order 2 between the iterates of TULA and π, as well as weak error bounds. Numerical experiments are presented which support our findings." -} - -@article {nummelin1983rate, - AUTHOR = {Nummelin, Esa and Tuominen, Pekka}, - TITLE = {The rate of convergence in {O}rey's theorem for {H}arris - recurrent {M}arkov chains with applications to renewal theory}, - JOURNAL = {Stochastic Process. 
Appl.}, - FJOURNAL = {Stochastic Processes and their Applications}, - VOLUME = {15}, - YEAR = {1983}, - NUMBER = {3}, - PAGES = {295--311}, - ISSN = {0304-4149}, - MRCLASS = {60J05 (60K05)}, - MRNUMBER = {711187}, -MRREVIEWER = {Martin Jacobsen}, - DOI = {10.1016/0304-4149(83)90037-6}, - URL = {https://doi.org/10.1016/0304-4149(83)90037-6}, -} - -@article {nummelin1982geometric, - AUTHOR = {Nummelin, Esa and Tuominen, Pekka}, - TITLE = {Geometric ergodicity of {H}arris recurrent {M}arkov chains - with applications to renewal theory}, - JOURNAL = {Stochastic Process. Appl.}, - FJOURNAL = {Stochastic Processes and their Applications}, - VOLUME = {12}, - YEAR = {1982}, - NUMBER = {2}, - PAGES = {187--202}, - ISSN = {0304-4149}, - MRCLASS = {60J05 (60K05)}, - MRNUMBER = {651903}, -MRREVIEWER = {P. E. Ney}, - DOI = {10.1016/0304-4149(82)90041-2}, - URL = {https://doi.org/10.1016/0304-4149(82)90041-2}, -} - -@article {down1995exponential, - AUTHOR = {Down, D. and Meyn, S. P. and Tweedie, R. L.}, - TITLE = {Exponential and uniform ergodicity of {M}arkov processes}, - JOURNAL = {Ann. Probab.}, - FJOURNAL = {The Annals of Probability}, - VOLUME = {23}, - YEAR = {1995}, - NUMBER = {4}, - PAGES = {1671--1691}, - ISSN = {0091-1798}, - MRCLASS = {60J25}, - MRNUMBER = {1379163}, -MRREVIEWER = {Richard Isaac}, - URL = - {http://links.jstor.org/sici?sici=0091-1798(199510)23:4<1671:EAUEOM>2.0.CO;2-7&origin=MSN}, -} - -@article {foster1953stochastic, - AUTHOR = {Foster, F. G.}, - TITLE = {On the stochastic matrices associated with certain queuing - processes}, - JOURNAL = {Ann. Math. Statistics}, - FJOURNAL = {Annals of Mathematical Statistics}, - VOLUME = {24}, - YEAR = {1953}, - PAGES = {355--360}, - ISSN = {0003-4851}, - MRCLASS = {60.0X}, - MRNUMBER = {0056232}, -MRREVIEWER = {J. 
Riordan}, - DOI = {10.1214/aoms/1177728976}, - URL = {https://doi.org/10.1214/aoms/1177728976}, -} - -@book {bremaud1999markov, - AUTHOR = {Br\'{e}maud, Pierre}, - TITLE = {Markov chains}, - SERIES = {Texts in Applied Mathematics}, - VOLUME = {31}, - NOTE = {Gibbs fields, Monte Carlo simulation, and queues}, - PUBLISHER = {Springer-Verlag, New York}, - YEAR = {1999}, - PAGES = {xviii+444}, - ISBN = {0-387-98509-3}, - MRCLASS = {60J10 (60J27 60K05)}, - MRNUMBER = {1689633}, -MRREVIEWER = {Richard F. Serfozo}, - DOI = {10.1007/978-1-4757-3124-8}, - URL = {https://doi.org/10.1007/978-1-4757-3124-8}, -} - -@article {cattiaux2009trends, - AUTHOR = {Cattiaux, Patrick and Guillin, Arnaud}, - TITLE = {Trends to equilibrium in total variation distance}, - JOURNAL = {Ann. Inst. Henri Poincar\'{e} Probab. Stat.}, - FJOURNAL = {Annales de l'Institut Henri Poincar\'{e} Probabilit\'{e}s et - Statistiques}, - VOLUME = {45}, - YEAR = {2009}, - NUMBER = {1}, - PAGES = {117--145}, - ISSN = {0246-0203}, - MRCLASS = {60E15 (26D15)}, - MRNUMBER = {2500231}, -MRREVIEWER = {Yu Miao}, - DOI = {10.1214/07-AIHP152}, - URL = {https://doi.org/10.1214/07-AIHP152}, -} - - -@book {lipster2001statistics, - AUTHOR = {Liptser, Robert S. and Shiryaev, Albert N.}, - TITLE = {Statistics of Random Processes. {I}}, - SERIES = {Applications of Mathematics (New York)}, - VOLUME = {5}, - EDITION = {expanded}, - NOTE = {General theory, - Translated from the 1974 Russian original by A. B. Aries, - Stochastic Modelling and Applied Probability}, - PUBLISHER = {Springer-Verlag, Berlin}, - YEAR = {2001}, - PAGES = {xvi+427}, - ISBN = {3-540-63929-2}, - MRCLASS = {60-02 (60G30 60G35 60G44 60G55 60H10 62-02 93Exx)}, - MRNUMBER = {1800857}, -} - -@article {hairer2008spectral, - AUTHOR = {Hairer, Martin and Mattingly, Jonathan C.}, - TITLE = {Spectral gaps in {W}asserstein distances and the 2{D} - stochastic {N}avier-{S}tokes equations}, - JOURNAL = {Ann. 
Probab.}, - FJOURNAL = {The Annals of Probability}, - VOLUME = {36}, - YEAR = {2008}, - NUMBER = {6}, - PAGES = {2050--2091}, - ISSN = {0091-1798}, - MRCLASS = {35Q35 (35R60 37L55 47D07 60H15 76D05 76M35)}, - MRNUMBER = {2478676}, -MRREVIEWER = {Hakima Bessaih}, - DOI = {10.1214/08-AOP392}, - URL = {https://doi.org/10.1214/08-AOP392}, -} - -@article {hairer2011asymptotic, - AUTHOR = {Hairer, M. and Mattingly, J. C. and Scheutzow, M.}, - TITLE = {Asymptotic coupling and a general form of {H}arris' theorem - with applications to stochastic delay equations}, - JOURNAL = {Probab. Theory Related Fields}, - FJOURNAL = {Probability Theory and Related Fields}, - VOLUME = {149}, - YEAR = {2011}, - NUMBER = {1-2}, - PAGES = {223--259}, - ISSN = {0178-8051}, - MRCLASS = {60J05 (34K50 37A50 47N30 60H10)}, - MRNUMBER = {2773030}, -MRREVIEWER = {Ramon van Handel}, - DOI = {10.1007/s00440-009-0250-6}, - URL = {https://doi.org/10.1007/s00440-009-0250-6}, -} - -@article {hairer2014spectral, - AUTHOR = {Hairer, Martin and Stuart, Andrew M. and Vollmer, Sebastian - J.}, - TITLE = {Spectral gaps for a {M}etropolis-{H}astings algorithm in - infinite dimensions}, - JOURNAL = {Ann. Appl. Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {24}, - YEAR = {2014}, - NUMBER = {6}, - PAGES = {2455--2490}, - ISSN = {1050-5164}, - MRCLASS = {60J22 (37A30 60B12 60J05 65C05 65C40)}, - MRNUMBER = {3262508}, -MRREVIEWER = {Laurent Miclo}, - DOI = {10.1214/13-AAP982}, - URL = {https://doi.org/10.1214/13-AAP982}, -} - -@article {cloez2015exponential, - AUTHOR = {Cloez, Bertrand and Hairer, Martin}, - TITLE = {Exponential ergodicity for {M}arkov processes with random - switching}, - JOURNAL = {Bernoulli}, - FJOURNAL = {Bernoulli. 
Official Journal of the Bernoulli Society for - Mathematical Statistics and Probability}, - VOLUME = {21}, - YEAR = {2015}, - NUMBER = {1}, - PAGES = {505--536}, - ISSN = {1350-7265}, - MRCLASS = {60J05 (37A50 37H15 60B10)}, - MRNUMBER = {3322329}, -MRREVIEWER = {Feng-Rung Hu}, - DOI = {10.3150/13-BEJ577}, - URL = {https://doi.org/10.3150/13-BEJ577}, -} - -@article {butkovsky2014subgeometric, - AUTHOR = {Butkovsky, Oleg}, - TITLE = {Subgeometric rates of convergence of {M}arkov processes in the - {W}asserstein metric}, - JOURNAL = {Ann. Appl. Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {24}, - YEAR = {2014}, - NUMBER = {2}, - PAGES = {526--552}, - ISSN = {1050-5164}, - MRCLASS = {60J05 (60J25)}, - MRNUMBER = {3178490}, -MRREVIEWER = {Nikola Sandri\'{c}}, - DOI = {10.1214/13-AAP922}, - URL = {https://doi.org/10.1214/13-AAP922}, -} - -@article {durmus2016subgeometric, - AUTHOR = {Durmus, Alain and Fort, Gersende and Moulines, \'{E}ric}, - TITLE = {Subgeometric rates of convergence in {W}asserstein distance - for {M}arkov chains}, - JOURNAL = {Ann. Inst. Henri Poincar\'{e} Probab. Stat.}, - FJOURNAL = {Annales de l'Institut Henri Poincar\'{e} Probabilit\'{e}s et - Statistiques}, - VOLUME = {52}, - YEAR = {2016}, - NUMBER = {4}, - PAGES = {1799--1822}, - ISSN = {0246-0203}, - MRCLASS = {60J10 (60B10 60J05 60J22 65C40)}, - MRNUMBER = {3573296}, - DOI = {10.1214/15-AIHP699}, - URL = {https://doi.org/10.1214/15-AIHP699}, -} - -@article {eberle2016reflection, - AUTHOR = {Eberle, Andreas}, - TITLE = {Reflection couplings and contraction rates for diffusions}, - JOURNAL = {Probab. 
Theory Related Fields}, - FJOURNAL = {Probability Theory and Related Fields}, - VOLUME = {166}, - YEAR = {2016}, - NUMBER = {3-4}, - PAGES = {851--886}, - ISSN = {0178-8051}, - MRCLASS = {60J60 (60H10)}, - MRNUMBER = {3568041}, -MRREVIEWER = {Julian Tugaut}, - DOI = {10.1007/s00440-015-0673-1}, - URL = {https://doi.org/10.1007/s00440-015-0673-1}, -} - -@article {eberle2011reflection, - AUTHOR = {Eberle, Andreas}, - TITLE = {Reflection coupling and {W}asserstein contractivity without - convexity}, - JOURNAL = {C. R. Math. Acad. Sci. Paris}, - FJOURNAL = {Comptes Rendus Math\'{e}matique. Acad\'{e}mie des Sciences. Paris}, - VOLUME = {349}, - YEAR = {2011}, - NUMBER = {19-20}, - PAGES = {1101--1104}, - ISSN = {1631-073X}, - MRCLASS = {60J60}, - MRNUMBER = {2843007}, - DOI = {10.1016/j.crma.2011.09.003}, - URL = {https://doi.org/10.1016/j.crma.2011.09.003}, -} - -@article{eberle2018quantitative, - title={Quantitative contraction rates for Markov chains on general state spaces}, - author={Eberle, Andreas and Majka, Mateusz B}, - journal={arXiv preprint arXiv:1808.07033}, - year={2018} -} - -@article {rudolf2018perturbation, - AUTHOR = {Rudolf, Daniel and Schweizer, Nikolaus}, - TITLE = {Perturbation theory for {M}arkov chains via {W}asserstein - distance}, - JOURNAL = {Bernoulli}, - FJOURNAL = {Bernoulli. Official Journal of the Bernoulli Society for - Mathematical Statistics and Probability}, - VOLUME = {24}, - YEAR = {2018}, - NUMBER = {4A}, - PAGES = {2610--2639}, - ISSN = {1350-7265}, - MRCLASS = {60J05 (60B10 60J22 60J35)}, - MRNUMBER = {3779696}, - DOI = {10.3150/17-BEJ938}, - URL = {https://doi.org/10.3150/17-BEJ938}, -} - -@article {joulin2010curvature, - AUTHOR = {Joulin, Ald\'{e}ric and Ollivier, Yann}, - TITLE = {Curvature, concentration and error estimates for {M}arkov - chain {M}onte {C}arlo}, - JOURNAL = {Ann. 
Probab.}, - FJOURNAL = {The Annals of Probability}, - VOLUME = {38}, - YEAR = {2010}, - NUMBER = {6}, - PAGES = {2418--2442}, - ISSN = {0091-1798}, - MRCLASS = {60J22 (62E17 65C05)}, - MRNUMBER = {2683634}, -MRREVIEWER = {Irina Alexandrovna Shalimova}, - DOI = {10.1214/10-AOP541}, - URL = {https://doi.org/10.1214/10-AOP541}, -} - -@Book{nesterov:2004, - Title = {Introductory Lectures on Convex Optimization: A Basic Course}, - Author = {Nesterov, Y.}, - Publisher = {Springer}, - Year = {2004}, - Series = {Applied Optimization}, - - ISBN = {9781402075537}, - Lccn = {2003061994}, - Url = {http://books.google.fr/books?id=VyYLem-l3CgC} -} - - -@inproceedings{Bubeck:2015, - author = {Bubeck, S. and Eldan, R and Lehec, J.}, - title = {Finite-time Analysis of Projected Langevin Monte Carlo}, - booktitle = {Proceedings of the 28th International Conference on Neural Information Processing Systems}, - series = {NIPS'15}, - year = {2015}, - location = {Montreal, Canada}, - pages = {1243--1251}, - numpages = {9}, - url = {http://dl.acm.org/citation.cfm?id=2969239.2969378}, - acmid = {2969378}, - publisher = {MIT Press}, - address = {Cambridge, MA, USA}, -} -@book{bauschke:combettes:2011, - author = {Bauschke, H. H. and Combettes, P. L.}, - title = {Convex Analysis and Monotone Operator Theory in {H}ilbert Spaces}, - year = {2011}, - isbn = {1441994661, 9781441994660}, - edition = {1st}, - publisher = {Springer Publishing Company, Incorporated}, -} -@book{parikh:boyd:2013, - title={Proximal Algorithms}, - author={Parikh, N. 
and Boyd, S.}, - isbn={9781601987167}, - series={Foundations and Trends(r) in Optimization}, - url={https://books.google.fr/books?id=DS04ngEACAAJ}, - year={2013}, - publisher={Now Publishers} -} -@article{bernton2018langevin, - title={Langevin Monte Carlo and JKO splitting}, - author={Bernton, E.}, - journal={arXiv preprint arXiv:1802.08671}, - year={2018} -} - -@article{durmus2018analysis, - title={Analysis of Langevin Monte Carlo via convex optimization}, - author={Durmus, Alain and Majewski, Szymon and Miasojedow, B{\l}a{\.z}ej}, - journal={arXiv preprint arXiv:1802.09188}, - year={2018} -} - -@article {bubley:dyer:jerrum:1998, - AUTHOR = {Bubley, R. and Dyer, M. and Jerrum, M.}, - TITLE = {An elementary analysis of a procedure for sampling points in a - convex body}, - JOURNAL = {Random Structures Algorithms}, - FJOURNAL = {Random Structures \& Algorithms}, - VOLUME = {12}, - YEAR = {1998}, - NUMBER = {3}, - PAGES = {213--235}, - ISSN = {1042-9832}, - MRCLASS = {68Q25 (65C50 68W25)}, - MRNUMBER = {1635248}, - DOI = {10.1002/(SICI)1098-2418(199805)12:3<213::AID-RSA1>3.3.CO;2-R}, - URL = - {http://dx.doi.org/10.1002/(SICI)1098-2418(199805)12:3<213::AID-RSA1>3.3.CO;2-R}, -} - - -@article{durmus2015quantitative, - title={Quantitative bounds of convergence for geometrically ergodic Markov chain in the Wasserstein distance with application to the Metropolis Adjusted Langevin Algorithm}, - author={Durmus, Alain and Moulines, {\'E}ric}, - journal={Statistics and Computing}, - volume={25}, - number={1}, - pages={5--19}, - year={2015}, - publisher={Springer} -} - - - -@article{rosenthal:1995, - AUTHOR = {Rosenthal, J. S.}, - TITLE = {Minorization conditions and convergence rates for {M}arkov - chain {M}onte {C}arlo}, - JOURNAL = {J. Amer. Statist. 
Assoc.}, - FJOURNAL = {Journal of the American Statistical Association}, - VOLUME = {90}, - YEAR = {1995}, - NUMBER = {430}, - PAGES = {558--566}, - ISSN = {0162-1459}, - MRCLASS = {62M99}, - MRNUMBER = {1340509}, -} - - -@article{jarner2001locally, - AUTHOR = {Jarner, S. F. and Tweedie, R. L.}, - TITLE = {Locally contracting iterated functions and stability of - {M}arkov chains}, - JOURNAL = {J. Appl. Probab.}, - FJOURNAL = {Journal of Applied Probability}, - VOLUME = {38}, - YEAR = {2001}, - NUMBER = {2}, - PAGES = {494--507}, - ISSN = {0021-9002}, - MRCLASS = {60J05 (60J10)}, - MRNUMBER = {1834756}, -MRREVIEWER = {David R. Steinsaltz}, - DOI = {10.1017/s0021900200019999}, - URL = {https://doi.org/10.1017/s0021900200019999}, -} - -@book{ikeda1989sto, - AUTHOR = {Ikeda, Nobuyuki and Watanabe, Shinzo}, - TITLE = {Stochastic Differential Equations and Diffusion Processes}, - SERIES = {North-Holland Mathematical Library}, - VOLUME = {24}, - EDITION = {Second}, - PUBLISHER = {North-Holland Publishing Co., Amsterdam; Kodansha, Ltd., - Tokyo}, - YEAR = {1989}, - PAGES = {xvi+555}, - ISBN = {0-444-87378-3}, - MRCLASS = {60H10 (58G32 60J60)}, - MRNUMBER = {1011252}, -} - -@article{hashiminsky1960ergodic, - AUTHOR = {Hashiminski, R. Z.}, - TITLE = {Ergodic properties of recurrent diffusion processes and - stabilization of the solution of the {C}auchy problem for - parabolic equations}, - JOURNAL = {Teor. Verojatnost. i Primenen.}, - FJOURNAL = {Akademija Nauk SSSR. Teorija Verojatnoste\u{\i} i ee Primenenija}, - VOLUME = {5}, - YEAR = {1960}, - PAGES = {196--214}, - ISSN = {0040-361x}, - MRCLASS = {60.62}, - MRNUMBER = {0133871}, -MRREVIEWER = {J. L. 
Snell}, -} - -@book{boucheron2013concentration, - AUTHOR = {Boucheron, St\'{e}phane and Lugosi, G\'{a}bor and Massart, Pascal}, - TITLE = {Concentration inequalities}, - NOTE = {A nonasymptotic theory of independence, - With a foreword by Michel Ledoux}, - PUBLISHER = {Oxford University Press, Oxford}, - YEAR = {2013}, - PAGES = {x+481}, - ISBN = {978-0-19-953525-5}, - MRCLASS = {60E15 (60D05 60G15 60G50 60G70 62G20)}, - MRNUMBER = {3185193}, -MRREVIEWER = {Sreenivasan Ravi}, - DOI = {10.1093/acprof:oso/9780199535255.001.0001}, - URL = {https://doi.org/10.1093/acprof:oso/9780199535255.001.0001}, -} - -@article{douc:moulines:rosenthal:2004, - AUTHOR = {Douc, R. and Moulines, E. and Rosenthal, J. S.}, - TITLE = {Quantitative bounds on convergence of time-inhomogeneous - {M}arkov chains}, - JOURNAL = {Ann. Appl. Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {14}, - YEAR = {2004}, - NUMBER = {4}, - PAGES = {1643--1665}, - ISSN = {1050-5164}, - MRCLASS = {60J22 (60J05)}, - MRNUMBER = {2099647}, -MRREVIEWER = {B. L. Granovsky}, - DOI = {10.1214/105051604000000620}, - URL = {https://doi.org/10.1214/105051604000000620}, -} - - - -@article{fort:2002, - title={Computable bounds for {V}-geometric ergodicity of {M}arkov transition kernels}, - author={Fort, G.}, - journal={Rapport de Recherche, Univ. J. Fourier, RR 1047-M., \url{https://www.math.univ-toulouse.fr/%7Egfort/Preprints/fort:2002.pdf}}, - year={2002}, - url = {https://www.math.univ-toulouse.fr/%7Egfort/Preprints/fort:2002.pdf}, -} - -@article{baxendal:1993, - title={Uniform estimates for geometric ergodicity of recurrent Markov processes}, - author={P. H. Baxendal}, - journal={Report, Univ. Southern California}, - year={1993}, -} - -@article {meyn:tweedie:1994, - AUTHOR = {Meyn, S. P. and Tweedie, R. L.}, - TITLE = {Computable bounds for geometric convergence rates of {M}arkov - chains}, - JOURNAL = {Ann. Appl. 
Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {4}, - YEAR = {1994}, - NUMBER = {4}, - PAGES = {981--1011}, - ISSN = {1050-5164}, - MRCLASS = {60J10}, - MRNUMBER = {1304770}, -MRREVIEWER = {Jeffrey S. Rosenthal}, - URL = - {http://links.jstor.org/sici?sici=1050-5164(199411)4:4<981:CBFGCR>2.0.CO;2-U&origin=MSN}, -} - - -%3 8 22 32 - -%7 13 23 32 - - -@article {lund:tweedie:1996, - AUTHOR = {Lund, R. B. and Tweedie, R. L.}, - TITLE = {Geometric convergence rates for stochastically ordered - {M}arkov chains}, - JOURNAL = {Math. Oper. Res.}, - FJOURNAL = {Mathematics of Operations Research}, - VOLUME = {21}, - YEAR = {1996}, - NUMBER = {1}, - PAGES = {182--194}, - ISSN = {0364-765X}, - MRCLASS = {60J05 (60K25)}, - MRNUMBER = {1385873}, -MRREVIEWER = {Hermann Thorisson}, - DOI = {10.1287/moor.21.1.182}, - URL = {https://doi.org/10.1287/moor.21.1.182}, -} - -@article {popov:1977, - AUTHOR = {Popov, N. N.}, - TITLE = {Geometric ergodicity conditions for countable {M}arkov chains}, - JOURNAL = {Dokl. Akad. Nauk SSSR}, - FJOURNAL = {Doklady Akademii Nauk SSSR}, - VOLUME = {234}, - YEAR = {1977}, - NUMBER = {2}, - PAGES = {316--319}, - ISSN = {0002-3264}, - MRCLASS = {60J10}, - MRNUMBER = {0458598}, -MRREVIEWER = {Serban Grigorescu}, -} - -@article {goldys:maslowski:2006, - AUTHOR = {Goldys, B. and Maslowski, B.}, - TITLE = {Lower estimates of transition densities and bounds on - exponential ergodicity for stochastic {PDE}'s}, - JOURNAL = {Ann. Probab.}, - FJOURNAL = {The Annals of Probability}, - VOLUME = {34}, - YEAR = {2006}, - NUMBER = {4}, - PAGES = {1451--1496}, - ISSN = {0091-1798}, - MRCLASS = {60H15 (35R60 37L55 60J35)}, - MRNUMBER = {2257652}, -MRREVIEWER = {Pedro Mar\'{i}n Rubio}, - DOI = {10.1214/009117905000000800}, - URL = {https://doi.org/10.1214/009117905000000800}, -} - -@article {tuominen:tweedie:1994, - AUTHOR = {Tuominen, P. and Tweedie, R. 
L.}, - TITLE = {Subgeometric rates of convergence of {$f$}-ergodic {M}arkov - chains}, - JOURNAL = {Adv. in Appl. Probab.}, - FJOURNAL = {Advances in Applied Probability}, - VOLUME = {26}, - YEAR = {1994}, - NUMBER = {3}, - PAGES = {775--798}, - ISSN = {0001-8678}, - MRCLASS = {60J05}, - MRNUMBER = {1285459}, -MRREVIEWER = {Hermann Thorisson}, - DOI = {10.2307/1427820}, - URL = {https://doi.org/10.2307/1427820}, -} - -@article {fort:roberts:2005, - AUTHOR = {Fort, G. and Roberts, G. O.}, - TITLE = {Subgeometric ergodicity of strong {M}arkov processes}, - JOURNAL = {Ann. Appl. Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {15}, - YEAR = {2005}, - NUMBER = {2}, - PAGES = {1565--1589}, - ISSN = {1050-5164}, - MRCLASS = {60J25 (60J60 60K25 60K30)}, - MRNUMBER = {2134115}, -MRREVIEWER = {Dean H. Fearn}, - DOI = {10.1214/105051605000000115}, - URL = {https://doi.org/10.1214/105051605000000115}, -} - -@article {jarner:roberts:2002, - AUTHOR = {Jarner, S. F. and Roberts, G. O.}, - TITLE = {Polynomial convergence rates of {M}arkov chains}, - JOURNAL = {Ann. Appl. Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {12}, - YEAR = {2002}, - NUMBER = {1}, - PAGES = {224--247}, - ISSN = {1050-5164}, - MRCLASS = {60J05 (60J10)}, - MRNUMBER = {1890063}, -MRREVIEWER = {A. Fuchs}, - DOI = {10.1214/aoap/1015961162}, - URL = {https://doi.org/10.1214/aoap/1015961162}, -} - - - -@article {douc:fort:guillin:2009, - AUTHOR = {Douc, R. and Fort, G. and Guillin, A.}, - TITLE = {Subgeometric rates of convergence of {$f$}-ergodic strong - {M}arkov processes}, - JOURNAL = {Stochastic Process. 
Appl.}, - FJOURNAL = {Stochastic Processes and their Applications}, - VOLUME = {119}, - YEAR = {2009}, - NUMBER = {3}, - PAGES = {897--923}, - ISSN = {0304-4149}, - MRCLASS = {60J25 (60F10 60J35 60J60)}, - MRNUMBER = {2499863}, - DOI = {10.1016/j.spa.2008.03.007}, - URL = {https://doi.org/10.1016/j.spa.2008.03.007}, -} - - -@article {veretennikov:1997, - AUTHOR = {Veretennikov, A. Yu.}, - TITLE = {On polynomial mixing bounds for stochastic differential - equations}, - JOURNAL = {Stochastic Process. Appl.}, - FJOURNAL = {Stochastic Processes and their Applications}, - VOLUME = {70}, - YEAR = {1997}, - NUMBER = {1}, - PAGES = {115--127}, - ISSN = {0304-4149}, - MRCLASS = {60H10 (60J60)}, - MRNUMBER = {1472961}, - DOI = {10.1016/S0304-4149(97)00056-2}, - URL = {https://doi.org/10.1016/S0304-4149(97)00056-2}, -} - -@article {chan:1993, - AUTHOR = {Chan, K. S.}, - TITLE = {Asymptotic behavior of the {G}ibbs sampler}, - JOURNAL = {J. Amer. Statist. Assoc.}, - FJOURNAL = {Journal of the American Statistical Association}, - VOLUME = {88}, - YEAR = {1993}, - NUMBER = {421}, - PAGES = {320--326}, - ISSN = {0162-1459}, - MRCLASS = {60J10 (62F15)}, - MRNUMBER = {1212494}, - URL = - {http://links.jstor.org/sici?sici=0162-1459(199303)88:421<320:ABOTGS>2.0.CO;2-T&origin=MSN}, -} - -@article {chen:tsay:1991, - AUTHOR = {Chen, R. and Tsay, R. S.}, - TITLE = {On the ergodicity of {${\rm TAR}(1)$} processes}, - JOURNAL = {Ann. Appl. Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {1}, - YEAR = {1991}, - NUMBER = {4}, - PAGES = {613--634}, - ISSN = {1050-5164}, - MRCLASS = {60J05 (62M10 93E03)}, - MRNUMBER = {1129777}, -MRREVIEWER = {Eric V. Slud}, - URL = - {http://links.jstor.org/sici?sici=1050-5164(199111)1:4<613:OTEOTP>2.0.CO;2-W&origin=MSN}, -} - - - -@ARTICLE{roberts:polson:1994, - author = {G. O. Roberts and N. G. 
Polson}, - title = {On the Geometric Convergence of the Gibbs Sampler}, - journal = {Journal of the Royal Statistical Society, Series B}, - year = {1994}, - volume = {56}, - pages = {377--384} -} - - - -@article {rosenthal:2002, - AUTHOR = {Rosenthal, J. S.}, - TITLE = {Quantitative convergence rates of {M}arkov chains: a simple - account}, - JOURNAL = {Electron. Comm. Probab.}, - FJOURNAL = {Electronic Communications in Probability}, - VOLUME = {7}, - YEAR = {2002}, - PAGES = {123--128}, - ISSN = {1083-589X}, - MRCLASS = {60J05}, - MRNUMBER = {1917546}, -MRREVIEWER = {Martin Jacobsen}, - DOI = {10.1214/ECP.v7-1054}, - URL = {https://doi.org/10.1214/ECP.v7-1054}, -} - -@article {roberts:tweedie:1999, - AUTHOR = {Roberts, G. O. and Tweedie, R. L.}, - TITLE = {Bounds on regeneration times and convergence rates for - {M}arkov chains}, - JOURNAL = {Stochastic Process. Appl.}, - FJOURNAL = {Stochastic Processes and their Applications}, - VOLUME = {80}, - YEAR = {1999}, - NUMBER = {2}, - PAGES = {211--229}, - ISSN = {0304-4149}, - MRCLASS = {60J10 (60K15)}, - MRNUMBER = {1682243}, -MRREVIEWER = {Mark R. Jerrum}, - DOI = {10.1016/S0304-4149(98)00085-4}, - URL = {https://doi.org/10.1016/S0304-4149(98)00085-4}, -} - -@article {jones:hobert:2004, - AUTHOR = {Jones, G. L. and Hobert, J. P.}, - TITLE = {Sufficient burn-in for {G}ibbs samplers for a hierarchical - random effects model}, - JOURNAL = {Ann. Statist.}, - FJOURNAL = {The Annals of Statistics}, - VOLUME = {32}, - YEAR = {2004}, - NUMBER = {2}, - PAGES = {784--817}, - ISSN = {0090-5364}, - MRCLASS = {60J10 (62F15)}, - MRNUMBER = {2060178}, -MRREVIEWER = {Ross P. Kindermann}, - DOI = {10.1214/009053604000000184}, - URL = {https://doi.org/10.1214/009053604000000184}, -} - -@article{eberle:guillin:zimmer:2018, - title={Quantitative Harris-type theorems for diffusions and McKean--Vlasov processes}, - author={Eberle, A. and Guillin, A. 
and Zimmer, R.}, - journal={Transactions of the American Mathematical Society}, - year={2018} -} - - -@article{butkovsky:scheutzow:2017, -author = "Butkovsky, O. and Scheutzow, M.", -doi = "10.1214/17-EJP122", -fjournal = "Electronic Journal of Probability", -journal = "Electron. J. Probab.", -pages = "23 pp.", -pno = "98", -publisher = "The Institute of Mathematical Statistics and the Bernoulli Society", -title = "Invariant measures for stochastic functional differential equations", -url = "https://doi.org/10.1214/17-EJP122", -volume = "22", -year = "2017" -} - - - -@article{eberle:guillin:zimmer:2017b, - title={Couplings and quantitative contraction rates for Langevin dynamics}, - author={Eberle, A. and Guillin, A. and Zimmer, R.}, - journal={arXiv preprint arXiv:1703.01617}, - year={2017} -} - - -@article{durmus:eberle:guillin:zimmer:2018, - title={An elementary approach to uniform in time propagation of chaos}, - author={Durmus, A. and Eberle, A. and Guillin, A. and Zimmer, R.}, - journal={arXiv preprint arXiv:1805.11387}, - year={2018} -} - - -@article{qin:hobert:2019, - title={Geometric convergence bounds for Markov chains in Wasserstein distance based on generalized drift and contraction conditions}, - author={Qin, Q. and Hobert, J. P.}, - journal={arXiv preprint arXiv:1902.02964}, - year=2019 -} - - - -@article {roberts:rosenthal:1996, - AUTHOR = {Roberts, G. O. and Rosenthal, J. S.}, - TITLE = {Quantitative bounds for convergence rates of continuous time - {M}arkov processes}, - JOURNAL = {Electron. J. Probab.}, - FJOURNAL = {Electronic Journal of Probability}, - VOLUME = {1}, - YEAR = {1996}, - PAGES = {no. 9, approx. 21 pp.\}, - ISSN = {1083-6489}, - MRCLASS = {60J25}, - MRNUMBER = {1423462}, -MRREVIEWER = {Mu Fa Chen}, - DOI = {10.1214/EJP.v1-9}, - URL = {https://doi.org/10.1214/EJP.v1-9}, -} - - - -@article {jones:hobert:2001, - AUTHOR = {Jones, G. L. and Hobert, J. 
P.}, - TITLE = {Honest exploration of intractable probability distributions - via {M}arkov chain {M}onte {C}arlo}, - JOURNAL = {Statist. Sci.}, - FJOURNAL = {Statistical Science. A Review Journal of the Institute of - Mathematical Statistics}, - VOLUME = {16}, - YEAR = {2001}, - NUMBER = {4}, - PAGES = {312--334}, - ISSN = {0883-4237}, - MRCLASS = {60J05 (65C05 65C40)}, - MRNUMBER = {1888447}, - DOI = {10.1214/ss/1015346317}, - URL = {https://doi.org/10.1214/ss/1015346317}, -} - - -@article{ollivier:2009, - title={Ricci curvature of Markov chains on metric spaces}, - author={Ollivier, Y.}, - journal={Journal of Functional Analysis}, - volume={256}, - number={3}, - pages={810--864}, - year={2009}, - publisher={Elsevier} -} - - -@article{joulin:ollivier:2010, - title={Curvature, concentration and error estimates for Markov chain Monte Carlo}, - author={Joulin, A. and Ollivier, Y.}, - journal={The Annals of Probability}, - volume={38}, - number={6}, - pages={2418--2442}, - year={2010}, - publisher={Institute of Mathematical Statistics} -} - -@article{debortoli2019supplementary, - title={From continuous to discrete processes and back -- Supplementary}, - author={De Bortoli, V. and Durmus, A.}, - journal={arXiv preprint arXiv:tocomplete}, - year={2017} -} - -@article{debortoli2019back, - title={From continuous to discrete processes and back}, - author={De Bortoli, V. and Durmus, A.}, - journal={arXiv preprint arXiv:tocomplete}, - year={2017} -} - - -@article {lindvall1986coupling, - AUTHOR = {Lindvall, Torgny and Rogers, L. C. G.}, - TITLE = {Coupling of multidimensional diffusions by reflection}, - JOURNAL = {Ann. 
Probab.}, - FJOURNAL = {The Annals of Probability}, - VOLUME = {14}, - YEAR = {1986}, - NUMBER = {3}, - PAGES = {860--872}, - ISSN = {0091-1798}, - MRCLASS = {60J60 (60F20 60H10)}, - MRNUMBER = {841588}, -MRREVIEWER = {Uwe R\"{o}sler}, - URL = - {http://links.jstor.org/sici?sici=0091-1798(198607)14:3<860:COMDBR>2.0.CO;2-V&origin=MSN}, -} - - -@article{dalalyan2019user, - title={User-friendly guarantees for the Langevin Monte Carlo with inaccurate gradient}, - author={Dalalyan, Arnak S and Karagulyan, Avetik}, - journal={Stochastic Processes and their Applications}, - year={2019}, - publisher={Elsevier} -} - - -@inproceedings{chatterji2018theory, - title={On the Theory of Variance Reduction for Stochastic Gradient Monte Carlo}, - author={Chatterji, Niladri and Flammarion, Nicolas and Ma, Yian and Bartlett, Peter and Jordan, Michael}, - booktitle={International Conference on Machine Learning}, - pages={763--772}, - year={2018} -} - - -@article{baker2017control, - title={Control variates for stochastic gradient MCMC}, - author={Baker, Jack and Fearnhead, Paul and Fox, Emily B and Nemeth, Christopher}, - journal={Statistics and Computing}, - pages={1--17}, - publisher={Springer} -} - -@article{qin2018wasserstein, - title={Wasserstein-based methods for convergence complexity analysis of MCMC with application to Albert and Chib's algorithm}, - author={Qin, Qian and Hobert, James P}, - journal={arXiv preprint arXiv:1810.08826}, - year={2018} -} - -@article {steinsaltz1999locally, - AUTHOR = {Steinsaltz, David}, - TITLE = {Locally contractive iterated function systems}, - JOURNAL = {Ann. Probab.}, - FJOURNAL = {The Annals of Probability}, - VOLUME = {27}, - YEAR = {1999}, - NUMBER = {4}, - PAGES = {1952--1979}, - ISSN = {0091-1798}, - MRCLASS = {26A18 (28A80 60K99)}, - MRNUMBER = {1742896}, -MRREVIEWER = {M. 
Iosifescu}, - DOI = {10.1214/aop/1022677556}, - URL = {https://doi.org/10.1214/aop/1022677556}, -} - -@article {madras2010quantitative, - AUTHOR = {Madras, Neal and Sezer, Deniz}, - TITLE = {Quantitative bounds for {M}arkov chain convergence: - {W}asserstein and total variation distances}, - JOURNAL = {Bernoulli}, - FJOURNAL = {Bernoulli. Official Journal of the Bernoulli Society for - Mathematical Statistics and Probability}, - VOLUME = {16}, - YEAR = {2010}, - NUMBER = {3}, - PAGES = {882--908}, - ISSN = {1350-7265}, - MRCLASS = {60J22 (37A50 60J05)}, - MRNUMBER = {2730652}, -MRREVIEWER = {Thomas R. Boucher}, - DOI = {10.3150/09-BEJ238}, - URL = {https://doi.org/10.3150/09-BEJ238}, -} - -@incollection {hairer2011yet, - AUTHOR = {Hairer, Martin and Mattingly, Jonathan C.}, - TITLE = {Yet another look at {H}arris' ergodic theorem for {M}arkov - chains}, - BOOKTITLE = {Seminar on {S}tochastic {A}nalysis, {R}andom {F}ields and - {A}pplications {VI}}, - SERIES = {Progr. Probab.}, - VOLUME = {63}, - PAGES = {109--117}, - PUBLISHER = {Birkh\"{a}user/Springer Basel AG, Basel}, - YEAR = {2011}, - MRCLASS = {60J05 (37A30 37A50 47D07)}, - MRNUMBER = {2857021}, -MRREVIEWER = {Wojciech Bartoszek}, - DOI = {10.1007/978-3-0348-0021-1_7}, - URL = {https://doi.org/10.1007/978-3-0348-0021-1_7}, -} -@article {vaisala:2005, - AUTHOR = {V\"{a}is\"{a}l\"{a}, J.}, - TITLE = {Gromov hyperbolic spaces}, - JOURNAL = {Expo. Math.}, - FJOURNAL = {Expositiones Mathematicae}, - VOLUME = {23}, - YEAR = {2005}, - NUMBER = {3}, - PAGES = {187--231}, - ISSN = {0723-0869}, - MRCLASS = {53C23 (20F67)}, - MRNUMBER = {2164775}, -MRREVIEWER = {Michel Coornaert}, - DOI = {10.1016/j.exmath.2005.01.010}, - URL = {https://doi.org/10.1016/j.exmath.2005.01.010}, -} - - -@book {rogers2000diffusions, - AUTHOR = {Rogers, L. C. G. and Williams, David}, - TITLE = {Diffusions, {M}arkov processes, and martingales. {V}ol. 
2}, - SERIES = {Cambridge Mathematical Library}, - NOTE = {It\^{o} calculus, - Reprint of the second (1994) edition}, - PUBLISHER = {Cambridge University Press, Cambridge}, - YEAR = {2000}, - PAGES = {xiv+480}, - ISBN = {0-521-77593-0}, - MRCLASS = {60J60 (60G07 60H05 60J25)}, - MRNUMBER = {1780932}, - DOI = {10.1017/CBO9781107590120}, - URL = {https://doi.org/10.1017/CBO9781107590120}, -} - - -@article{gadat, - title={Optimal non-asymptotic bound of the Ruppert-Polyak averaging without strong convexity}, - author={Gadat, S{\'e}bastien and Panloup, Fabien}, - journal={arXiv preprint arXiv:1709.03342}, - year={2017} -} - - -@article{bottou:lecun:2005, -title = "On-line learning for very large data sets", -abstract = "The design of very large learning systems presents many unsolved challenges. Consider, for instance, a system that 'watches' television for a few weeks and learns to enumerate the objects present in these images. Most current learning algorithms do not scale well enough to handle such massive quantities of data. Experience suggests that the stochastic learning algorithms are best suited to such tasks. This is at first surprising because stochastic learning algorithms optimize the training error rather slowly. Our paper reconsiders the convergence speed in terms of how fast a learning algorithm optimizes the testing error. This reformulation shows the superiority of the well designed stochastic learning algorithm.", -keywords = "Convergence speed, Learning, Online learning, Stochastic optimization", -author = "L. Bottou and Y. LeCun", -year = "2005", -month = "3", -doi = "10.1002/asmb.538", -language = "English (US)", -volume = "21", -pages = "137--151", -journal = "Applied Stochastic Models in Business and Industry", -issn = "1524-1904", -publisher = "John Wiley and Sons Ltd", -number = "2", -} - -@article{polyak:juditsky:1992, -author = {Polyak, B. T. and Juditsky, A. 
B.}, -title = {Acceleration of Stochastic Approximation by Averaging}, -journal = {SIAM Journal on Control and Optimization}, -volume = {30}, -number = {4}, -pages = {838-855}, -year = {1992}, -doi = {10.1137/0330046}, -URL = { - https://doi.org/10.1137/0330046 -}, -eprint = { - https://doi.org/10.1137/0330046 -} -} - -@article{kingma:ba:2014, - title={Adam: A method for stochastic optimization}, - author={Kingma, D. P and Ba, J.}, - journal={arXiv preprint arXiv:1412.6980}, - year={2014} -} - - -@book{karatzas1991brownian, - AUTHOR = {Karatzas, Ioannis and Shreve, Steven E.}, - TITLE = {Brownian motion and stochastic calculus}, - SERIES = {Graduate Texts in Mathematics}, - VOLUME = {113}, - EDITION = {Second}, - PUBLISHER = {Springer-Verlag, New York}, - YEAR = {1991}, - PAGES = {xxiv+470}, - ISBN = {0-387-97655-8}, - MRCLASS = {60J65 (35K99 35R60 60G44 60H10 60J60)}, - MRNUMBER = {1121940}, - DOI = {10.1007/978-1-4612-0949-2}, - URL = {https://doi.org/10.1007/978-1-4612-0949-2}, -} - - -@book{milstein1995numerical, - AUTHOR = {Milstein, G. 
N.}, - TITLE = {Numerical integration of stochastic differential equations}, - SERIES = {Mathematics and its Applications}, - VOLUME = {313}, - NOTE = {Translated and revised from the 1988 Russian original}, - PUBLISHER = {Kluwer Academic Publishers Group, Dordrecht}, - YEAR = {1995}, - PAGES = {viii+169}, - ISBN = {0-7923-3213-X}, - MRCLASS = {65C05 (60H10 65D30 65L99 65P05 65U05)}, - MRNUMBER = {1335454}, -MRREVIEWER = {Eckhard Platen}, - DOI = {10.1007/978-94-015-8455-5}, - URL = {https://doi.org/10.1007/978-94-015-8455-5}, -} - -@book{kushneryin, - title={Stochastic approximation and recursive algorithms and applications}, - author={Kushner, Harold and Yin, G George}, - volume={35}, - year={2003}, - publisher={Springer Science \& Business Media} -} - -@article{gadat, - title={Optimal non-asymptotic bound of the Ruppert-Polyak averaging without strong convexity}, - author={Gadat, S{\'e}bastien and Panloup, Fabien}, - journal={arXiv preprint arXiv:1709.03342}, - year={2017} -} - - - - - - -@article {ljung:1977, - AUTHOR = {Ljung, L.}, - TITLE = {Analysis of recursive stochastic algorithms}, - JOURNAL = {IEEE Trans. Automatic Control}, - FJOURNAL = {Institute of Electrical and Electronics Engineers. - Transactions on Automatic Control}, - VOLUME = {AC-22}, - YEAR = {1977}, - NUMBER = {4}, - PAGES = {551--575}, - ISSN = {0018-9286}, - MRCLASS = {93E15}, - MRNUMBER = {0465458}, -MRREVIEWER = {H. J. Kushner}, -} - -@book {kushner:clark:1978, - AUTHOR = {Kushner, H. J. and Clark, D. S.}, - TITLE = {Stochastic approximation methods for constrained and - unconstrained systems}, - SERIES = {Applied Mathematical Sciences}, - VOLUME = {26}, - PUBLISHER = {Springer-Verlag, New York-Berlin}, - YEAR = {1978}, - PAGES = {x+261}, - ISBN = {0-387-90341-0}, - MRCLASS = {62L20 (93E10)}, - MRNUMBER = {499560}, -MRREVIEWER = {Lennart Ljung}, -} - -@article {metivier:priouret:1984, - AUTHOR = {M\'etivier, M. 
and Priouret, P.}, - TITLE = {Applications of a {K}ushner and {C}lark lemma to general - classes of stochastic algorithms}, - JOURNAL = {IEEE Trans. Inform. Theory}, - FJOURNAL = {Institute of Electrical and Electronics Engineers. - Transactions on Information Theory}, - VOLUME = {30}, - YEAR = {1984}, - NUMBER = {2, part 1}, - PAGES = {140--151}, - ISSN = {0018-9448}, - MRCLASS = {62L20 (93E25)}, - MRNUMBER = {807052}, -MRREVIEWER = {Sami Abdelhamid}, - DOI = {10.1109/TIT.1984.1056894}, - URL = {https://doi.org/10.1109/TIT.1984.1056894}, -} - - - -@article {metivier:priouret:1987, - AUTHOR = {M\'etivier, M. and Priouret, P.}, - TITLE = {Th\'eor\`emes de convergence presque sure pour une classe - d'algorithmes stochastiques \`a pas d\'ecroissant}, - JOURNAL = {Probab. Theory Related Fields}, - FJOURNAL = {Probability Theory and Related Fields}, - VOLUME = {74}, - YEAR = {1987}, - NUMBER = {3}, - PAGES = {403--428}, - ISSN = {0178-8051}, - MRCLASS = {60F15 (62L20 93E25)}, - MRNUMBER = {873887}, -MRREVIEWER = {G. Pflug}, - DOI = {10.1007/BF00699098}, - URL = {https://doi.org/10.1007/BF00699098}, -} - -@book {benveniste:metivier:priouret:1990, - AUTHOR = {Benveniste, A. and M\'etivier, M. and Priouret, P.}, - TITLE = {Adaptive algorithms and stochastic approximations}, - SERIES = {Applications of Mathematics (New York)}, - VOLUME = {22}, - NOTE = {Translated from the French by Stephen S. Wilson}, - PUBLISHER = {Springer-Verlag, Berlin}, - YEAR = {1990}, - PAGES = {xii+365}, - ISBN = {3-540-52894-6}, - MRCLASS = {62L20 (60G35 93E35)}, - MRNUMBER = {1082341}, -MRREVIEWER = {G. Pflug}, - DOI = {10.1007/978-3-642-75894-2}, - URL = {https://doi.org/10.1007/978-3-642-75894-2}, -} - - -@article {tadic:doucet:2017, - AUTHOR = {Tadi\'c, V. B. and Doucet, A.}, - TITLE = {Asymptotic bias of stochastic gradient search}, - JOURNAL = {Ann. Appl. 
Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {27}, - YEAR = {2017}, - NUMBER = {6}, - PAGES = {3255--3304}, - ISSN = {1050-5164}, - MRCLASS = {62L20 (90C15 93E12 93E35)}, - MRNUMBER = {3737925}, - DOI = {10.1214/16-AAP1272}, - URL = {https://doi.org/10.1214/16-AAP1272}, -} - - -@article {benaim:1996, - AUTHOR = {Benaim, M.}, - TITLE = {A dynamical system approach to stochastic approximations}, - JOURNAL = {SIAM J. Control Optim.}, - FJOURNAL = {SIAM Journal on Control and Optimization}, - VOLUME = {34}, - YEAR = {1996}, - NUMBER = {2}, - PAGES = {437--472}, - ISSN = {0363-0129}, - MRCLASS = {62L20 (93E25)}, - MRNUMBER = {1377706}, -MRREVIEWER = {K. M. Ramachandran}, - DOI = {10.1137/S0363012993253534}, - URL = {https://doi.org/10.1137/S0363012993253534}, -} - -@book{kushneryin, - title={Stochastic approximation and recursive algorithms and applications}, - author={Kushner, Harold and Yin, G George}, - volume={35}, - year={2003}, - publisher={Springer Science \& Business Media} -} - -@article{gadat, - title={Optimal non-asymptotic bound of the Ruppert-Polyak averaging without strong convexity}, - author={Gadat, S{\'e}bastien and Panloup, Fabien}, - journal={arXiv preprint arXiv:1709.03342}, - year={2017} -} - -@inproceedings{shamirzhang, - title={Stochastic gradient descent for non-smooth optimization: Convergence results and optimal averaging schemes}, - author={Shamir, Ohad and Zhang, Tong}, - booktitle={International Conference on Machine Learning}, - pages={71--79}, - year={2013} -} - -@article{bertsekas, - title={Nonlinear programming}, - author={Bertsekas, Dimitri P}, - journal={Journal of the Operational Research Society}, - volume={48}, - number={3}, - pages={334--334}, - year={1997}, - publisher={Taylor \& Francis} -} - -@book {nesterov2004introductory, - AUTHOR = {Nesterov, Yurii}, - TITLE = {Introductory lectures on convex optimization}, - SERIES = {Applied Optimization}, - VOLUME = {87}, - NOTE = {A basic course}, - PUBLISHER = 
{Kluwer Academic Publishers, Boston, MA}, - YEAR = {2004}, - PAGES = {xviii+236}, - ISBN = {1-4020-7553-7}, - MRCLASS = {90-02 (90-01 90C25)}, - MRNUMBER = {2142598}, - DOI = {10.1007/978-1-4419-8853-9}, - URL = {https://doi.org/10.1007/978-1-4419-8853-9}, -} - - -@book {pachpatte1998ineq, - AUTHOR = {Pachpatte, B. G.}, - TITLE = {Inequalities for differential and integral equations}, - SERIES = {Mathematics in Science and Engineering}, - VOLUME = {197}, - PUBLISHER = {Academic Press, Inc., San Diego, CA}, - YEAR = {1998}, - PAGES = {x+611}, - ISBN = {0-12-543430-8}, - MRCLASS = {00A05 (26D15 34A40 35B45 35R45 45A05 45G10)}, - MRNUMBER = {1487077}, -MRREVIEWER = {A. A. Martynyuk}, -} - -@article{li2019jmlr, - author = {Qianxiao Li and - Cheng Tai and - Weinan E}, - title = {Stochastic Modified Equations and Dynamics of Stochastic Gradient - Algorithms {I:} Mathematical Foundations}, - journal = {J. Mach. Learn. Res.}, - volume = {20}, - pages = {40:1--40:47}, - year = {2019}, - url = {http://jmlr.org/papers/v20/17-526.html}, - timestamp = {Wed, 10 Jul 2019 15:28:31 +0200}, - biburl = {https://dblp.org/rec/bib/journals/jmlr/LiTE19}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@article{feng2019uniform, - author = {Yuanyuan Feng and - Tingran Gao and - Lei Li and - Jian{-}Guo Liu and - Yulong Lu}, - title = {Uniform-in-Time Weak Error Analysis for Stochastic Gradient Descent - Algorithms via Diffusion Approximation}, - journal = {CoRR}, - volume = {abs/1902.00635}, - year = {2019}, - url = {http://arxiv.org/abs/1902.00635}, - archivePrefix = {arXiv}, - eprint = {1902.00635}, - timestamp = {Tue, 05 Nov 2019 08:27:27 +0100}, - biburl = {https://dblp.org/rec/bib/journals/corr/abs-1902-00635}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@article{li2017batch, - author = {Chris Junchi Li and - Lei Li and - Junyang Qian and - Jian{-}Guo Liu}, - title = {Batch Size Matters: {A} Diffusion Approximation Framework on 
Nonconvex - Stochastic Gradient Descent}, - journal = {CoRR}, - volume = {abs/1705.07562}, - year = {2017}, - url = {http://arxiv.org/abs/1705.07562}, - archivePrefix = {arXiv}, - eprint = {1705.07562}, - timestamp = {Mon, 13 Aug 2018 16:46:08 +0200}, - biburl = {https://dblp.org/rec/bib/journals/corr/LiLQL17}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@inproceedings{nesterov83, - title={{A method for solving the convex programming problem with convergence rate $O (1/k^2)$}}, - author={Nesterov, Yurii E}, - booktitle={Dokl. akad. nauk Sssr}, - volume={269}, - pages={543--547}, - year={1983} -} - -@article{boydcandes, - author = {Weijie Su and - Stephen P. Boyd and - Emmanuel J. Cand{\`{e}}s}, - title = {A Differential Equation for Modeling Nesterov's Accelerated Gradient - Method: Theory and Insights}, - journal = {J. Mach. Learn. Res.}, - volume = {17}, - pages = {153:1--153:43}, - year = {2016}, - url = {http://jmlr.org/papers/v17/15-084.html}, - timestamp = {Wed, 10 Jul 2019 15:28:25 +0200}, - biburl = {https://dblp.org/rec/bib/journals/jmlr/SuBC16}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@article{jordanhighresolution, - author = {Bin Shi and - Simon S. Du and - Michael I. Jordan and - Weijie J. 
Su}, - title = {Understanding the Acceleration Phenomenon via High-Resolution Differential - Equations}, - journal = {CoRR}, - volume = {abs/1810.08907}, - year = {2018}, - url = {http://arxiv.org/abs/1810.08907}, - archivePrefix = {arXiv}, - eprint = {1810.08907}, - timestamp = {Thu, 01 Nov 2018 18:03:07 +0100}, - biburl = {https://dblp.org/rec/bib/journals/corr/abs-1810-08907}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@inproceedings{orvieto2019continuous, - author = {Antonio Orvieto and - Aur{\'{e}}lien Lucchi}, - title = {Continuous-time Models for Stochastic Optimization Algorithms}, - booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference - on Neural Information Processing Systems 2019, NeurIPS 2019, 8-14 - December 2019, Vancouver, BC, Canada}, - pages = {12589--12601}, - year = {2019}, - url = {http://papers.nips.cc/paper/9424-continuous-time-models-for-stochastic-optimization-algorithms}, - timestamp = {Mon, 13 Jan 2020 09:28:31 +0100}, - biburl = {https://dblp.org/rec/bib/conf/nips/OrvietoL19}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@article{hardt2018gradient, - author = {Moritz Hardt and - Tengyu Ma and - Benjamin Recht}, - title = {Gradient Descent Learns Linear Dynamical Systems}, - journal = {J. Mach. Learn. 
Res.}, - volume = {19}, - pages = {29:1--29:44}, - year = {2018}, - url = {http://jmlr.org/papers/v19/16-465.html}, - timestamp = {Wed, 10 Jul 2019 15:28:42 +0200}, - biburl = {https://dblp.org/rec/bib/journals/jmlr/HardtMR18}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - - -@article{talay1990expansion, - title={Expansion of the global error for numerical schemes solving stochastic differential equations}, - author={Talay, Denis and Tubaro, Luciano}, - journal={Stochastic analysis and applications}, - volume={8}, - number={4}, - pages={483--509}, - year={1990}, - publisher={Taylor \& Francis} -} - -@inproceedings{bachmoulines2011, - author = {Francis R. Bach and - Eric Moulines}, - title = {Non-Asymptotic Analysis of Stochastic Approximation Algorithms for - Machine Learning}, - booktitle = {Advances in Neural Information Processing Systems 24: 25th Annual - Conference on Neural Information Processing Systems 2011. Proceedings - of a meeting held 12-14 December 2011, Granada, Spain}, - pages = {451--459}, - year = {2011}, - url = {http://papers.nips.cc/paper/4316-non-asymptotic-analysis-of-stochastic-approximation-algorithms-for-machine-learning}, - timestamp = {Wed, 06 May 2015 16:20:33 +0200}, - biburl = {https://dblp.org/rec/bib/conf/nips/BachM11}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@proceedings{DBLP:conf/nips/2011, - editor = {John Shawe{-}Taylor and - Richard S. Zemel and - Peter L. Bartlett and - Fernando C. N. Pereira and - Kilian Q. Weinberger}, - title = {Advances in Neural Information Processing Systems 24: 25th Annual - Conference on Neural Information Processing Systems 2011. 
Proceedings - of a meeting held 12-14 December 2011, Granada, Spain}, - year = {2011}, - url = {http://papers.nips.cc/book/advances-in-neural-information-processing-systems-24-2011}, - timestamp = {Thu, 11 Dec 2014 17:34:08 +0100}, - biburl = {https://dblp.org/rec/bib/conf/nips/2011}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@article{apidopoulos, - title={Convergence rates of an inertial gradient descent algorithm under growth and flatness conditions}, - author={Apidopoulos, Vassilis and Aujol, Jean-Fran{\c{c}}ois and Dossal, Charles and Rondepierre, Aude}, - year={2019} -} - -@article{adr2018, - title={Optimal convergence rates for Nesterov acceleration}, - author={Aujol, Jean Fran{\c{c}}ois and Rondepierre, Aude and Dossal, Charles}, - journal={arXiv preprint arXiv:1805.05719}, - year={2018} -} - -@inproceedings{HarveyLPR19, - author = {Nicholas J. A. Harvey and - Christopher Liaw and - Yaniv Plan and - Sikander Randhawa}, - title = {Tight analyses for non-smooth stochastic gradient descent}, - booktitle = {Conference on Learning Theory, {COLT} 2019, 25-28 June 2019, Phoenix, - AZ, {USA}}, - pages = {1579--1613}, - year = {2019}, - crossref = {DBLP:conf/colt/2019}, - url = {http://proceedings.mlr.press/v99/harvey19a.html}, - timestamp = {Mon, 08 Jul 2019 16:13:41 +0200}, - biburl = {https://dblp.org/rec/bib/conf/colt/HarveyLPR19}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} -@proceedings{DBLP:conf/colt/2019, - editor = {Alina Beygelzimer and - Daniel Hsu}, - title = {Conference on Learning Theory, {COLT} 2019, 25-28 June 2019, Phoenix, - AZ, {USA}}, - series = {Proceedings of Machine Learning Research}, - volume = {99}, - publisher = {{PMLR}}, - year = {2019}, - url = {http://proceedings.mlr.press/v99/}, - timestamp = {Mon, 08 Jul 2019 16:13:41 +0200}, - biburl = {https://dblp.org/rec/bib/conf/colt/2019}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - - 
-@inproceedings{rakhlin2012, - author = {Alexander Rakhlin and - Ohad Shamir and - Karthik Sridharan}, - title = {Making Gradient Descent Optimal for Strongly Convex Stochastic Optimization}, - booktitle = {Proceedings of the 29th International Conference on Machine Learning, - {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012}, - year = {2012}, - crossref = {DBLP:conf/icml/2012}, - url = {http://icml.cc/2012/papers/261.pdf}, - timestamp = {Wed, 03 Apr 2019 17:43:36 +0200}, - biburl = {https://dblp.org/rec/bib/conf/icml/RakhlinSS12}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@proceedings{DBLP:conf/icml/2012, - title = {Proceedings of the 29th International Conference on Machine Learning, - {ICML} 2012, Edinburgh, Scotland, UK, June 26 - July 1, 2012}, - publisher = {icml.cc / Omnipress}, - year = {2012}, - timestamp = {Wed, 29 Mar 2017 16:45:25 +0200}, - biburl = {https://dblp.org/rec/bib/conf/icml/2012}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@article{jmlrhazan, - author = {Elad Hazan and Satyen Kale}, - title = {Beyond the Regret Minimization Barrier: Optimal Algorithms for Stochastic Strongly-Convex Optimization}, - journal = {Journal of Machine Learning Research}, - year = {2014}, - volume = {15}, - pages = {2489-2512}, - url = {http://jmlr.org/papers/v15/hazan14a.html} -} - -@inproceedings{mirrorzhou, - author = {Zhengyuan Zhou and - Panayotis Mertikopoulos and - Nicholas Bambos and - Stephen P. Boyd and - Peter W. 
Glynn}, - title = {Stochastic Mirror Descent in Variationally Coherent Optimization Problems}, - booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference - on Neural Information Processing Systems 2017, 4-9 December 2017, - Long Beach, CA, {USA}}, - pages = {7040--7049}, - year = {2017}, - crossref = {DBLP:conf/nips/2017}, - url = {http://papers.nips.cc/paper/7279-stochastic-mirror-descent-in-variationally-coherent-optimization-problems}, - timestamp = {Tue, 23 Jul 2019 12:44:35 +0200}, - biburl = {https://dblp.org/rec/bib/conf/nips/ZhouMBBG17}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} -@proceedings{DBLP:conf/nips/2017, - editor = {Isabelle Guyon and - Ulrike von Luxburg and - Samy Bengio and - Hanna M. Wallach and - Rob Fergus and - S. V. N. Vishwanathan and - Roman Garnett}, - title = {Advances in Neural Information Processing Systems 30: Annual Conference - on Neural Information Processing Systems 2017, 4-9 December 2017, - Long Beach, CA, {USA}}, - year = {2017}, - timestamp = {Mon, 27 Nov 2017 12:38:16 +0100}, - biburl = {https://dblp.org/rec/bib/conf/nips/2017}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@article{lei2019stochastic, - title={Stochastic gradient descent for nonconvex learning without bounded gradient assumptions}, - author={Lei, Yunwen and Hu, Ting and Li, Guiying and Tang, Ke}, - journal={IEEE Transactions on Neural Networks and Learning Systems}, - year={2019}, - publisher={IEEE} -} - -@inproceedings{hogwild, - author = {Lam M. Nguyen and - Phuong Ha Nguyen and - Marten van Dijk and - Peter Richt{\'{a}}rik and - Katya Scheinberg and - Martin Tak{\'{a}}c}, - title = {{SGD} and Hogwild! 
Convergence Without the Bounded Gradients Assumption}, - booktitle = {Proceedings of the 35th International Conference on Machine Learning, - {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July - 10-15, 2018}, - pages = {3747--3755}, - year = {2018}, - url = {http://proceedings.mlr.press/v80/nguyen18c.html}, - timestamp = {Wed, 03 Apr 2019 18:17:30 +0200}, - biburl = {https://dblp.org/rec/bib/conf/icml/NguyenNDRST18}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@proceedings{DBLP:conf/icml/2018, - editor = {Jennifer G. Dy and - Andreas Krause}, - title = {Proceedings of the 35th International Conference on Machine Learning, - {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July - 10-15, 2018}, - series = {Proceedings of Machine Learning Research}, - volume = {80}, - publisher = {{PMLR}}, - year = {2018}, - url = {http://proceedings.mlr.press/v80/}, - timestamp = {Wed, 03 Apr 2019 18:17:30 +0200}, - biburl = {https://dblp.org/rec/bib/conf/icml/2018}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@incollection{krichene, -title = {Accelerated Mirror Descent in Continuous and Discrete Time}, -author = {Krichene, Walid and Bayen, Alexandre and Bartlett, Peter L}, -booktitle = {Advances in Neural Information Processing Systems 28}, -editor = {C. Cortes and N. D. Lawrence and D. D. Lee and M. Sugiyama and R. Garnett}, -pages = {2845--2853}, -year = {2015}, -publisher = {Curran Associates, Inc.}, -url = {http://papers.nips.cc/paper/5843-accelerated-mirror-descent-in-continuous-and-discrete-time.pdf} -} - -@article{robbinsmonro, - title={A stochastic approximation method}, - author={Robbins, Herbert and Monro, Sutton}, - journal={The annals of mathematical statistics}, - pages={400--407}, - year={1951}, - publisher={JSTOR} -} - -@book {bauschke2017convex, - AUTHOR = {Bauschke, Heinz H. 
and Combettes, Patrick L.}, - TITLE = {Convex analysis and monotone operator theory in {H}ilbert - spaces}, - SERIES = {CMS Books in Mathematics/Ouvrages de Math\'{e}matiques de la SMC}, - EDITION = {Second}, - NOTE = {With a foreword by H\'{e}dy Attouch}, - PUBLISHER = {Springer, Cham}, - YEAR = {2017}, - PAGES = {xix+619}, - ISBN = {978-3-319-48310-8; 978-3-319-48311-5}, - MRCLASS = {49-02 (41A65 46B20 46C05 47H05 90C25)}, - MRNUMBER = {3616647}, - DOI = {10.1007/978-3-319-48311-5}, - URL = {https://doi.org/10.1007/978-3-319-48311-5}, -} - -@inproceedings{li2017sme, - author = {Qianxiao Li and - Cheng Tai and - Weinan E}, - title = {Stochastic Modified Equations and Adaptive Stochastic Gradient Algorithms}, - booktitle = {Proceedings of the 34th International Conference on Machine Learning, - {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017}, - pages = {2101--2110}, - year = {2017}, - url = {http://proceedings.mlr.press/v70/li17f.html}, - timestamp = {Wed, 29 May 2019 08:41:45 +0200}, - biburl = {https://dblp.org/rec/bib/conf/icml/LiTE17}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@article{agarwal2012lower, - author = {Alekh Agarwal and - Peter L. Bartlett and - Pradeep Ravikumar and - Martin J. Wainwright}, - title = {Information-Theoretic Lower Bounds on the Oracle Complexity of Stochastic - Convex Optimization}, - journal = {{IEEE} Trans. Information Theory}, - volume = {58}, - number = {5}, - pages = {3235--3249}, - year = {2012}, - url = {https://doi.org/10.1109/TIT.2011.2182178}, - doi = {10.1109/TIT.2011.2182178}, - timestamp = {Fri, 02 Nov 2018 09:29:37 +0100}, - biburl = {https://dblp.org/rec/bib/journals/tit/AgarwalBRW12}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@book {nemirovsky1983problem, - AUTHOR = {Nemirovsky, A. S. and Yudin, D. B. 
and }, - TITLE = {Problem complexity and method efficiency in optimization}, - SERIES = {A Wiley-Interscience Publication}, - NOTE = {Translated from the Russian and with a preface by E. R. Dawson, - Wiley-Interscience Series in Discrete Mathematics}, - PUBLISHER = {John Wiley \& Sons, Inc., New York}, - YEAR = {1983}, - PAGES = {xv+388}, - ISBN = {0-471-10345-4}, - MRCLASS = {90C25 (68C25)}, - MRNUMBER = {702836}, -} - -@article{nemirovski:2009, - title={Robust stochastic approximation approach to stochastic programming}, - author={Nemirovski, Arkadi and Juditsky, Anatoli and Lan, Guanghui and Shapiro, Alexander}, - journal={SIAM Journal on optimization}, - volume={19}, - number={4}, - pages={1574--1609}, - year={2009}, - publisher={SIAM} -} - - -@inproceedings{zhang2004solving, - author = {Tong Zhang}, - title = {Solving large scale linear prediction problems using stochastic gradient - descent algorithms}, - booktitle = {Machine Learning, Proceedings of the Twenty-first International Conference - {(ICML} 2004), Banff, Alberta, Canada, July 4-8, 2004}, - year = {2004}, - url = {https://doi.org/10.1145/1015330.1015332}, - doi = {10.1145/1015330.1015332}, - timestamp = {Tue, 06 Nov 2018 16:58:29 +0100}, - biburl = {https://dblp.org/rec/bib/conf/icml/Zhang04}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@inproceedings{hazan2015beyond, - author = {Elad Hazan and - Kfir Y. 
Levy and - Shai Shalev{-}Shwartz}, - title = {Beyond Convexity: Stochastic Quasi-Convex Optimization}, - booktitle = {Advances in Neural Information Processing Systems 28: Annual Conference - on Neural Information Processing Systems 2015, December 7-12, 2015, - Montreal, Quebec, Canada}, - pages = {1594--1602}, - year = {2015}, - url = {http://papers.nips.cc/paper/5718-beyond-convexity-stochastic-quasi-convex-optimization}, - timestamp = {Fri, 08 Apr 2016 19:32:54 +0200}, - biburl = {https://dblp.org/rec/bib/conf/nips/HazanLS15}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@inproceedings {kunita1981decomposition, - AUTHOR = {Kunita, Hiroshi}, - TITLE = {On the decomposition of solutions of stochastic differential - equations}, - BOOKTITLE = {Stochastic integrals ({P}roc. {S}ympos., {U}niv. {D}urham, - {D}urham, 1980)}, - SERIES = {Lecture Notes in Math.}, - VOLUME = {851}, - PAGES = {213--255}, - PUBLISHER = {Springer, Berlin-New York}, - YEAR = {1981}, - MRCLASS = {60H10 (58G32)}, - MRNUMBER = {620992}, -MRREVIEWER = {Halim Doss}, -} - -@article {blago1961some, - AUTHOR = {Blagovescenskii, Ju. N. and Freidlin, M. I.}, - TITLE = {Some properties of diffusion processes depending on a - parameter}, - JOURNAL = {Dokl. Akad. Nauk SSSR}, - FJOURNAL = {Doklady Akademii Nauk SSSR}, - VOLUME = {138}, - YEAR = {1961}, - PAGES = {508--511}, - ISSN = {0002-3264}, - MRCLASS = {60.62}, - MRNUMBER = {0139196}, -MRREVIEWER = {K. 
Balagangadharan}, -} - -@inproceedings{kleinberg2018alternative, - author = {Robert Kleinberg and - Yuanzhi Li and - Yang Yuan}, - title = {An Alternative View: When Does {SGD} Escape Local Minima?}, - booktitle = {Proceedings of the 35th International Conference on Machine Learning, - {ICML} 2018, Stockholmsm{\"{a}}ssan, Stockholm, Sweden, July - 10-15, 2018}, - pages = {2703--2712}, - year = {2018}, - url = {http://proceedings.mlr.press/v80/kleinberg18a.html}, - timestamp = {Wed, 03 Apr 2019 18:17:30 +0200}, - biburl = {https://dblp.org/rec/bib/conf/icml/KleinbergLY18}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@inproceedings{yuan2019stagewise, - author = {Zhuoning Yuan and - Yan Yan and - Rong Jin and - Tianbao Yang}, - title = {Stagewise Training Accelerates Convergence of Testing Error Over {SGD}}, - booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference - on Neural Information Processing Systems 2019, NeurIPS 2019, 8-14 - December 2019, Vancouver, BC, Canada}, - pages = {2604--2614}, - year = {2019}, - url = {http://papers.nips.cc/paper/8529-stagewise-training-accelerates-convergence-of-testing-error-over-sgd}, - timestamp = {Mon, 13 Jan 2020 09:28:31 +0100}, - biburl = {https://dblp.org/rec/bib/conf/nips/Yuan0JY19}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@inproceedings{karimi, -author = {Karimi, Hamed and Nutini, Julie and Schmidt, Mark}, -title = {Linear Convergence of Gradient and Proximal-Gradient Methods Under the Polyak-Lojasiewicz Condition}, -year = {2016}, -isbn = {9783319461274}, -publisher = {Springer-Verlag}, -address = {Berlin, Heidelberg}, -url = {https://doi.org/10.1007/978-3-319-46128-1_50}, -doi = {10.1007/978-3-319-46128-1_50}, -booktitle = {European Conference on Machine Learning and Knowledge Discovery in Databases - Volume 9851}, -pages = {795--811}, -numpages = {17}, -keywords = {Coordinate descent, Support vector machines, Gradient 
descent, Variance-reduction, L1-regularization, Boosting, Stochastic gradient}, -location = {Riva del Garda, Italy}, -series = {ECML PKDD 2016} -} - -@article{noll, - TITLE = {{Convergence of non-smooth descent methods using the Kurdyka-\L ojasiewicz inequality.}}, - AUTHOR = {Noll, Dominikus}, - URL = {https://hal.archives-ouvertes.fr/hal-01868363}, - JOURNAL = {{Journal of Optimization, Theory and Applications}}, - YEAR = {2014}, - KEYWORDS = {subgradient-oriented descent method ; Non-smooth non-convex optimization ; upper C 1 function ; lower C 1 function ; Kurdyka-Łojasiewicz inequality ; model ; strict}, - PDF = {https://hal.archives-ouvertes.fr/hal-01868363/file/flows_rev_rev.pdf}, - HAL_ID = {hal-01868363}, - HAL_VERSION = {v1}, -} - -@article{attouch2010proximal, - title={Proximal alternating minimization and projection methods for nonconvex problems: An approach based on the Kurdyka-{\L}ojasiewicz inequality}, - author={Attouch, H{\'e}dy and Bolte, J{\'e}r{\^o}me and Redont, Patrick and Soubeyran, Antoine}, - journal={Mathematics of Operations Research}, - volume={35}, - number={2}, - pages={438--457}, - year={2010}, - publisher={INFORMS} -} - -@article{debortoli2019souk, - title={Efficient stochastic optimisation by unadjusted Langevin Monte Carlo. Application to maximum marginal likelihood and empirical Bayesian estimation}, - author={De Bortoli, Valentin and Durmus, Alain and Pereyra, Marcelo and Vidal, Ana F}, - journal={arXiv preprint arXiv:1906.12281}, - year={2019} -} - - -@book{casella2002, - author = {R. Berger and G. Casella}, - title = {Statistical inference (2nd ed.)}, - year = {2002}, - publisher = {Duxbury / Thomson Learning}, - address = {Pacific Grove, USA}, - available = {y}, - topic = { } -} - -@incollection {gentle2004comp, - AUTHOR = {Gentle, James E. 
and H\"{a}rdle, Wolfgang and Mori, Yuichi}, - TITLE = {Computational statistics: an introduction}, - BOOKTITLE = {Handbook of computational statistics}, - PAGES = {3--16}, - PUBLISHER = {Springer, Berlin}, - YEAR = {2004}, - MRCLASS = {62-07}, - MRNUMBER = {2089736}, -} - -@incollection{bottou2010large, - title={Large-scale machine learning with stochastic gradient descent}, - author={Bottou, L{\'e}on}, - booktitle={Proceedings of COMPSTAT'2010}, - pages={177--186}, - year={2010}, - publisher={Springer} -} - -@article{gu2018recent, - title={Recent advances in convolutional neural networks}, - author={Gu, Jiuxiang and Wang, Zhenhua and Kuen, Jason and Ma, Lianyang and Shahroudy, Amir and Shuai, Bing and Liu, Ting and Wang, Xingxing and Wang, Gang and Cai, Jianfei and others}, - journal={Pattern Recognition}, - volume={77}, - pages={354--377}, - year={2018}, - publisher={Elsevier} -} - - -@article{beck2009fast, - title={Fast gradient-based algorithms for constrained total variation image denoising and deblurring problems}, - author={Beck, Amir and Teboulle, Marc}, - journal={IEEE transactions on image processing}, - volume={18}, - number={11}, - pages={2419--2434}, - year={2009}, - publisher={IEEE} -} - -@article{knoll2011second, - title={Second order total generalized variation (TGV) for MRI}, - author={Knoll, Florian and Bredies, Kristian and Pock, Thomas and Stollberger, Rudolf}, - journal={Magnetic resonance in medicine}, - volume={65}, - number={2}, - pages={480--491}, - year={2011}, - publisher={Wiley Online Library} -} - -@article {durmus2017unadjusted, - AUTHOR = {Durmus, A. and Moulines, \'{E}.}, - TITLE = {Nonasymptotic convergence analysis for the unadjusted - {L}angevin algorithm}, - JOURNAL = {Ann. Appl. 
Probab.}, - FJOURNAL = {The Annals of Applied Probability}, - VOLUME = {27}, - YEAR = {2017}, - NUMBER = {3}, - PAGES = {1551--1587}, - ISSN = {1050-5164}, - MRCLASS = {65C05 (60F05 60J05 65C40 93E35)}, - MRNUMBER = {3678479}, - DOI = {10.1214/16-AAP1238}, - URL = {https://doi.org/10.1214/16-AAP1238}, -} - -@article {dalalyan2017theoretical, - AUTHOR = {Dalalyan, Arnak S.}, - TITLE = {Theoretical guarantees for approximate sampling from smooth - and log-concave densities}, - JOURNAL = {J. R. Stat. Soc. Ser. B. Stat. Methodol.}, - FJOURNAL = {Journal of the Royal Statistical Society. Series B. - Statistical Methodology}, - VOLUME = {79}, - YEAR = {2017}, - NUMBER = {3}, - PAGES = {651--676}, - ISSN = {1369-7412}, - MRCLASS = {60J22 (60B10 60J05 60J70)}, - MRNUMBER = {3641401}, - DOI = {10.1111/rssb.12183}, - URL = {https://doi.org/10.1111/rssb.12183}, -} - -@article {roberts1996exponential, - AUTHOR = {Roberts, Gareth O. and Tweedie, Richard L.}, - TITLE = {Exponential convergence of {L}angevin distributions and their - discrete approximations}, - JOURNAL = {Bernoulli}, - FJOURNAL = {Bernoulli. 
Official Journal of the Bernoulli Society for - Mathematical Statistics and Probability}, - VOLUME = {2}, - YEAR = {1996}, - NUMBER = {4}, - PAGES = {341--363}, - ISSN = {1350-7265}, - MRCLASS = {62E25 (65C05)}, - MRNUMBER = {1440273}, -MRREVIEWER = {Arnoldo Frigessi}, - DOI = {10.2307/3318418}, - URL = {https://doi.org/10.2307/3318418}, -} - -@techreport{ruppert1988efficient, - title={Efficient estimations from a slowly convergent Robbins-Monro process}, - author={Ruppert, David}, - year={1988}, - institution={Cornell University Operations Research and Industrial Engineering} -} - -@inproceedings{li2017convergence, - author = {Yuanzhi Li and - Yang Yuan}, - title = {Convergence Analysis of Two-layer Neural Networks with ReLU Activation}, - booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference - on Neural Information Processing Systems 2017, 4-9 December 2017, - Long Beach, CA, {USA}}, - pages = {597--607}, - year = {2017}, - url = {http://papers.nips.cc/paper/6662-convergence-analysis-of-two-layer-neural-networks-with-relu-activation}, - timestamp = {Mon, 27 Nov 2017 12:38:48 +0100}, - biburl = {https://dblp.org/rec/bib/conf/nips/LiY17}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@book{kloeden:platen:2011, - title={Numerical Solution of Stochastic Differential Equations}, - author={Kloeden, P.E. and Platen, E.}, - isbn={9783540540625}, - lccn={92015916}, - series={Stochastic Modelling and Applied Probability}, - url={https://books.google.fr/books?id=BCvtssom1CMC}, - year={2011}, - publisher={Springer Berlin Heidelberg} -} -@article{shalev2011pegasos, - author = {Shai Shalev{-}Shwartz and - Yoram Singer and - Nathan Srebro and - Andrew Cotter}, - title = {Pegasos: primal estimated sub-gradient solver for {SVM}}, - journal = {Math. 
Program.}, - volume = {127}, - number = {1}, - pages = {3--30}, - year = {2011}, - doi = {10.1007/s10107-010-0420-4}, - timestamp = {Wed, 14 Nov 2018 10:49:02 +0100}, - biburl = {https://dblp.org/rec/bib/journals/mp/Shalev-ShwartzSSC11}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@inproceedings{recht2011hogwild, - author = {Benjamin Recht and - Christopher R{\'{e}} and - Stephen J. Wright and - Feng Niu}, - title = {Hogwild: {A} Lock-Free Approach to Parallelizing Stochastic Gradient - Descent}, - booktitle = {Advances in Neural Information Processing Systems 24: 25th Annual - Conference on Neural Information Processing Systems 2011. Proceedings - of a meeting held 12-14 December 2011, Granada, Spain}, - pages = {693--701}, - year = {2011}, - url = {http://papers.nips.cc/paper/4390-hogwild-a-lock-free-approach-to-parallelizing-stochastic-gradient-descent}, - timestamp = {Tue, 12 Jul 2016 21:51:13 +0200}, - biburl = {https://dblp.org/rec/bib/conf/nips/RechtRWN11}, - bibsource = {dblp computer science bibliography, https://dblp.org} -} - -@article{bolte2017error, - title={From error bounds to the complexity of first-order descent methods for convex functions}, - author={Bolte, J{\'e}r{\^o}me and Nguyen, Trong Phong and Peypouquet, Juan and Suter, Bruce W}, - journal={Mathematical Programming}, - volume={165}, - number={2}, - pages={471--507}, - year={2017}, - publisher={Springer} -} - -@article{frankel2015splitting, - title={Splitting methods with variable metric for Kurdyka--{\L}ojasiewicz functions and general convergence rates}, - author={Frankel, Pierre and Garrigos, Guillaume and Peypouquet, Juan}, - journal={Journal of Optimization Theory and Applications}, - volume={165}, - number={3}, - pages={874--900}, - year={2015}, - publisher={Springer} -} - -@article{simsekli2019tail, - title={A tail-index analysis of stochastic gradient noise in deep neural networks}, - author={Simsekli, Umut and Sagun, Levent and Gurbuzbalaban, Mert}, - 
journal={arXiv preprint arXiv:1901.06053}, - year={2019} -} - -@book{stroock2007multidimensional, - title={Multidimensional diffusion processes}, - author={Stroock, Daniel W and Varadhan, SR Srinivasa}, - year={2007}, - publisher={Springer} -} - -@InProceedings{taylorbach, - title = {Stochastic first-order methods: non-asymptotic and computer-aided analyses via potential functions}, - author = {Taylor, Adrien and Bach, Francis}, - booktitle = {Proceedings of the Thirty-Second Conference on Learning Theory}, - pages = {2934--2992}, - year = {2019}, - editor = {Beygelzimer, Alina and Hsu, Daniel}, - volume = {99}, - series = {Proceedings of Machine Learning Research}, - address = {Phoenix, USA}, - month = {25--28 Jun}, - publisher = {PMLR}, - pdf = {http://proceedings.mlr.press/v99/taylor19a/taylor19a.pdf}, - url = {http://proceedings.mlr.press/v99/taylor19a.html} -} - -@article{ross2011fundamentals, - title={Fundamentals of Stein’s method}, - author={Ross, Nathan and others}, - journal={Probability Surveys}, - volume={8}, - pages={210--293}, - year={2011}, - publisher={The Institute of Mathematical Statistics and the Bernoulli Society} -} - -@article{heng2020controlled, - title={Controlled sequential monte carlo}, - author={Heng, Jeremy and Bishop, Adrian N and Deligiannidis, George and Doucet, Arnaud and others}, - journal={Annals of Statistics}, - volume={48}, - number={5}, - pages={2904--2929}, - year={2020}, - publisher={Institute of Mathematical Statistics} -} - -@article{deming1940least, - title={On a least squares adjustment of a sampled frequency table when the expected marginal totals are known}, - author={Deming, W Edwards and Stephan, Frederick F}, - journal={The Annals of Mathematical Statistics}, - volume={11}, - number={4}, - pages={427--444}, - year={1940}, - publisher={JSTOR} -} - -@article{sinkhorn1967diagonal, - title={Diagonal equivalence to matrices with prescribed row and column sums}, - author={Sinkhorn, Richard}, - journal={The American 
Mathematical Monthly}, - volume={74}, - number={4}, - pages={402--405}, - year={1967}, - publisher={JSTOR} -} - -@article{ireland1968contingency, - title={Contingency tables with given marginals}, - author={Ireland, C Terrance and Kullback, Solomon}, - journal={Biometrika}, - volume={55}, - number={1}, - pages={179--188}, - year={1968}, - publisher={Oxford University Press} -} - -@article{kullback1968probability, - title={Probability densities with given marginals}, - author={Kullback, Solomon}, - journal={The Annals of Mathematical Statistics}, - volume={39}, - number={4}, - pages={1236--1243}, - year={1968}, - publisher={JSTOR} -} - -@inproceedings{cuturi2013sinkhorn, - title={Sinkhorn distances: Lightspeed computation of optimal transport}, - author={Cuturi, Marco}, - booktitle={Advances in Neural Information Processing Systems}, - year={2013} -} - -@article{peyre2019computational, - title={Computational Optimal Transport}, - author={Peyr{\'e}, Gabriel and Cuturi, Marco}, - journal={Foundations and Trends{\textregistered} in Machine Learning}, - volume={11}, - number={5-6}, - pages={355--607}, - year={2019}, - publisher={Now Publishers, Inc.} -} - -@article{chen2016entropic, - title={Entropic and displacement interpolation: a computational approach using the {H}ilbert metric}, - author={Chen, Yongxin and Georgiou, Tryphon and Pavon, Michele}, - journal={SIAM Journal on Applied Mathematics}, - volume={76}, - number={6}, - pages={2375--2396}, - year={2016}, - publisher={SIAM} -} - -@article{reich2018data, - title={Data assimilation: the {S}chr{\"o}dinger perspective}, - author={Reich, Sebastian}, - journal={Acta Numerica}, - volume={28}, - pages={635--711}, - year={2019}, - publisher={Cambridge University Press} -} - -@article{haussmann1986time, - title={Time reversal of diffusions}, - author={Haussmann, Ulrich G and Pardoux, Etienne}, - journal={The Annals of Probability}, - volume={14}, - number={4}, - pages={1188--1205}, - year={1986}, - publisher={JSTOR} -} - 
-@article{ruschendorf1993note, - title={Note on the {S}chr{\"o}dinger equation and I-projections}, - author={R{\"u}schendorf, Ludger and Thomsen, W}, - journal={Statistics \& Probability letters}, - volume={17}, - number={5}, - pages={369--375}, - year={1993}, - publisher={Elsevier} -} - -@article{bernton2019schr, - title={Schr{\"o}dinger Bridge Samplers}, - author={Bernton, Espen and Heng, Jeremy and Doucet, Arnaud and Jacob, Pierre E}, - journal={arXiv preprint arXiv:1912.13170}, - year={2019} -} - -@article{kingma2013auto, - title={Auto-encoding variational bayes}, - author={Kingma, Diederik P and Welling, Max}, - journal={arXiv preprint arXiv:1312.6114}, - year={2013} -} - -@article{pavon2018data, - title={The Data-Driven {S}chr{\"o}dinger Bridge}, - author={Pavon, Michele and Trigila, Giulio and Tabak, Esteban G}, - journal={Communications on Pure and Applied Mathematics}, - volume={74}, - issue={7}, - pages={1545--1573}, - year={2021}, - publisher={Wiley Online Library} -} - - -@article{fortet1940resolution, - title={R{\'e}solution d’un syst{\`e}me d’{\'e}quations de {M}. 
{S}chr{\"o}dinger}, - author={Fortet, Robert}, - journal={Journal de Math{\'e}matiques Pures et Appliqu{\'e}s}, - volume={1}, - pages={83--105}, - year={1940} -} - -@article{beurling1960automorphism, - title={An automorphism of product measures}, - author={Beurling, Arne}, - journal={Annals of Mathematics}, - pages={189--200}, - volume={72}, - number={1}, - year={1960}, - publisher={JSTOR} -} - -@incollection{follmer1988random, - title={Random fields and diffusion processes}, - author={F{\"o}llmer, Hans}, - booktitle={{\'E}cole d'{\'E}t{\'e} de Probabilit{\'e}s de Saint-Flour XV--XVII, 1985--87}, - pages={101--203}, - year={1988}, - publisher={Springer} -} - -@article{jamison1975markov, - title={The Markov processes of Schr{\"o}dinger}, - author={Jamison, Benton}, - journal={Zeitschrift f{\"u}r Wahrscheinlichkeitstheorie und Verwandte Gebiete}, - volume={32}, - number={4}, - pages={323--331}, - year={1975}, - publisher={Springer} -} - -@article{leonard2019revisiting, - title={Revisiting {F}ortet's proof of existence of a solution to the {S}chr{\"o}dinger system}, - author={L{\'e}onard, Christian}, - journal={arXiv preprint arXiv:1904.13211}, - year={2019} -} - -@article{schrodinger1932theorie, - title={Sur la th{\'e}orie relativiste de l'{\'e}lectron et l'interpr{\'e}tation de la m{\'e}canique quantique}, - author={Schr{\"o}dinger, Erwin}, - journal={Annales de l'Institut Henri Poincar{\'e}}, - volume={2}, - number={4}, - pages={269--310}, - year={1932} -} - -@article{wu2020stochastic, - title={Stochastic Normalizing Flows}, - author={Wu, Hao and K{\"o}hler, Jonas and No{\'e}, Frank}, - journal={arXiv preprint arXiv:2002.06707}, - year={2020} -} - -@inproceedings{sohl2015deep, - title={Deep unsupervised learning using nonequilibrium thermodynamics}, - author={Sohl-Dickstein, Jascha and Weiss, Eric and Maheswaranathan, Niru and Ganguli, Surya}, - booktitle={International Conference on Machine Learning}, - year={2015} -} - -@article{dai1991stochastic, - title={A 
stochastic control approach to reciprocal diffusion processes}, - author={Dai Pra, Paolo}, - journal={Applied mathematics and Optimization}, - volume={23}, - number={1}, - pages={313--329}, - year={1991}, - publisher={Springer} -} - -@article{csiszar1975divergence, - title={I-divergence geometry of probability distributions and minimization problems}, - author={Csisz{\'a}r, Imre}, - journal={The Annals of Probability}, - volume={3}, - number={1}, - pages={146--158}, - year={1975}, - publisher={JSTOR} -} - -@article{hilbert1895gerade, - title={{\"U}ber die gerade Linie als k{\"u}rzeste Verbindung zweier Punkte}, - author={Hilbert, David}, - journal={Mathematische Annalen}, - volume={46}, - number={1}, - pages={91--96}, - year={1895}, - publisher={Springer} -} - -@article{birkhoff1957extensions, - title={Extensions of {J}entzsch's theorem}, - author={Birkhoff, Garrett}, - journal={Transactions of the American Mathematical Society}, - volume={85}, - number={1}, - pages={219--227}, - year={1957}, - publisher={JSTOR} -} - -@article{franklin1989scaling, - title={On the scaling of multidimensional matrices}, - author={Franklin, Joel and Lorenz, Jens}, - journal={Linear Algebra and Its Applications}, - volume={114}, - pages={717--735}, - year={1989}, - publisher={Elsevier} -} - -@article{lemmens2013birkhoff, - title={Birkhoff’s version of {H}ilbert’s metric and its applications in analysis}, - author={Lemmens, Bas and Nussbaum, Roger D}, - journal={Handbook of Hilbert Geometry}, - pages={275--303}, - year={2014} -} - -@article{kohlberg1982contraction, - title={The contraction mapping approach to the {P}erron-{F}robenius theory: Why {H}ilbert's metric?}, - author={Kohlberg, Elon and Pratt, John W}, - journal={Mathematics of Operations Research}, - volume={7}, - number={2}, - pages={198--210}, - year={1982}, - publisher={INFORMS} -} - -@article{bushell1973hilbert, - title={Hilbert's metric and positive contraction mappings in a {B}anach space}, - author={Bushell, Peter J}, - 
journal={Archive for Rational Mechanics and Analysis}, - volume={52}, - number={4}, - pages={330--338}, - year={1973}, - publisher={Springer} -} - -@article{bushell1973projective, - title={On the projective contraction ratio for positive linear mappings}, - author={Bushell, PJ}, - journal={Journal of the London Mathematical Society}, - volume={2}, - number={2}, - pages={256--258}, - year={1973}, - publisher={Narnia} -} - -@article{bauer1965elementary, - title={An elementary proof of the {H}opf inequality for positive operators}, - author={Bauer, Friedrich L}, - journal={Numerische Mathematik}, - volume={7}, - number={4}, - pages={331--337}, - year={1965}, - publisher={Springer} -} - -@article{ostrowski1964positive, - title={Positive matrices and functional analysis}, - author={Ostrowski, AM}, - journal={Recent advances in matrix theory}, - pages={81--101}, - year={1964}, - publisher={Univ. of Wisconsin Press Madison} -} - -@article{hopf1963inequality, - title={An inequality for positive linear integral operators}, - author={Hopf, Eberhard}, - journal={Journal of Mathematics and Mechanics}, - pages={683--692}, - year={1963}, - publisher={JSTOR} -} - -@article{jolicoeur2020adversarial, - title={Adversarial score matching and improved sampling for image generation}, - author={Jolicoeur-Martineau, Alexia and Pich{\'e}-Taillefer, R{\'e}mi and Tachet des Combes, R{\'e}mi and Mitliagkas, Ioannis}, - journal={International Conference on Learning Representations}, - year={2021} -} - -@inproceedings{song2020improved, - title={Improved techniques for training score-based generative models}, - author={Song, Yang and Ermon, Stefano}, - booktitle={Advances in Neural Information Processing Systems}, - year={2020} -} - -@article{song2020denoising, - title={Denoising Diffusion Implicit Models}, - author={Song, Jiaming and Meng, Chenlin and Ermon, Stefano}, - journal={arXiv preprint arXiv:2010.02502}, - year={2020} -} - -@inproceedings{song2019generative, - title={Generative 
modeling by estimating gradients of the data distribution}, - author={Song, Yang and Ermon, Stefano}, - booktitle={Advances in Neural Information Processing Systems}, - year={2019} -} - -@article{ho2020denoising, - title={Denoising diffusion probabilistic models}, - author={Ho, Jonathan and Jain, Ajay and Abbeel, Pieter}, - journal={Advances in Neural Information Processing Systems}, - year={2020} -} - -@article{ruschendorf1995convergence, - title={Convergence of the iterative proportional fitting procedure}, - author={R{\"u}schendorf, Ludger}, - journal={The Annals of Statistics}, - volume={23}, - number={4}, - pages={1160--1174}, - year={1995}, - publisher={Institute of Mathematical Statistics} -} - -@article{kruithof1937telefoonverkeersrekening, - title={Telefoonverkeersrekening}, - author={Kruithof, J}, - journal={De Ingenieur}, - volume={52}, - pages={15--25}, - year={1937} -} - -@article{soules1991rate, - title={The rate of convergence of Sinkhorn balancing}, - author={Soules, George W}, - journal={Linear algebra and Its Applications}, - volume={150}, - pages={3--40}, - year={1991}, - publisher={North-Holland} -} - -@incollection{brossard2018iterated, - title={Iterated proportional fitting procedure and infinite products of stochastic matrices}, - author={Brossard, Jean and Leuridan, Christophe}, - booktitle={S{\'e}minaire de Probabilit{\'e}s XLIX}, - pages={75--117}, - year={2018}, - publisher={Springer} -} - -@article{fienberg1970iterative, - title={An iterative procedure for estimation in contingency tables}, - author={Fienberg, Stephen E and others}, - journal={The Annals of Mathematical Statistics}, - volume={41}, - number={3}, - pages={907--917}, - year={1970}, - publisher={Institute of Mathematical Statistics} -} - -@article{brown1993order, - title={Order independence and factor convergence in iterative scaling}, - author={Brown, Jack B and Chase, Phillip J and Pittenger, Arthur O}, - journal={Linear Algebra and Its Applications}, - volume={190}, - 
pages={1--38}, - year={1993}, - publisher={Elsevier} -} - -@article{pukelsheim2009iterative, - title={On the iterative proportional fitting procedure: Structure of accumulation points and L1-error analysis}, - author={Pukelsheim, Friedrich and Simeone, Bruno}, - year={2009} -} - -@article{sinkhorn1967concerning, - title={Concerning nonnegative matrices and doubly stochastic matrices}, - author={Sinkhorn, Richard and Knopp, Paul}, - journal={Pacific Journal of Mathematics}, - volume={21}, - number={2}, - pages={343--348}, - year={1967}, - publisher={Mathematical Sciences Publishers} -} - -@article{bacharach1965estimating, - title={Estimating nonnegative matrices from marginal data}, - author={Bacharach, Michael}, - journal={International Economic Review}, - volume={6}, - number={3}, - pages={294--310}, - year={1965}, - publisher={JSTOR} -} - -@article{gietl2013accumulation, - title={Accumulation points of the iterative proportional fitting procedure}, - author={Gietl, Christoph and Reffel, Fabian P}, - journal={Metrika}, - volume={76}, - number={6}, - pages={783--798}, - year={2013}, - publisher={Springer} -} - -@article{knight2008sinkhorn, - title={The Sinkhorn--Knopp algorithm: convergence and applications}, - author={Knight, Philip A}, - journal={SIAM Journal on Matrix Analysis and Applications}, - volume={30}, - number={1}, - pages={261--275}, - year={2008}, - publisher={SIAM} -} - -@inproceedings{song2020score, - author = {Yang Song and - Jascha Sohl{-}Dickstein and - Diederik P. 
Kingma and - Abhishek Kumar and - Stefano Ermon and - Ben Poole}, - title = {Score-Based Generative Modeling through Stochastic Differential Equations}, - booktitle = {International Conference on Learning Representations}, - year = {2021} -} - -@article{hoogeboom2021argmax, - title={Argmax Flows and Multinomial Diffusion: Towards Non-Autoregressive Language Models}, - author={Hoogeboom, Emiel and Nielsen, Didrik and Jaini, Priyank and Forr{\'e}, Patrick and Welling, Max}, - journal={arXiv preprint arXiv:2102.05379}, - year={2021} -} - -@article{durkan2021maximum, - title={On Maximum Likelihood Training of Score-Based Generative Models}, - author={Durkan, Conor and Song, Yang}, - journal={arXiv preprint arXiv:2101.09258}, - year={2021} -} - -@article{chen2020wavegrad, - title={WaveGrad: Estimating gradients for waveform generation}, - author={Chen, Nanxin and Zhang, Yu and Zen, Heiga and Weiss, Ron J and Norouzi, Mohammad and Chan, William}, - journal={International Conference on Learning Representations}, - year={2021} -} - -@article{kong2020diffwave, - title={Diffwave: A versatile diffusion model for audio synthesis}, - author={Kong, Zhifeng and Ping, Wei and Huang, Jiaji and Zhao, Kexin and Catanzaro, Bryan}, - journal={International Conference on Learning Representations}, - year={2021} -} - -@article{cai2020learning, - title={Learning gradient fields for shape generation}, - author={Cai, Ruojin and Yang, Guandao and Averbuch-Elor, Hadar and Hao, Zekun and Belongie, Serge and Snavely, Noah and Hariharan, Bharath}, - journal={European Conference on Computer Vision}, - year={2020} -} - -@article{hyvarinen2005estimation, - title={Estimation of non-normalized statistical models by score matching.}, - author={Hyv{\"a}rinen, Aapo and Dayan, Peter}, - journal={Journal of Machine Learning Research}, - volume={6}, - number={4}, - year={2005} -} - -% TRUC - -@article{guo2019agem, - title={AGEM: Solving Linear Inverse Problems via Deep Priors and Sampling}, - author={Guo, 
Bichuan and Han, Yuxing and Wen, Jiangtao}, - journal={Advances in Neural Information Processing Systems}, - volume={32}, - pages={547--558}, - year={2019} -} - -@article{kadkhodaie2020solving, - title={Solving linear inverse problems using the prior implicit in a denoiser}, - author={Kadkhodaie, Zahra and Simoncelli, Eero P}, - journal={arXiv preprint arXiv:2007.13640}, - year={2020} -} - -@article{romano2017little, - title={The little engine that could: Regularization by denoising (RED)}, - author={Romano, Yaniv and Elad, Michael and Milanfar, Peyman}, - journal={SIAM Journal on Imaging Sciences}, - volume={10}, - number={4}, - pages={1804--1844}, - year={2017}, - publisher={SIAM} -} - -@inproceedings{ryu2019plug, - title={Plug-and-play methods provably converge with properly trained denoisers}, - author={Ryu, Ernest and Liu, Jialin and Wang, Sicheng and Chen, Xiaohan and Wang, Zhangyang and Yin, Wotao}, - booktitle={International Conference on Machine Learning}, - pages={5546--5557}, - year={2019}, - organization={PMLR} -} - -@article{sreehari2016plug, - title={Plug-and-play priors for bright field electron tomography and sparse interpolation}, - author={Sreehari, Suhas and Venkatakrishnan, S Venkat and Wohlberg, Brendt and Buzzard, Gregery T and Drummy, Lawrence F and Simmons, Jeffrey P and Bouman, Charles A}, - journal={IEEE Transactions on Computational Imaging}, - volume={2}, - number={4}, - pages={408--423}, - year={2016}, - publisher={IEEE} -} - -@inproceedings{venkatakrishnan2013plug, - title={Plug-and-play priors for model based reconstruction}, - author={Venkatakrishnan, Singanallur V and Bouman, Charles A and Wohlberg, Brendt}, - booktitle={2013 IEEE Global Conference on Signal and Information Processing}, - pages={945--948}, - year={2013}, - organization={IEEE} -} - -@article{xu2020provable, - title={Provable Convergence of Plug-and-Play Priors With MMSE Denoisers}, - author={Xu, Xiaojian and Sun, Yu and Liu, Jiaming and Wohlberg, Brendt and Kamilov, 
Ulugbek S}, - journal={IEEE Signal Processing Letters}, - volume={27}, - pages={1280--1284}, - year={2020}, - publisher={IEEE} -} - -@article{lecun2006tutorial, - title={A tutorial on energy-based learning}, - author={LeCun, Yann and Chopra, Sumit and Hadsell, Raia and Ranzato, M and Huang, F}, - journal={Predicting structured data}, - volume={1}, - number={0}, - year={2006} -} - -@article{goodfellow2014generative, - title={Generative adversarial networks}, - author={Goodfellow, Ian J and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, - journal={arXiv preprint arXiv:1406.2661}, - year={2014} -} - -@inproceedings{arjovsky2017wasserstein, - title={Wasserstein generative adversarial networks}, - author={Arjovsky, Martin and Chintala, Soumith and Bottou, L{\'e}on}, - booktitle={International conference on machine learning}, - pages={214--223}, - year={2017}, - organization={PMLR} -} - -@article{salimans2016improved, - title={Improved techniques for training gans}, - author={Salimans, Tim and Goodfellow, Ian and Zaremba, Wojciech and Cheung, Vicki and Radford, Alec and Chen, Xi}, - journal={arXiv preprint arXiv:1606.03498}, - year={2016} -} - -@article{arjovsky2017towards, - title={Towards principled methods for training generative adversarial networks}, - author={Arjovsky, Martin and Bottou, L{\'e}on}, - journal={arXiv preprint arXiv:1701.04862}, - year={2017} -} - -@article{debortoli2021maximum, -author = {De Bortoli, Valentin and Desolneux, Agnès and Durmus, Alain and Galerne, Bruno and Leclaire, Arthur}, -title = {Maximum Entropy Methods for Texture Synthesis: Theory and Practice}, -journal = {SIAM Journal on Mathematics of Data Science}, -volume = {3}, -number = {1}, -pages = {52-82}, -year = {2021}, -doi = {10.1137/19M1307731}, - -URL = { - https://doi.org/10.1137/19M1307731 - -}, -eprint = { - https://doi.org/10.1137/19M1307731 - -} - -} - - -@article{dessein2017parameter, - 
title={Parameter estimation in finite mixture models by regularized optimal transport: A unified framework for hard and soft clustering}, - author={Dessein, Arnaud and Papadakis, Nicolas and Deledalle, Charles-Alban}, - journal={arXiv preprint arXiv:1711.04366}, - year={2017} -} - -@incollection{follmer1985entropy, - title={An entropy approach to the time reversal of diffusion processes}, - author={F{\"o}llmer, Hans}, - booktitle={Stochastic Differential Systems: Filtering and Control}, - pages={156--163}, - year={1985}, - publisher={Springer} -} - -@article{kodali2017convergence, - title={On convergence and stability of gans}, - author={Kodali, Naveen and Abernethy, Jacob and Hays, James and Kira, Zsolt}, - journal={arXiv preprint arXiv:1705.07215}, - year={2017} -} - -@article{miyato2018spectral, - title={Spectral normalization for generative adversarial networks}, - author={Miyato, Takeru and Kataoka, Toshiki and Koyama, Masanori and Yoshida, Yuichi}, - journal={arXiv preprint arXiv:1802.05957}, - year={2018} -} - - -@article {mikami2004monge, - AUTHOR = {Mikami, Toshio}, - TITLE = {Monge's problem with a quadratic cost by the zero-noise limit - of {$h$}-path processes}, - JOURNAL = {Probability Theory and Related Fields}, - FJOURNAL = {Probability Theory and Related Fields}, - VOLUME = {129}, - YEAR = {2004}, - NUMBER = {2}, - PAGES = {245--260}, - ISSN = {0178-8051}, - MRCLASS = {60J25}, - MRNUMBER = {2063377}, -MRREVIEWER = {Vadim A. 
Ka\u{\i}manovich}, - DOI = {10.1007/s00440-004-0340-4}, - URL = {https://doi.org/10.1007/s00440-004-0340-4}, -} - -@article{clason2021entropic, - title={Entropic regularization of continuous optimal transport problems}, - author={Clason, Christian and Lorenz, Dirk A and Mahler, Hinrich and Wirth, Benedikt}, - journal={Journal of Mathematical Analysis and Applications}, - volume={494}, - number={1}, - pages={124432}, - year={2021}, - publisher={Elsevier} -} - -@article {mikami2008optimal, - AUTHOR = {Mikami, Toshio and Thieullen, Mich\`ele}, - TITLE = {Optimal transportation problem by stochastic optimal control}, - JOURNAL = {SIAM J. Control Optim.}, - FJOURNAL = {SIAM Journal on Control and Optimization}, - VOLUME = {47}, - YEAR = {2008}, - NUMBER = {3}, - PAGES = {1127--1139}, - ISSN = {0363-0129}, - MRCLASS = {49Q20 (60J25)}, - MRNUMBER = {2407010}, -MRREVIEWER = {Guy Jumarie}, - DOI = {10.1137/050631264}, - URL = {https://doi.org/10.1137/050631264}, -} - -@article {mikami2006duality, - AUTHOR = {Mikami, Toshio and Thieullen, Mich\`ele}, - TITLE = {Duality theorem for the stochastic optimal control problem}, - JOURNAL = {Stochastic Process. 
Appl.}, - FJOURNAL = {Stochastic Processes and their Applications}, - VOLUME = {116}, - YEAR = {2006}, - NUMBER = {12}, - PAGES = {1815--1835}, - ISSN = {0304-4149}, - MRCLASS = {93E20 (49N15 60J60)}, - MRNUMBER = {2307060}, -MRREVIEWER = {Shanjian Tang}, - DOI = {10.1016/j.spa.2006.04.014}, - URL = {https://doi.org/10.1016/j.spa.2006.04.014}, -} - - - -@book {ambrosio200gradient, - AUTHOR = {Ambrosio, Luigi and Gigli, Nicola and Savar\'{e}, Giuseppe}, - TITLE = {Gradient Flows in Metric Spaces and in the Space of - Probability Measures}, - SERIES = {Lectures in Mathematics ETH Z\"{u}rich}, - EDITION = {Second}, - PUBLISHER = {Birkh\"{a}user Verlag, Basel}, - YEAR = {2008}, - PAGES = {x+334}, - ISBN = {978-3-7643-8721-1}, - MRCLASS = {49-02 (28A33 35K55 35K90 49Q20 60B05)}, - MRNUMBER = {2401600}, -MRREVIEWER = {Pietro Celada}, -} - -@book{goldrei2017classic, - title={Classic Set Theory: For Guided Independent Study}, - author={Goldrei, DC}, - year={2017}, - publisher={Routledge} -} - -@book {enderton1977elements, - AUTHOR = {Enderton, Herbert B.}, - TITLE = {Elements of Set Theory}, - PUBLISHER = {Academic Pres, New - York-London}, - YEAR = {1977}, - PAGES = {xiv+279}, - MRCLASS = {04-01 (02-01)}, - MRNUMBER = {0439636}, -MRREVIEWER = {J. E. Rubin}, -} - -@article {kober1939, - AUTHOR = {Kober, H.}, - TITLE = {A theorem on {B}anach spaces}, - JOURNAL = {Compositio Mathematica}, - FJOURNAL = {Compositio Mathematica}, - VOLUME = {7}, - YEAR = {1939}, - PAGES = {135--140}, - ISSN = {0010-437X}, - MRCLASS = {46.3X}, - MRNUMBER = {350}, -MRREVIEWER = {I. 
Halperin}, - URL = {http://www.numdam.org/item?id=CM_1940__7__135_0}, -} - - -@article{leonard2014survey, - title={A survey of the {S}chr{\"o}dinger problem and some of its connections with optimal transport}, - author={L{\'e}onard, Christian}, - journal={Discrete \& Continuous Dynamical Systems-A}, - volume={34}, - number={4}, - pages={1533--1574}, - year={2014} -} - -@article {fournier2015rate, - AUTHOR = {Fournier, Nicolas and Guillin, Arnaud}, - TITLE = {On the rate of convergence in {W}asserstein distance of the - empirical measure}, - JOURNAL = {Probability Theory and Related Fields}, - FJOURNAL = {Probability Theory and Related Fields}, - VOLUME = {162}, - YEAR = {2015}, - NUMBER = {3-4}, - PAGES = {707--738}, - ISSN = {0178-8051}, - MRCLASS = {60F25 (60E15 60F10)}, - MRNUMBER = {3383341}, -MRREVIEWER = {Jos\'{e} Trashorras}, - DOI = {10.1007/s00440-014-0583-7}, - URL = {https://doi.org/10.1007/s00440-014-0583-7}, -} - -@inproceedings{lin2017refinenet, - title={Refinenet: Multi-path refinement networks for high-resolution semantic segmentation}, - author={Lin, Guosheng and Milan, Anton and Shen, Chunhua and Reid, Ian}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={1925--1934}, - year={2017} -} -@article{chen2020optimal, - title={Optimal Transport in Systems and Control}, - author={Chen, Yongxin and Georgiou, Tryphon T and Pavon, Michele}, - journal={Annual Review of Control, Robotics, and Autonomous Systems}, - volume={4}, - year={2021} -} - -@article{leger2020gradient, - title={A gradient descent perspective on {S}inkhorn}, - author={L{\'e}ger, Flavien}, - journal={Applied Mathematics \& Optimization}, - pages={1--13}, - year={2020}, - publisher={Springer} -} - -@article{leonard2014reciprocal, - title={Reciprocal processes: a measure-theoretical point of view}, - author={L{\'e}onard, Christian and R{\oe}lly, Sylvie and Zambrini, Jean-Claude and others}, - journal={Probability Surveys}, - volume={11}, - 
pages={237--269}, - year={2014}, - publisher={The Institute of Mathematical Statistics and the Bernoulli Society} -} - -@article{cattiaux2021time, - title={Time reversal of diffusion processes under a finite entropy condition}, - author={Cattiaux, Patrick and Conforti, Giovanni and Gentil, Ivan and L{\'e}onard, Christian}, - journal={arXiv preprint arXiv:2104.07708}, - year={2021} -} - -@article{vincent2011connection, - title={A connection between score matching and denoising autoencoders}, - author={Vincent, Pascal}, - journal={Neural Computation}, - volume={23}, - number={7}, - pages={1661--1674}, - year={2011}, - publisher={MIT Press} -} - -@article{saharia2021image, - title={Image Super-Resolution via Iterative Refinement}, - author={Saharia, Chitwan and Ho, Jonathan and Chan, William and Salimans, Tim and Fleet, David J and Norouzi, Mohammad}, - journal={arXiv preprint arXiv:2104.07636}, - year={2021} -} - -@book {dellacherie1988, - AUTHOR = {Dellacherie, Claude and Meyer, Paul-Andr\'{e}}, - TITLE = {Probabilities and Potential. {C}}, - SERIES = {North-Holland Mathematics Studies}, - VOLUME = {151}, - NOTE = {Potential theory for discrete and continuous semigroups, - Translated from the French by J. 
Norris}, - PUBLISHER = {North-Holland Publishing Co., Amsterdam}, - YEAR = {1988}, - PAGES = {xiv+416}, - ISBN = {0-444-70386-1}, - MRCLASS = {60-02 (31-02 31C20 60J45)}, - MRNUMBER = {939365}, -} - -@article{laumont2021bayesian, - title={Bayesian imaging using Plug \& Play priors: when {L}angevin meets {T}weedie}, - author={Laumont, R{\'e}mi and De Bortoli, Valentin and Almansa, Andr{\'e}s and Delon, Julie and Durmus, Alain and Pereyra, Marcelo}, - journal={arXiv preprint arXiv:2103.04715}, - year={2021} -} - - -@book {revuz1999continuous, - AUTHOR = {Revuz, Daniel and Yor, Marc}, - TITLE = {Continuous Martingales and {B}rownian Motion}, - SERIES = {Grundlehren der Mathematischen Wissenschaften [Fundamental - Principles of Mathematical Sciences]}, - VOLUME = {293}, - EDITION = {Third}, - PUBLISHER = {Springer-Verlag, Berlin}, - YEAR = {1999}, - PAGES = {xiv+602}, - ISBN = {3-540-64325-7}, - MRCLASS = {60G44 (60G07 60H05)}, - MRNUMBER = {1725357}, - DOI = {10.1007/978-3-662-06400-9}, - URL = {https://doi.org/10.1007/978-3-662-06400-9}, -} - -@book {kullback1997information, - AUTHOR = {Kullback, Solomon}, - TITLE = {Information Theory and Statistics}, - NOTE = {Reprint of the second (1968) edition}, - PUBLISHER = {Dover Publications, Inc., Mineola, NY}, - YEAR = {1997}, - PAGES = {xvi+399}, - ISBN = {0-486-69684-7}, - MRCLASS = {62B10}, - MRNUMBER = {1461541}, -} - - -@article{gao2020learning, - title={Learning Energy-Based Models by Diffusion Recovery Likelihood}, - author={Gao, Ruiqi and Song, Yang and Poole, Ben and Wu, Ying Nian and Kingma, Diederik P}, - journal={arXiv preprint arXiv:2012.08125}, - year={2020} -} - -@article {constantine1996faadibruno, - AUTHOR = {Constantine, G. M. and Savits, T. 
H.}, - TITLE = {A multivariate {F}a\`a di {B}runo formula with applications}, - JOURNAL = {Transactions of the American Mathematical Society}, - FJOURNAL = {Transactions of the American Mathematical Society}, - VOLUME = {348}, - YEAR = {1996}, - NUMBER = {2}, - PAGES = {503--520}, - ISSN = {0002-9947}, - MRCLASS = {05A15 (05A19 60G20)}, - MRNUMBER = {1325915}, -MRREVIEWER = {F. T. Howard}, - DOI = {10.1090/S0002-9947-96-01501-2}, - URL = {https://doi.org/10.1090/S0002-9947-96-01501-2}, -} - -@article {leha1984diffusion, - AUTHOR = {Leha, G. and Ritter, G.}, - TITLE = {On diffusion processes and their semigroups in {H}ilbert - spaces with an application to interacting stochastic systems}, - JOURNAL = {The Annals of Probability}, - FJOURNAL = {The Annals of Probability}, - VOLUME = {12}, - YEAR = {1984}, - NUMBER = {4}, - PAGES = {1077--1112}, - ISSN = {0091-1798}, - MRCLASS = {60J60 (35K99 47D05 60K35)}, - MRNUMBER = {757771}, -MRREVIEWER = {Hermann Rost}, - URL = - {http://links.jstor.org/sici?sici=0091-1798(198411)12:4<1077:ODPATS>2.0.CO;2-Q&origin=MSN}, -} - -@book {ethier1986markov, - AUTHOR = {Ethier, Stewart N. and Kurtz, Thomas G.}, - TITLE = {Markov Processes}, - SERIES = {Wiley Series in Probability and Mathematical Statistics: - Probability and Mathematical Statistics}, - NOTE = {Characterization and convergence}, - PUBLISHER = {John Wiley \& Sons, Inc., New York}, - YEAR = {1986}, - PAGES = {x+534}, - ISBN = {0-471-08186-8}, - MRCLASS = {60J25 (60B10 60F05 60F17 60G44 60J80)}, - MRNUMBER = {838085}, -MRREVIEWER = {S. R. S. 
Varadhan}, - DOI = {10.1002/9780470316658}, - URL = {https://doi.org/10.1002/9780470316658}, -} - -@article{luhman2020diffusion, - title={Diffusion models for Handwriting Generation}, - author={Luhman, Troy and Luhman, Eric}, - journal={arXiv preprint arXiv:2011.06704}, - year={2020} -} - -@article{nichol2021improved, - title={Improved denoising diffusion probabilistic models}, - author={Nichol, Alex and Dhariwal, Prafulla}, - journal={arXiv preprint arXiv:2102.09672}, - year={2021} -} - -@article{nichol2021beatgans, - title={Diffusion models beat {GAN} on Image Synthesis}, - author={Dhariwal, Prafulla and Nichol, Alex}, - journal={arXiv preprint arXiv:2105.05233}, - year={2021} -} - -@article{popov2021gradtts, - title={Grad-TTS: A Diffusion Probabilistic Model for Text-to-Speech}, - author={Vadim Popov and Ivan Vovk and Vladimir Gogoryan and Tasnima Sadekova and Mikhail Kudinov}, - year={2021}, - journal={arXiv preprint arXiV:2105.06337} -} - -@incollection{leonard2014some, - title={Some properties of path measures}, - author={L{\'e}onard, Christian}, - booktitle={S{\'e}minaire de Probabilit{\'e}s XLVI}, - pages={207--230}, - year={2014}, - publisher={Springer} -} - -@inproceedings{vaswani2017attention, - title={Attention is all you need}, - author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, Lukasz and Polosukhin, Illia}, - booktitle={Advances in Neural Information Processing Systems}, - year={2017} -} - - -@inproceedings{genevay2018learning, - title={Learning generative models with {S}inkhorn divergences}, - author={Genevay, Aude and Peyr{\'e}, Gabriel and Cuturi, Marco}, - booktitle={International Conference on Artificial Intelligence and Statistics}, - year={2018} -} - -@inproceedings{genevay2019sample, - title={Sample complexity of {S}inkhorn divergences}, - author={Genevay, Aude and Chizat, L{\'e}naic and Bach, Francis and Cuturi, Marco and Peyr{\'e}, Gabriel}, - 
booktitle={International Conference on Artificial Intelligence and Statistics}, - pages={1574--1583}, - year={2019}, - organization={PMLR} -} - -@article{salimans2017pixelcnn++, - title={Pixelcnn++: Improving the pixelcnn with discretized logistic mixture likelihood and other modifications}, - author={Salimans, Tim and Karpathy, Andrej and Chen, Xi and Kingma, Diederik P}, - journal={arXiv preprint arXiv:1701.05517}, - year={2017}, -} - -@inproceedings{niu2020permutation, - title={Permutation invariant graph generation via score-Based generative modeling}, - author={Niu, Chenhao and Song, Yang and Song, Jiaming and Zhao, Shengjia and Grover, Aditya and Ermon, Stefano}, - booktitle={International Conference on Artificial Intelligence and Statistics}, - year={2020} -} - -@article{san2021noise, - title={Noise Estimation for Generative Diffusion Models}, - author={San-Roman, Robin and Nachmani, Eliya and Wolf, Lior}, - journal={arXiv preprint arXiv:2104.02600}, - year={2021} -} - -@article{luhman2021knowledge, - title={Knowledge Distillation in Iterative Generative Models for Improved Sampling Speed}, - author={Luhman, Eric and Luhman, Troy}, - journal={arXiv preprint arXiv:2101.02388}, - year={2021} -} - -@article{heusel2017gans, - title={{GAN}s trained by a two time-scale update rule converge to a local {N}ash equilibrium}, - author={Heusel, Martin and Ramsauer, Hubert and Unterthiner, Thomas and Nessler, Bernhard and Hochreiter, Sepp}, - journal={arXiv preprint arXiv:1706.08500}, - year={2017} -} - -@article{mnist, - added-at = {2010-06-28T21:16:30.000+0200}, - author = {LeCun, Yann and Cortes, Corinna}, - biburl = {https://www.bibsonomy.org/bibtex/2935bad99fa1f65e03c25b315aa3c1032/mhwombat}, - groups = {public}, - howpublished = {http://yann.lecun.com/exdb/mnist/}, - interhash = {21b9d0558bd66279df9452562df6e6f3}, - intrahash = {935bad99fa1f65e03c25b315aa3c1032}, - keywords = {MSc _checked character_recognition mnist network neural}, - lastchecked = {2016-01-14 
14:24:11}, - timestamp = {2016-07-12T19:25:30.000+0200}, - title = {{MNIST} handwritten digit database}, - url = {http://yann.lecun.com/exdb/mnist/}, - username = {mhwombat}, - year = 2010 -} - -@inproceedings{liu2015faceattributes, - title = {Deep Learning Face Attributes in the Wild}, - author = {Liu, Ziwei and Luo, Ping and Wang, Xiaogang and Tang, Xiaoou}, - booktitle = {International Conference on Computer Vision}, - month = {December}, - year = {2015} -} - -@article{dimarino2020optimal, - title={An Optimal Transport approach for the {S}chr{\"o}dinger bridge problem and convergence of {S}inkhorn algorithm}, - author={Di Marino, Simone and Gerolin, Augusto}, - journal={Journal of Scientific Computing}, - volume={85}, - number={2}, - pages={1--28}, - year={2020}, -} - - -@article {memoli2011gromov, - AUTHOR = {M\'{e}moli, Facundo}, - TITLE = {Gromov-{W}asserstein distances and the metric approach to - object matching}, - JOURNAL = {Foundations of Computational Mathematics}, - FJOURNAL = {Foundations of Computational Mathematics. The Journal of the - Society for the Foundations of Computational Mathematics}, - VOLUME = {11}, - YEAR = {2011}, - NUMBER = {4}, - PAGES = {417--487}, - ISSN = {1615-3375}, - MRCLASS = {68T10 (49Q15 54E35 60B05 60B10 68U10)}, - MRNUMBER = {2811584}, - DOI = {10.1007/s10208-011-9093-5}, - URL = {https://doi.org/10.1007/s10208-011-9093-5}, -} - -@article{scikit-learn, - title={Scikit-learn: Machine Learning in {P}ython}, - author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. - and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. - and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and - Cournapeau, D. and Brucher, M. and Perrot, M. 
and Duchesnay, E.}, - journal={Journal of Machine Learning Research}, - volume={12}, - pages={2825--2830}, - year={2011} -} - -@ARTICLE{cohen2017emnist, - author = {Gregory Cohen and - Saeed Afshar and - Jonathan Tapson and - Andr{\'{e}} van Schaik}, - title = {{EMNIST:} an extension of {MNIST} to handwritten letters}, - journal={arXiv preprint arXiv:1702.05373}, - year = 2017, -} - -@article{finlay2020learning, - title={Learning normalizing flows from Entropy-{K}antorovich potentials}, - author={Finlay, Chris and Gerolin, Augusto and Oberman, Adam M and Pooladian, Aram-Alexandre}, - journal={arXiv preprint arXiv:2006.06033}, - year={2020} -} - -@article{vargas2021solving, - title={Solving {S}chr{\"o}dinger Bridges via Maximum Likelihood}, - author={Francisco Vargas and Pierre Thodoroff and Neil D. Lawrence and Austen Lamacraft}, - year={2021}, - journal={arXiv preprint arXiv:2106.02081} -} - -@article{carlier2020differential, - title={A Differential Approach to the Multi-Marginal {S}chr{\"o}dinger System}, - author={Carlier, Guillaume and Laborde, Maxime}, - journal={SIAM Journal on Mathematical Analysis}, - volume={52}, - number={1}, - pages={709--717}, - year={2020}, - publisher={SIAM} -} - -@article{silvester2000determinants, - title={Determinants of block matrices}, - author={Silvester, John R}, - journal={The Mathematical Gazette}, - volume={84}, - number={501}, - pages={460--467}, - year={2000}, - publisher={JSTOR} -} - -@article{jolicoeur2021gotta, - title={Gotta Go Fast When Generating Data with Score-Based Models}, - author={Jolicoeur-Martineau, Alexia and Li, Ke and Pich{\'e}-Taillefer, R{\'e}mi and Kachman, Tal and Mitliagkas, Ioannis}, - journal={arXiv preprint arXiv:2105.14080}, - year={2021} -} - -@article{watson2021learning, - title={Learning to Efficiently Sample from Diffusion Probabilistic Models}, - author={Daniel Watson and Jonathan Ho and Mohammad Norouzi and William Chan}, - year={2021}, - journal={arXiv preprint arXiv:2106.03802} -} - - 
-@article{block2020generative, - title={Generative modeling with denoising auto-encoders and {L}angevin sampling}, - author={Block, Adam and Mroueh, Youssef and Rakhlin, Alexander}, - journal={arXiv preprint arXiv:2002.00107}, - year={2020} -} - -@article{raginsky2019theoretical, - title = {Theoretical guarantees for sampling and inference in generative models with latent diffusions}, - author = {Tzen, Belinda and Raginsky, Maxim}, - journal = {Conference on Learning Theory}, - volume = {99}, - pages = {3084--3114}, - year = {2019}, -} - -@article{bernton2021entropic, - title={Entropic optimal transport: geometry and large deviations}, - author={Bernton, Espen and Ghosal, Promit and Nutz, Marcel}, - journal={arXiv preprint arXiv:2102.04397}, - year={2021} -} - -@article{carlier2021linear, - title={On the linear convergence of the multi-marginal {S}inkhorn algorithm}, - author={Carlier, Guillaume}, - journal={HAL preprint hal-03176512}, - year={2021} -} - -@article{gigli2011holder, - title={On {H}{\"o}lder continuity-in-time of the optimal transport map towards measures along a curve}, - author={Gigli, Nicola}, - journal={Proceedings of the Edinburgh Mathematical Society}, - volume={54}, - number={2}, - pages={401--409}, - year={2011}, - publisher={Cambridge University Press} -} - -@inproceedings{debortoli2021neurips, - title={Diffusion {S}chr{\"o}dinger Bridge with Applications to Score-Based Generative Modeling}, - author={De Bortoli, Valentin and Thornton, James and Heng, Jeremy and Doucet, Arnaud}, - booktitle={Advances in Neural Information Processing Systems}, - year={2021} -} - -@inproceedings{corenflos2021differentiable, - title={Differentiable particle filtering via entropy-regularized optimal transport}, - author={Corenflos, Adrien and Thornton, James and Deligiannidis, George and Doucet, Arnaud}, - booktitle={International Conference on Machine Learning}, - year={2021} -} - - -@article{huang2021schrodinger, - title={Schr{\"o}dinger-{F}{\"o}llmer Sampler: 
Sampling without Ergodicity}, - author={Huang, Jian and Jiao, Yuling and Kang, Lican and Liao, Xu and Liu, Jin and Liu, Yanyan}, - journal={arXiv preprint arXiv:2106.10880}, - year={2021} -} - -@inproceedings{altschuler2017near, - title={Near-linear time approximation algorithms for optimal transport via {S}inkhorn iteration}, - author={Altschuler, Jason and Weed, Jonathan and Rigollet, Philippe}, - booktitle={Advances in Neural Information Processing Systems}, - year={2017}, -} - - - - -@article{chen2021stochastic, - title={Stochastic Control Liaisons: {R}ichard {S}inkhorn Meets {G}aspard {M}onge on a {S}chrödinger Bridge}, - author={Chen, Yongxin and Georgiou, Tryphon T and Pavon, Michele}, - journal={SIAM Review}, - volume={63}, - number={2}, - pages={249--313}, - year={2021}, - publisher={SIAM} -} - -@article{chen2021optimal, - title={Optimal transport in systems and control}, - author={Chen, Yongxin and Georgiou, Tryphon T and Pavon, Michele}, - journal={Annual Review of Control, Robotics, and Autonomous Systems}, - volume={4}, - pages={89--113}, - year={2021}, - publisher={Annual Reviews} -} - -@inproceedings{luise2019sinkhorn, - title={Sinkhorn barycenters with free support via {F}rank-{W}olfe algorithm}, - author={Luise, Giulia and Salzo, Saverio and Pontil, Massimiliano and Ciliberto, Carlo}, - booktitle={Advances in Neural Information Processing Systems}, - year={2019} -} - -@inproceedings{li2020continuous, - title={Continuous Regularized {W}asserstein Barycenters}, - author={Li, Lingxiao and Genevay, Aude and Yurochkin, Mikhail and Solomon, Justin M}, - booktitle={Advances in Neural Information Processing Systems}, - year={2020} -} - -@book{klebaner2012introduction, - title={Introduction to stochastic calculus with applications}, - author={Klebaner, Fima C}, - year={2012}, - publisher={World Scientific Publishing Company} -} - -@book{hsu2002stochastic, - title={Stochastic Analysis on Manifolds}, - author={Hsu, Elton P}, - number={38}, - year={2002}, - 
publisher={American Mathematical Society} -} - -@book{lee2006riemannian, - title={Riemannian Manifolds: An Introduction to Curvature}, - author={Lee, John M}, - volume={176}, - year={2006}, - publisher={Springer Science \& Business Media} -} - -@book{lee2018introduction, - title={Introduction to Riemannian manifolds}, - author={Lee, John M}, - year={2018}, - publisher={Springer} -} - -@book{lee2010introduction, - title={Introduction to Topological Manifolds}, - author={Lee, John}, - volume={202}, - year={2010}, - publisher={Springer Science \& Business Media} -} - -@incollection{lee2013smooth, - title={Smooth Manifolds}, - author={Lee, John M}, - booktitle={Introduction to Smooth Manifolds}, - pages={1--31}, - year={2013}, - publisher={Springer} -} - -@inproceedings{kurtz1995stratonovich, - title={Stratonovich stochastic differential equations driven by general semimartingales}, - author={Kurtz, Thomas G and Pardoux, {\'E}tienne and Protter, Philip}, - booktitle={Annales de l'IHP Probabilit{\'e}s et statistiques}, - volume={31}, - number={2}, - pages={351--377}, - year={1995} -} - -@article{jorgensen1975central, - title={The central limit problem for geodesic random walks}, - author={J{\o}rgensen, Erik}, - journal={Zeitschrift f{\"u}r Wahrscheinlichkeitstheorie und verwandte Gebiete}, - volume={32}, - number={1-2}, - pages={1--64}, - year={1975}, - publisher={Springer} -} - -@article{mathieu2020riemannian, - title={Riemannian continuous normalizing flows}, - author={Mathieu, Emile and Nickel, Maximilian}, - journal={arXiv preprint arXiv:2006.10605}, - year={2020} -} - -@book{kolar2013natural, - title={Natural Operations in Differential Geometry}, - author={Kol{\'a}r, Ivan and Michor, Peter W and Slov{\'a}k, Jan}, - year={2013}, - publisher={Springer Science \& Business Media} -} - -@book{kobayashi1963foundations, - title={Foundations of Differential Geometry}, - author={Kobayashi, Shoshichi and Nomizu, Katsumi}, - volume={1}, - number={2}, - year={1963}, - 
publisher={New York, London} -} - -@inproceedings{gunther1991isometric, - title={Isometric embeddings of {R}iemannian manifolds, {K}yoto, 1990}, - author={Gunther, Matthias}, - booktitle={Proc. Intern. Congr. Math.}, - pages={1137--1143}, - year={1991}, - organization={Math. Soc. Japan} -} - -@book{chavel1984eigenvalues, - title={Eigenvalues in Riemannian Geometry}, - author={Chavel, Isaac}, - year={1984}, - publisher={Academic press} -} - -@article{li1986large, - title={Large time behavior of the heat equation on complete manifolds with non-negative Ricci curvature}, - author={Li, Peter}, - journal={Annals of Mathematics}, - volume={124}, - number={1}, - pages={1--21}, - year={1986}, - publisher={JSTOR} -} - -@book{federer2014geometric, - title={Geometric Measure Theory}, - author={Federer, Herbert}, - year={2014}, - publisher={Springer} -} - - -@article{devito2019Reproducing, - title = {Reproducing Kernel {{Hilbert}} Spaces on Manifolds: {{Sobolev}} and {{Diffusion}} Spaces}, - shorttitle = {Reproducing Kernel {{Hilbert}} Spaces on Manifolds}, - author = {De Vito, Ernesto and M{\"u}cke, Nicole and Rosasco, Lorenzo}, - year = {2019}, - month = may, - url = {https://arxiv.org/abs/1905.10913v1}, - langid = {english} -} - -@article{borovitskiy2020Matern, - title = {Matern {{Gaussian}} Processes on {{Riemannian}} Manifolds}, - author = {Borovitskiy, Viacheslav and Terenin, Alexander and Mostowsky, Peter and Deisenroth, Marc Peter}, - year = {2020}, - month = jun, - journal = {arXiv:2006.10160 [cs, stat]}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/2006.10160} -} - -@article{jammalamadaka2019Harmonic, - title = {Harmonic Analysis and Distribution-Free Inference for Spherical Distributions}, - author = {Jammalamadaka, S. 
Rao and Terdik, Gy{\"o}rgy H.}, - year = {2019}, - month = may, - journal = {Journal of Multivariate Analysis}, - volume = {171}, - pages = {436--451}, - issn = {0047-259X}, - doi = {10.1016/j.jmva.2019.01.012}, - langid = {english} -} - -@article{jones2008Manifold, - title = {Manifold {{Parametrizations}} by {{Eigenfunctions}} of the {{Laplacian}} and {{Heat Kernels}}}, - author = {Jones, Peter W. and Maggioni, Mauro and Schul, Raanan}, - year = {2008}, - journal = {Proceedings of the National Academy of Sciences of the United States of America}, - volume = {105}, - number = {6}, - pages = {1803--1808}, - publisher = {{National Academy of Sciences}}, - issn = {0027-8424}, - url = {https://www.jstor.org/stable/25451369} -} - -@article{li2019Variational, - title = {Variational {{Diffusion Autoencoders}} with {{Random Walk Sampling}}}, - author = {Li, Henry and Lindenbaum, Ofir and Cheng, Xiuyuan and Cloninger, Alexander}, - year = {2019}, - month = oct, - journal = {arXiv:1905.12724 [cs, stat]}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/1905.12724}, - langid = {english} -} - - -@book{mardia2000Directional, - title = {Directional Statistics}, - author = {Mardia, K. V. and Jupp, Peter E.}, - year = {2000}, - series = {Wiley Series in Probability and Statistics}, - publisher = {{J. Wiley}}, - address = {{Chichester ; New York}}, - isbn = {978-0-471-95333-3}, - langid = {english}, - lccn = {QA276 .J864 2000} -} - - - -@article{u.g.haussmann1986Time, - title = {Time {{Reversal}} of {{Diffusions}}}, - author = {{U. G. Haussmann} and {E. 
Pardoux}}, - year = {1986}, - month = oct, - journal = {The Annals of Probability}, - volume = {14}, - number = {4}, - pages = {1188--1205}, - doi = {10.1214/aop/1176992362} -} - -@article{garnelo2018neural, - title = {Neural processes}, - author = {Garnelo, Marta and Schwarz, Jonathan and Rosenbaum, Dan and Viola, Fabio and Rezende, Danilo J and Eslami, SM and Teh, Yee Whye}, - journal = {arXiv preprint arXiv:1807.01622}, - year = {2018} -} - -@inproceedings{garnelo2018conditional, - title = {Conditional Neural Processes}, - author = {Garnelo, Marta and Rosenbaum, Dan and Maddison, Christopher and Ramalho, Tiago and Saxton, David and Shanahan, Murray and Teh, Yee Whye and Rezende, Danilo and Eslami, S. M. Ali}, - booktitle = {Proceedings of the 35th International Conference on Machine Learning}, - pages = {1704--1713}, - year = {2018}, - editor = {Dy, Jennifer and Krause, Andreas}, - volume = {80}, - series = {Proceedings of Machine Learning Research}, - month = {10--15 Jul}, - publisher = {PMLR}, - pdf = {http://proceedings.mlr.press/v80/garnelo18a/garnelo18a.pdf}, - url = {https://proceedings.mlr.press/v80/garnelo18a.html}, - abstract = {Deep neural networks excel at function approximation, yet they are typically trained from scratch for each new function. On the other hand, Bayesian methods, such as Gaussian Processes (GPs), exploit prior knowledge to quickly infer the shape of a new function at test time. Yet, GPs are computationally expensive, and it can be hard to design appropriate priors. In this paper we propose a family of neural models, Conditional Neural Processes (CNPs), that combine the benefits of both. CNPs are inspired by the flexibility of stochastic processes such as GPs, but are structured as neural networks and trained via gradient descent. CNPs make accurate predictions after observing only a handful of training data points, yet scale to complex functions and large datasets. 
We demonstrate the performance and versatility of the approach on a range of canonical machine learning tasks, including regression, classification and image completion.} -} - -@article{hairer2011Solving, - title = {Solving {{Differential Equations}} on {{Manifolds}}}, - author = {Hairer, Ernst}, - year = {2011}, - pages = {55}, - langid = {english}, - url={https://www.unige.ch/~hairer/poly-sde-mani.pdf} -} - -@article{cohen2021riemannian, - title={Riemannian Convex Potential Maps}, - author={Cohen, Samuel and Amos, Brandon and Lipman, Yaron}, - journal={arXiv preprint arXiv:2106.10272}, - year={2021} -} - -@article{dupont2021Generative, - title = {Generative {{Models}} as {{Distributions}} of {{Functions}}}, - author = {Dupont, Emilien and Teh, Yee Whye and Doucet, Arnaud}, - year = {2021}, - month = may, - journal = {arXiv:2102.04776 [cs, stat]}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/2102.04776} -} - -@article{rezende2020Normalizing, - title = {Normalizing flows on tori and spheres}, - author = {Rezende, Danilo Jimenez and Papamakarios, George and Racani{\`e}re, S{\'e}bastien and Albergo, Michael S. and Kanwar, Gurtej and Shanahan, Phiala E. and Cranmer, Kyle}, - year = {2020}, - month = feb, - journal = {arXiv:2002.02428}, - primaryclass = {cs, stat}, - url = {http://arxiv.org/abs/2002.02428}, - langid = {english} -} - -@article{rezende2021Implicit, - title = {Implicit {R}iemannian concave potential maps}, - author = {Rezende, Danilo J. and Racani{\`e}re, S{\'e}bastien}, - year = {2021}, - journal = {arXiv:2110.01288}, -} - -@article{falorsi2021Continuous, - title = {Continuous Normalizing Flows on Manifolds}, - author = {Falorsi, Luca}, - year = {2021}, - month = mar, - journal = {arXiv:2104.14959}, -} - -@book{elworthy1982Stochastic, - title = {Stochastic Differential Equations on Manifolds}, - author = {Elworthy, K. 
D.}, - year = {1982}, - series = {London Mathematical Society Lecture Note Series}, - publisher = {{Cambridge University Press}}, - address = {{Cambridge}}, - doi = {10.1017/CBO9781107325609}, - collection = {London Mathematical Society Lecture Note Series} -} - -@article{hyvarinenEstimation, - title = {Estimation of {{Non}}-{{Normalized Statistical Models}} by {{Score Matching}}}, - author = {Hyvarinen, Aapo}, - pages = {14}, - year = {2005}, - langid = {english} -} - -@article{song2019Sliced, - title = {Sliced {{Score Matching}}: A {{Scalable Approach}} to {{Density}} and {{Score Estimation}}}, - shorttitle = {Sliced {{Score Matching}}}, - author = {Song, Yang and Garg, Sahaj and Shi, Jiaxin and Ermon, Stefano}, - year = {2019}, - month = may, - url = {https://arxiv.org/abs/1905.07088v2}, - langid = {english} -} - - -@incollection{oksendal2003stochastic, - title={Stochastic differential equations}, - author={{\O}ksendal, Bernt}, - booktitle={Stochastic differential equations}, - pages={65--84}, - year={2003}, - publisher={Springer} -} - -@misc{kushner1974stochastic, - title={Stochastic Differential Equations (II Gihman and AV Skorohod)}, - author={Kushner, Harold}, - year={1974}, - publisher={Society for Industrial and Applied Mathematics} -} - -@article{kohler2020Equivariant, - title = {Equivariant {{Flows}}: Exact Likelihood Generative Learning for Symmetric Densities}, - shorttitle = {Equivariant {{Flows}}}, - author = {K{\"o}hler, Jonas and Klein, Leon and No{\'e}, Frank}, - year = {2020}, - month = jun, - journal = {arXiv:2006.02425 [physics, stat]}, - primaryclass = {physics, stat}, - url = {http://arxiv.org/abs/2006.02425} -} - -@article{zhao2018Exact, - title = {Exact Heat Kernel on a Hypersphere and Its Applications in Kernel {{SVM}}}, - author = {Zhao, Chenchao and Song, Jun S.}, - year = {2018}, - month = jan, - journal = {Frontiers in Applied Mathematics and Statistics}, - volume = {4}, - pages = {1}, - issn = {2297-4687}, - doi = 
{10.3389/fams.2018.00001}, - langid = {english} -} - -@InProceedings{holderrieth2021equivariant, - title = {Equivariant Learning of Stochastic Fields: Gaussian Processes and Steerable Conditional Neural Processes}, - author = {Holderrieth, Peter and Hutchinson, Michael J and Teh, Yee Whye}, - booktitle = {International Conference on Machine Learning}, - year = {2021}, -} - -@article{finzi2021Practicala, - title = {A {{Practical Method}} for {{Constructing Equivariant Multilayer Perceptrons}} for {{Arbitrary Matrix Groups}}}, - author = {Finzi, Marc and Welling, Max and Wilson, Andrew Gordon}, - year = {2021}, - month = apr, - journal = {arXiv:2104.09459 [cs, math, stat]}, - primaryclass = {cs, math, stat}, - url = {http://arxiv.org/abs/2104.09459} -} - -@inproceedings{ - anonymous2022geodiff, - title={GeoDiff: A Geometric Diffusion Model for Molecular Conformation Generation}, - author={Anonymous}, - booktitle={Submitted to The Tenth International Conference on Learning Representations }, - year={2022}, - url={https://openreview.net/forum?id=PzcvxEMzvQC}, - note={under review} -} - -@inproceedings{ - anonymous2022pseudo, - title={Pseudo Numerical Methods for Diffusion Models on Manifolds}, - author={Anonymous}, - booktitle={Submitted to The Tenth International Conference on Learning Representations }, - year={2022}, - url={https://openreview.net/forum?id=PlKWVd2yBkY}, - note={under review} -} - - -@article{anderson1982reverse, - title={Reverse-time diffusion equation models}, - author={Anderson, Brian DO}, - journal={Stochastic Processes and their Applications}, - volume={12}, - number={3}, - pages={313--326}, - year={1982}, - publisher={Elsevier} -} -@article{mijatovic2020note, - title = {A Note on the Exact Simulation of Spherical {{Brownian}} Motion}, - author = {Mijatovi{\'c}, Aleksandar and Mramor, Veno and Bravo, Ger{\'o}nimo Uribe}, - year = {2020}, - month = oct, - journal = {Statistics \& Probability Letters}, - volume = {165}, - pages = {108836}, - issn = 
{01677152}, - doi = {10.1016/j.spl.2020.108836}, - langid = {english} -} - - -@article{he2013lower, - title={A lower bound for the first eigenvalue in the Laplacian operator on compact Riemannian manifolds}, - author={He, Yue}, - journal={Journal of Geometry and Physics}, - volume={71}, - pages={73--84}, - year={2013}, - publisher={Elsevier} -} - -@article{gemici2016normalizing, - title={Normalizing flows on {R}iemannian manifolds}, - author={Gemici, Mevlana C and Rezende, Danilo and Mohamed, Shakir}, - journal={arXiv preprint arXiv:1611.02304}, - year={2016} -} - -@inproceedings{falorsi2019reparameterizing, - title={Reparameterizing distributions on lie groups}, - author={Falorsi, Luca and de Haan, Pim and Davidson, Tim R and Forr{\'e}, Patrick}, - booktitle={International Conference on Artificial Intelligence and Statistics}, - pages={3244--3253}, - year={2019}, -} - -@article{kalatzis2021multi, - title={Multi-chart flows}, - author={Kalatzis, Dimitris and Ye, Johan Ziruo and Wohlert, Jesper and Hauberg, S{\o}ren}, - journal={arXiv preprint arXiv:2106.03500}, - year={2021} -} - -@article{rozen2021moser, - title={Moser Flow: Divergence-based Generative Modeling on Manifolds}, - author={Rozen, Noam and Grover, Aditya and Nickel, Maximilian and Lipman, Yaron}, - journal={Advances in Neural Information Processing Systems}, - volume={34}, - year={2021} -} - -@article{mathieu2019continuous, - title={Continuous Hierarchical Representations with {P}oincar\'e Variational Auto-Encoders}, - author={Mathieu, Emile and Lan, Charline Le and Maddison, Chris J and Tomioka, Ryota and Teh, Yee Whye}, - journal={arXiv preprint arXiv:1901.06033}, - year={2019} -} - -@article{falorsi2020neural, - title={Neural ordinary differential equations on manifolds}, - author={Falorsi, Luca and Forr{\'e}, Patrick}, - journal={arXiv preprint arXiv:2006.06663}, - year={2020} -} - -@article{boomsma2008generative, - title={A generative, probabilistic model of local protein structure}, - 
author={Boomsma, Wouter and Mardia, Kanti V and Taylor, Charles C and Ferkinghoff-Borg, Jesper and Krogh, Anders and Hamelryck, Thomas}, - journal={Proceedings of the National Academy of Sciences}, - volume={105}, - number={26}, - pages={8932--8937}, - year={2008}, - publisher={National Acad Sciences} -} - -@article{hamelryck2006sampling, - title={Sampling realistic protein conformations using local structural bias}, - author={Hamelryck, Thomas and Kent, John T and Krogh, Anders}, - journal={PLoS Computational Biology}, - volume={2}, - number={9}, - pages={e131}, - year={2006}, - publisher={Public Library of Science San Francisco, USA} -} - -@article{karpatne2018machine, - title={Machine learning for the geosciences: Challenges and opportunities}, - author={Karpatne, Anuj and Ebert-Uphoff, Imme and Ravela, Sai and Babaie, Hassan Ali and Kumar, Vipin}, - journal={IEEE Transactions on Knowledge and Data Engineering}, - volume={31}, - number={8}, - pages={1544--1554}, - year={2018}, - publisher={IEEE} -} - -@article{peel2001fitting, - title={Fitting mixtures of Kent distributions to aid in joint set identification}, - author={Peel, David and Whiten, William J and McLachlan, Geoffrey J}, - journal={Journal of the American Statistical Association}, - volume={96}, - number={453}, - pages={56--63}, - year={2001}, - publisher={Taylor \& Francis} -} - -@article{roy2007learning, - title={Learning annotated hierarchies from relational data}, - author={Roy, Daniel M and Kemp, Charles and Mansinghka, Vikash and B Tenenbaum, Joshua}, - year={2007}, - publisher={Carnegie Mellon University} -} - -@article{steyvers2005large, - title={The large-scale structure of semantic networks: Statistical analyses and a model of semantic growth}, - author={Steyvers, Mark and Tenenbaum, Joshua B}, - journal={Cognitive science}, - volume={29}, - number={1}, - pages={41--78}, - year={2005}, - publisher={Wiley Online Library} -} - -@article{mardia2008multivariate, - title={A multivariate von 
{M}ises distribution with applications to bioinformatics}, - author={Mardia, Kanti V and Hughes, Gareth and Taylor, Charles C and Singh, Harshinder}, - journal={Canadian Journal of Statistics}, - volume={36}, - number={1}, - pages={99--109}, - year={2008}, - publisher={Wiley Online Library} -} - -@article{shapovalov2011smoothed, - title={A smoothed backbone-dependent rotamer library for proteins derived from adaptive kernel density estimates and regressions}, - author={Shapovalov, Maxim V and Dunbrack Jr, Roland L}, - journal={Structure}, - volume={19}, - number={6}, - pages={844--858}, - year={2011}, - publisher={Elsevier} -} - -@inproceedings{feiten2013rigid, - title={Rigid motion estimation using mixtures of projected {G}aussians}, - author={Feiten, Wendelin and Lang, Muriel and Hirche, Sandra}, - booktitle={International Conference on Information Fusion}, - pages={1465--1472}, - year={2013}, - organization={IEEE} -} - -@inproceedings{senanayake2018directional, - title={Directional grid maps: modeling multimodal angular uncertainty in dynamic environments}, - author={Senanayake, Ransalu and Ramos, Fabio}, - booktitle={2018 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}, - pages={3241--3248}, - year={2018}, - organization={IEEE} -} - -@article{brehmer2020flows, - title={Flows for simultaneous manifold learning and density estimation}, - author={Brehmer, Johann and Cranmer, Kyle}, - journal={arXiv preprint arXiv:2003.13913}, - year={2020} -} - -@article{davidson2018hyperspherical, - title={Hyperspherical variational auto-encoders}, - author={Davidson, Tim R and Falorsi, Luca and De Cao, Nicola and Kipf, Thomas and Tomczak, Jakub M}, - journal={arXiv preprint arXiv:1804.00891}, - year={2018} -} - -@article{falorsi2018explorations, - title={Explorations in homeomorphic variational auto-encoding}, - author={Falorsi, Luca and de Haan, Pim and Davidson, Tim R and De Cao, Nicola and Weiler, Maurice and Forr{\'e}, Patrick and Cohen, Taco S}, 
- journal={arXiv preprint arXiv:1807.04689}, - year={2018} -} - -@article{rey2019diffusion, - title={Diffusion variational autoencoders}, - author={Rey, Luis A P{\'e}rez and Menkovski, Vlado and Portegies, Jacobus W}, - journal={arXiv preprint arXiv:1901.08991}, - year={2019} -} - -@inproceedings{nagano2019wrapped, - title={A wrapped normal distribution on hyperbolic space for gradient-based learning}, - author={Nagano, Yoshihiro and Yamaguchi, Shoichiro and Fujita, Yasuhiro and Koyama, Masanori}, - booktitle={International Conference on Machine Learning}, - pages={4693--4702}, - year={2019}, - organization={PMLR} -} - - -@article{klimovskaia2020poincare, - title={Poincar{\'e} maps for analyzing complex hierarchies in single-cell data}, - author={Klimovskaia, Anna and Lopez-Paz, David and Bottou, L{\'e}on and Nickel, Maximilian}, - journal={Nature communications}, - volume={11}, - number={1}, - pages={1--9}, - year={2020}, - publisher={Nature Publishing Group} -} - -@article{lui2012advances, - title={Advances in matrix manifolds for computer vision}, - author={Lui, Yui Man}, - journal={Image and Vision Computing}, - volume={30}, - number={6-7}, - pages={380--388}, - year={2012}, - publisher={Elsevier} -} - -@inproceedings{bose2020latent, - title={Latent variable modelling with hyperbolic normalizing flows}, - author={Bose, Joey and Smofsky, Ariella and Liao, Renjie and Panangaden, Prakash and Hamilton, Will}, - booktitle={International Conference on Machine Learning}, - pages={1045--1055}, - year={2020}, - organization={PMLR} -} - -@book{mardia2009directional, - title={Directional Statistics}, - author={Mardia, Kanti V and Jupp, Peter E}, - volume={494}, - year={2009}, - publisher={John Wiley \& Sons} -} - -@inproceedings{navarro2017multivariate, - title={The multivariate generalised von Mises distribution: inference and applications}, - author={Navarro, Alexandre KW and Frellsen, Jes and Turner, Richard E}, - booktitle={Thirty-First AAAI Conference on Artificial 
Intelligence}, - year={2017} -} - - -@article{huang2021variational, - title={A Variational Perspective on Diffusion-Based Generative Models and Score Matching}, - author={Huang, Chin-Wei and Lim, Jae Hyun and Courville, Aaron}, - journal={arXiv preprint arXiv:2106.02808}, - year={2021} -} - -@article{lou2020neural, - title={Neural manifold ordinary differential equations}, - author={Lou, Aaron and Lim, Derek and Katsman, Isay and Huang, Leo and Jiang, Qingxuan and Lim, Ser-Nam and De Sa, Christopher}, - journal={arXiv preprint arXiv:2006.10254}, - year={2020} -} - -@article{lee2021priorgrad, - title={PriorGrad: Improving Conditional Denoising Diffusion Models with Data-Driven Adaptive Prior}, - author={Lee, Sang-gil and Kim, Heeseung and Shin, Chaehun and Tan, Xu and Liu, Chang and Meng, Qi and Qin, Tao and Chen, Wei and Yoon, Sungroh and Liu, Tie-Yan}, - journal={arXiv preprint arXiv:2106.06406}, - year={2021} -} - -@article{sinha2021d2c, - title={D2C: Diffusion-Denoising Models for Few-shot Conditional Generation}, - author={Sinha, Abhishek and Song, Jiaming and Meng, Chenlin and Ermon, Stefano}, - journal={arXiv preprint arXiv:2106.06819}, - year={2021} -} - -@article{batzolis2021conditional, - title={Conditional Image Generation with Score-Based Diffusion Models}, - author={Batzolis, Georgios and Stanczuk, Jan and Sch{\"o}nlieb, Carola-Bibiane and Etmann, Christian}, - journal={arXiv preprint arXiv:2111.13606}, - year={2021} -} - -@article{chung2021come, - title={Come-Closer-Diffuse-Faster: Accelerating Conditional Diffusion Models for Inverse Problems through Stochastic Contraction}, - author={Chung, Hyungjin and Sim, Byeongsu and Ye, Jong Chul}, - journal={arXiv preprint arXiv:2112.05146}, - year={2021} -} - -@article{kawar2021snips, - title={SNIPS: Solving Noisy Inverse Problems Stochastically}, - author={Kawar, Bahjat and Vaksman, Gregory and Elad, Michael}, - journal={arXiv preprint arXiv:2105.14951}, - year={2021} -} - -@article{kawar2021stochastic, - 
title={Stochastic Image Denoising by Sampling from the Posterior Distribution}, - author={Kawar, Bahjat and Vaksman, Gregory and Elad, Michael}, - journal={arXiv preprint arXiv:2101.09552}, - year={2021} -} - -@article{chen2021likelihood, - title={Likelihood Training of {S}chr\"odinger Bridge using Forward-Backward SDEs Theory}, - author={Chen, Tianrong and Liu, Guan-Horng and Theodorou, Evangelos A}, - journal={arXiv preprint arXiv:2110.11291}, - year={2021} -} - -@article{fisher1953dispersion, - title={Dispersion on a sphere}, - author={Fisher, Ronald Aylmer}, - journal={Proceedings of the Royal Society of London. Series A. Mathematical and Physical Sciences}, - volume={217}, - number={1130}, - pages={295--305}, - year={1953}, - publisher={The Royal Society London} -} - -@article{kent1982fisher, - title={The {F}isher-{B}ingham distribution on the sphere}, - author={Kent, John T}, - journal={Journal of the Royal Statistical Society: Series B (Methodological)}, - volume={44}, - number={1}, - pages={71--80}, - year={1982}, - publisher={Wiley Online Library} -} - -@article{mardia2007protein, - title={Protein bioinformatics and mixtures of bivariate von Mises distributions for angular data}, - author={Mardia, Kanti V and Taylor, Charles C and Subramaniam, Ganesh K}, - journal={Biometrics}, - volume={63}, - number={2}, - pages={505--512}, - year={2007}, - publisher={Wiley Online Library} -} - -@article{papamakarios2019normalizing, - title={Normalizing flows for probabilistic modeling and inference}, - author={Papamakarios, George and Nalisnick, Eric and Rezende, Danilo Jimenez and Mohamed, Shakir and Lakshminarayanan, Balaji}, - journal={arXiv preprint arXiv:1912.02762}, - year={2019} -} - -@article{chen2019residual, - title={Residual flows for invertible generative modeling}, - author={Chen, Ricky TQ and Behrmann, Jens and Duvenaud, David and Jacobsen, J{\"o}rn-Henrik}, - journal={arXiv preprint arXiv:1906.02735}, - year={2019} -} - -@article{chen2012triangulated, - 
title={Triangulated manifold meshing method preserving molecular surface topology}, - author={Chen, Minxin and Tu, Bin and Lu, Benzhuo}, - journal={Journal of Molecular Graphics and Modelling}, - volume={38}, - pages={411--418}, - year={2012}, - publisher={Elsevier} -} - -@article{sei2013jacobian, - title={A {J}acobian inequality for gradient maps on the sphere and its application to directional statistics}, - author={Sei, Tomonari}, - journal={Communications in Statistics-Theory and Methods}, - volume={42}, - number={14}, - pages={2525--2542}, - year={2013}, - publisher={Taylor \& Francis} -} - -@article{bao2022analyticdpm, - title={Analytic-DPM: an Analytic Estimate of the Optimal Reverse Variance in Diffusion Probabilistic Models}, - author={Fan Bao and Chongxuan Li and Jun Zhu and Bo Zhang}, - year={2022}, - journal={arXiv preprint arXiv:2201.06503}, -} - - -@article{saloff1994precise, - title={Precise estimates on the rate at which certain diffusions tend to equilibrium}, - author={Saloff-Coste, Laurent}, - journal={Mathematische Zeitschrift}, - volume={217}, - number={1}, - pages={641--677}, - year={1994}, - publisher={Springer} -} - -@article{urakawa2006convergence, - title={Convergence rates to equilibrium of the heat kernels on compact {R}iemannian manifolds}, - author={Urakawa, Hajime}, - journal={Indiana University mathematics journal}, - pages={259--288}, - year={2006}, - publisher={JSTOR} -} - - -@article{bismut1984large, - title={Large deviations and the {M}alliavin calculus}, - author={Bismut, Jean-Michel}, - journal={Birkhauser Prog. 
Math.}, - volume={45}, - year={1984} -} - -@article{chen2021logarithmic, - title={Logarithmic heat kernels: estimates without curvature restrictions}, - author={Chen, Xin and Li, Xue Mei and Wu, Bo}, - journal={arXiv preprint arXiv:2106.02746}, - year={2021} -} - -@article{schiela2020sqp, - title={An {SQP} method for equality constrained optimization on manifolds}, - author={Schiela, Anton and Ortiz, Julian}, - journal={arXiv preprint arXiv:2005.06844}, - year={2020} -} - -@article{zhu2020riemannian, - title={Riemannian conjugate gradient methods with inverse retraction}, - author={Zhu, Xiaojing and Sato, Hiroyuki}, - journal={Computational Optimization and Applications}, - volume={77}, - number={3}, - pages={779--810}, - year={2020}, - publisher={Springer} -} - -@article{goto2021approximated, - title={Approximated logarithmic maps on {R}iemannian manifolds and their applications}, - author={Goto, Jumpei and Sato, Hiroyuki}, - journal={JSIAM Letters}, - volume={13}, - pages={17--20}, - year={2021}, - publisher={The Japan Society for Industrial and Applied Mathematics} -} - -@article{sato2019riemannian, - title={Riemannian stochastic variance reduced gradient algorithm with retraction and vector transport}, - author={Sato, Hiroyuki and Kasai, Hiroyuki and Mishra, Bamdev}, - journal={SIAM Journal on Optimization}, - volume={29}, - number={2}, - pages={1444--1472}, - year={2019}, - publisher={SIAM} -} - -@article{nutzintroduction, -year = {2021}, - title={Introduction to Entropic Optimal Transport}, - author={Nutz, Marcel} -} - -@book{atkinson2012spherical, - title={Spherical Harmonics and Approximations on the Unit Sphere: An Introduction}, - author={Atkinson, Kendall and Han, Weimin}, - volume={2044}, - year={2012}, - publisher={Springer Science \& Business Media} -} - -@incollection{leonard2012girsanov, - title={Girsanov theory under a finite entropy condition}, - author={L{\'e}onard, Christian}, - booktitle={S{\'e}minaire de Probabilit{\'e}s XLIV}, - 
pages={429--465}, - year={2012}, - publisher={Springer} -} - - -@article{nutz2022stability, - title={Stability of {S}chr\"odinger Potentials and Convergence of {S}inkhorn's Algorithm}, - author={Marcel Nutz and Johannes Wiesel}, - year={2022}, - journal={arXiv preprint arXiv:2201.10059}, -} - -@article{garcia2021brenier, - title={Brenier--{S}chr{\"o}dinger problem on compact manifolds with boundary}, - author={Garc{\'\i}a-Zelada, David and Huguet, Baptiste}, - journal={Stochastic Analysis and Applications}, - pages={1--29}, - year={2021}, - publisher={Taylor \& Francis} -} - -@article{leobacher2021existence, - title={Existence, uniqueness and regularity of the projection onto differentiable manifolds}, - author={Leobacher, Gunther and Steinicke, Alexander}, - journal={Annals of Global Analysis and Geometry}, - volume={60}, - number={3}, - pages={559--587}, - year={2021}, - publisher={Springer} -} - -@misc{volcanoe_dataset, - title = {NCEI/WDS Global Significant Volcanic Eruptions Database}, - author={National Geophysical Data Center / World Data Service (NGDC/WDS)}, - year={2022}, - doi = {10.7289/V5JW8BSH}, - howpublished= {https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.ngdc.mgg.hazards:G10147} -} - -@misc{earthquake_dataset, - title = {NCEI/WDS Global Significant Earthquake Database}, - author={National Geophysical Data Center / World Data Service (NGDC/WDS)}, - year={2022}, - doi = {10.7289/V5TD9V7K}, - howpublished= {https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.ngdc.mgg.hazards:G012153} -} - -@misc{flood_dataset, - title ={Global active archive of large flood events}, - author={G Brakenridge}, - year={2017}, - howpublished={http://floodobservatory.colorado.edu/Archives/index.html} -} - -@misc{fire_dataset, - title = {Land, Atmosphere Near real-time Capability for EOS (LANCE) system operated by NASA’s Earth Science Data and Information System (ESDIS)}, - author={EOSDIS}, - year={2020}, - howpublished= 
{https://earthdata.nasa.gov/earth-observation-data/near-real-time/firms/active-fire-data} -} - - -@article{cohen2015harmonic, - title={Harmonic Exponential Families on Manifolds}, - author={Taco S. Cohen and Max Welling}, - year={2015}, - journal={arXiv preprint arXiv:1505.04413}, -} - -@article{ungar2005Einstein, - title = {Einstein's Special Relativity: {{Unleashing}} the Power of Its Hyperbolic Geometry}, - author = {Ungar, A.A.}, - year = {2005}, - month = jan, - journal = {Computers \& Mathematics with Applications}, - volume = {49}, - number = {2}, - pages = {187--221}, - issn = {0898-1221}, - doi = {10.1016/j.camwa.2004.10.030} -} - -@article{kingma2015Adam, - title = {Adam: {{A Method}} for {{Stochastic Optimization}}}, - shorttitle = {Adam}, - author = {Kingma, Diederik P. and Ba, Jimmy}, - year = {2015}, - url = {http://arxiv.org/abs/1412.6980}, - archivePrefix = {arXiv}, - journal = {arXiv:1412.6980 [cs]}, - primaryClass = {cs} -} - -@article{dormand1980family, - title = {A Family of Embedded {{Runge}}-{{Kutta}} Formulae}, - author = {Dormand, R. J. and Prince, J. P.}, - year = {1980}, - pages = {19--26}, - journal = {Journal of Computational and Applied Mathematics} -} - -@inproceedings{grathwohl2019Scalable, - title = {Scalable Reversible Generative Models with Free-Form Continuous Dynamics}, - booktitle = {International Conference on Learning Representations}, - author = {Grathwohl, Will and Chen, Ricky T. Q. and Bettencourt, Jesse and Duvenaud, David}, - year = {2019}, - url = {https://openreview.net/forum?id=rJxgknCcK7} -} - -@article{ambrosio2003Optimal, - title = {Optimal Transport Maps in {{Monge-Kantorovich}} Problem}, - author = {Ambrosio, Luigi}, - year = {2003}, - journal = {arXiv preprint arXiv:0304389v1}, -} - -@article{caterini2021Rectangular, - title = {Rectangular flows for manifold learning}, - author = {Caterini, Anthony L. 
and {Loaiza-Ganem}, Gabriel and Pleiss, Geoff and Cunningham, John P.}, - year = {2021}, - journal = {arXiv preprint arXiv:2106.01413}, - primaryclass = {cs, stat}, -} - -@article{collett1981Discriminating, - title = {Discriminating {{Between}} the {{Von Mises}} and {{Wrapped Normal Distributions}}}, - author = {Collett, D. and Lewis, T.}, - year = {1981}, - month = mar, - journal = {Australian Journal of Statistics}, - volume = {23}, - number = {1}, - pages = {73--79}, - publisher = {{John Wiley \& Sons, Ltd}}, - issn = {0004-9581}, - doi = {10.1111/j.1467-842X.1981.tb00763.x} -} - - -@article{hutchinson1989stochastic, - title={A stochastic estimator of the trace of the influence matrix for Laplacian smoothing splines}, - author={Hutchinson, Michael F}, - journal={Communications in Statistics-Simulation and Computation}, - volume={18}, - number={3}, - pages={1059--1076}, - year={1989}, - publisher={Taylor \& Francis} -} \ No newline at end of file diff --git a/doc/conclusion.tex b/doc/conclusion.tex deleted file mode 100644 index 431daa7..0000000 --- a/doc/conclusion.tex +++ /dev/null @@ -1,27 +0,0 @@ -\section{Discussion and limitations} -\label{sec:conclusion} - -In this paper we introduced Riemannian Score-Based Generative Models (RSGMs), a class of deep generative models that represent target densities supported on compact manifolds, as the reverse diffusion process of a Brownian motion. -The main benefits of our method stem from its scalability to high dimensions, its applicability to a broad class of manifolds due to the diversity of available loss functions and its capacity to model complex datasets. -We proved that RSGMs are universal generative models for densities supported on compact manifolds. \valentin{universal ?} -Empirically, we demonstrated that our method outperforms previous work on density estimation tasks with spherical geoscience datasets. 
- - -% \paragraph{Limitation} -% contrary to NF we cannot estimate using the KL -% ie we need to have access to data and not just the unnormalized density -One current limitation---similarly to other score-based generative models---is the requirement of samples from the targeted distribution, as such models cannot directly fit an unnormalized density. -% Limitation to compact manifold, extension to hyperbolic -% Proof of convergence similar to Theorem 1 in our paper -An important future work direction, and a current limitation, is the manifold compactness assumption. -Several important manifolds do not fit into this category, such as the special linear group, symmetric positive definite matrices or the hyperbolic space which underlies special relativity \citep{ungar2005Einstein}. -\emile{Should write a paragraph in the app on the conditional extension} -Another extension of interest is conditional sampling. By amortizing SGMs with respect to an observation it is possible to approximately sample from a given posterior distribution \citep[see for instance][]{kawar2021snips,kawar2021stochastic,lee2021priorgrad,sinha2021d2c,batzolis2021conditional,chung2021come}. -% Conditional Riemannian SGM (CRSGM). -% Indeed, by amortizing SGM with respect to an observation it is possible to approximately sample from a given posterior distribution, and therefore solve inverse problems, provided that we know how to sample from the corruption model \citep[see for instance][]{kawar2021snips,kawar2021stochastic,lee2021priorgrad,sinha2021d2c,batzolis2021conditional,chung2021come}. 
- - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/diff.tex b/doc/diff.tex deleted file mode 100644 index bbc2b98..0000000 --- a/doc/diff.tex +++ /dev/null @@ -1,1254 +0,0 @@ -\documentclass[11pt,a4paper]{article} -%DIF LATEXDIFF DIFFERENCE FILE -%DIF DEL main_arxiv.tex Mon Oct 18 09:48:07 2021 -%DIF ADD main_aap.tex Mon Oct 18 09:47:45 2021 - -\usepackage[utf8]{inputenc} % allow utf-8 input -\usepackage[T1]{fontenc} % use 8-bit T1 fonts -\usepackage{hyperref} % hyperlinks -\usepackage{url} % simple URL typesetting -\usepackage{booktabs} % professional-quality tables -\usepackage{amsfonts} % blackboard math symbols -\usepackage{nicefrac} % compact symbols for 1/2, etc. -\usepackage{microtype} % microtypography -\usepackage{xcolor} % colors -\usepackage{tikz} -\usepackage{caption} -\usepackage{float} -\usetikzlibrary{arrows.meta} -\usetikzlibrary{calc} - \input{header} - \input{def} - \usepackage{comment} - \usepackage{authblk} -%DIF 21d21 -%DIF < \usepackage{cancel} -%DIF ------- -\makeatletter -\renewcommand\AB@affilsepx{, \protect\Affilfont} -\makeatother -% \usepackage{showlabels} - -\hypersetup{colorlinks,citecolor=blue} - - - -\title{Quantitative Uniform Stability of the Iterative Proportional Fitting Procedure} - -% The \author macro works with any number of authors. There are two commands -% used to separate the names and addresses of multiple authors: \And and \AND. -% -% Using \And between authors leaves it to LaTeX to determine where to break the -% lines. Using \AND forces a line break at that point. So, if LaTeX puts 3 of 4 -% authors names on the first line, and the last on the second line, try using -% \AND instead of \And before the third author name. 
- -% \author{George, Valentin and Arnaud} - -\author{George Deligiannidis\thanks{corresponding author: deligian@stats.ox.ac.uk}~, Valentin De Bortoli\thanks{valentin.debortoli@gmail.com}~, Arnaud Doucet\thanks{doucet@stats.ox.ac.uk}} -\affil{Department of Statistics, University of Oxford, UK} -% \affil[1]{deligian@stats.ox.ac.uk} -% \affil[2]{valentin.debortoli@gmail.com} -% \affil[3]{doucet@stats.ox.ac.uk} -%DIF PREAMBLE EXTENSION ADDED BY LATEXDIFF -%DIF UNDERLINE PREAMBLE %DIF PREAMBLE -\RequirePackage[normalem]{ulem} %DIF PREAMBLE -\RequirePackage{color}\definecolor{RED}{rgb}{1,0,0}\definecolor{BLUE}{rgb}{0,0,1} %DIF PREAMBLE -\providecommand{\DIFaddtex}[1]{{\protect\color{blue}\uwave{#1}}} %DIF PREAMBLE -\providecommand{\DIFdeltex}[1]{{\protect\color{red}\sout{#1}}} %DIF PREAMBLE -%DIF SAFE PREAMBLE %DIF PREAMBLE -\providecommand{\DIFaddbegin}{} %DIF PREAMBLE -\providecommand{\DIFaddend}{} %DIF PREAMBLE -\providecommand{\DIFdelbegin}{} %DIF PREAMBLE -\providecommand{\DIFdelend}{} %DIF PREAMBLE -\providecommand{\DIFmodbegin}{} %DIF PREAMBLE -\providecommand{\DIFmodend}{} %DIF PREAMBLE -%DIF FLOATSAFE PREAMBLE %DIF PREAMBLE -\providecommand{\DIFaddFL}[1]{\DIFadd{#1}} %DIF PREAMBLE -\providecommand{\DIFdelFL}[1]{\DIFdel{#1}} %DIF PREAMBLE -\providecommand{\DIFaddbeginFL}{} %DIF PREAMBLE -\providecommand{\DIFaddendFL}{} %DIF PREAMBLE -\providecommand{\DIFdelbeginFL}{} %DIF PREAMBLE -\providecommand{\DIFdelendFL}{} %DIF PREAMBLE -%DIF HYPERREF PREAMBLE %DIF PREAMBLE -\providecommand{\DIFadd}[1]{\texorpdfstring{\DIFaddtex{#1}}{#1}} %DIF PREAMBLE -\providecommand{\DIFdel}[1]{\texorpdfstring{\DIFdeltex{#1}}{}} %DIF PREAMBLE -%DIF LISTINGS PREAMBLE %DIF PREAMBLE -\RequirePackage{listings} %DIF PREAMBLE -\RequirePackage{color} %DIF PREAMBLE -\lstdefinelanguage{DIFcode}{ %DIF PREAMBLE -%DIF DIFCODE_UNDERLINE %DIF PREAMBLE - moredelim=[il][\color{red}\sout]{\%DIF\ <\ }, %DIF PREAMBLE - moredelim=[il][\color{blue}\uwave]{\%DIF\ >\ } %DIF PREAMBLE -} %DIF PREAMBLE 
-\lstdefinestyle{DIFverbatimstyle}{ %DIF PREAMBLE - language=DIFcode, %DIF PREAMBLE - basicstyle=\ttfamily, %DIF PREAMBLE - columns=fullflexible, %DIF PREAMBLE - keepspaces=true %DIF PREAMBLE -} %DIF PREAMBLE -\lstnewenvironment{DIFverbatim}{\lstset{style=DIFverbatimstyle}}{} %DIF PREAMBLE -\lstnewenvironment{DIFverbatim*}{\lstset{style=DIFverbatimstyle,showspaces=true}}{} %DIF PREAMBLE -%DIF END PREAMBLE EXTENSION ADDED BY LATEXDIFF - -\begin{document} - -\maketitle - -\begin{abstract} - We establish \DIFdelbegin \DIFdel{the uniform in time stability, w.r.t.\ the marginals, }\DIFdelend \DIFaddbegin \DIFadd{that the iterates }\DIFaddend of the Iterative Proportional Fitting Procedure, also known as Sinkhorn\DIFdelbegin \DIFdel{algorithm, }\DIFdelend \DIFaddbegin \DIFadd{'s algorithm and commonly }\DIFaddend used to solve entropy-regularised Optimal Transport problems\DIFdelbegin \DIFdel{. }\DIFdelend \DIFaddbegin \DIFadd{, are stable w.r.t.\ perturbations of the marginals, uniformly in time. }\DIFaddend Our result is quantitative and stated in terms of the 1-Wasserstein metric. As a corollary we establish a quantitative stability result for Schr\"odinger bridges. 
-\end{abstract} - -\section{Introduction} -\label{sec:introduction} - -The basic problem of Optimal Transport (OT) (see \cite{villani2009optimal} for a broad overview), -in its modern formulation introduced by \cite{kantorovich1942transfer}, -is to find a \emph{coupling} of two distributions $\mu, \nu$ that minimises -\begin{equation} - \label{eq:OTproblem}%\tag{$\mathsf{OT}(\mu, \nu)}$} - \tag*{\textsf{OT}$(\mu,\nu)$} - \inf_{\pi \in \mathcal{C}(\mu, \nu)} \int \DIFdelbegin %DIFDELCMD < \rmd %%% -\DIFdelend \pi(\DIFaddbegin \rmd \DIFaddend x, \DIFaddbegin \rmd \DIFaddend y) \|x-y\|^2, -\end{equation} -where \DIFdelbegin \DIFdel{$\mathcal{C}(\mu,\nu)$ denotes the collection of probability measures with marginals $\mu, \nu$ and the }\DIFdelend \DIFaddbegin \DIFadd{the }\DIFaddend Euclidean distance $\|x-y\|^2$ may be replaced by any other metric or cost function $c(x,y)$. -OT provides a theoretical framework for analysis in the space of probability measures and has deep connections with many branches of mathematics including partial differential equations and probability. Beyond its intrinsic interest, OT has recently become an extremely important tool for data science and machine learning, finding numerous applications in fields as diverse as imaging, computer vision, natural language processing \citep{peyre2019computational}. - -This ubiquity of OT in modern applications is largely due to the computational tractability of the \emph{Entropy-Regularised} Optimal Transport problem -\begin{equation} - \label{eq:RegOTproblem}\tag*{\textsf{OT}$_\epsilon(\mu,\nu)$} - \inf_{\pi \in \mathcal{C}(\mu, \nu)} \int \DIFdelbegin %DIFDELCMD < \rmd %%% -\DIFdelend \pi(\DIFaddbegin \rmd \DIFaddend x, \DIFaddbegin \rmd \DIFaddend y) \|x-y\|^2 +\epsilon \KL{\pi}{\mu\otimes \nu}; -\end{equation} -this is closely related to the \emph{static Schr\"odinger bridge}, -a problem going back to \cite{schrodinger1931umkehrung}, see \Cref{eq:schrodinger_bridge} in Section \ref{sec:main-results}. 
-Here $\KL{\pi}{\rho}$ denotes the \emph{Kullback--Leibler divergence} between the probability measures $\pi$ and $\rho$, defined as -\begin{equation} -\KL{\pi}{\rho} := \DIFdelbegin %DIFDELCMD < \begin{cases} -%DIFDELCMD < \int \rmd\pi \log \frac{\rmd \pi}{\rmd \rho}, & \pi \ll \rho,\\ -%DIFDELCMD < +\infty, & \text{otherwise}. -%DIFDELCMD < \end{cases}%%% -\DIFdelend \DIFaddbegin \begin{cases} - \int \rmd\pi \log \frac{\rmd \pi}{\rmd \rho}, & \pi \ll \rho\\ - +\infty, & \text{otherwise}. - \end{cases}\DIFaddend - \end{equation} -In particular, as explained in the seminal paper of \cite{cuturi2013sinkhorn}, \textsf{OT}$_\epsilon(\mu,\nu)$ -is amenable to \emph{Iterative Proportional Fitting Procedure} (IPFP), also known as the Sinkhorn algorithm when applied to discrete measures. The theoretical properties of IPFP and Sinkhorn algorithm have been the subject of intense investigation, and are therefore fairly well understood. In particular, under technical conditions, that are generally satisfied in compact spaces, it is known that IPFP converges at an exponential rate; see e.g.\ \cite{ruschendorf1995convergence} and for results on discrete measures, see e.g. \cite{altschuler2017near}. - -Due to its computational tractability, \textsf{OT}$_\epsilon(\mu,\nu)$ has been used in applications as an approximation to \textsf{OT}$(\mu,\nu)$. Rigorous justification of this approximation has been the subject of intense research recently. Indeed it has been established, see e.g.\ \cite{cominetti1994asymptotic,leonard2012schrodinger,carlier2017convergence}, that as the regularisation parameter $\epsilon \to 0$, the solution of \textsf{OT}$_\epsilon(\mu,\nu)$ converges to that of \textsf{OT}$(\mu,\nu)$. 
-More recently however \DIFdelbegin \DIFdel{, }\DIFdelend Schr\"odinger bridges and entropy-regularised OT are being studied for their own sake, finding applications in control, computational statistics and machine learning, see e.g.\ -\cite{bernton2019schr,chen2021optimal,corenflos2021differentiable,de2021diffusion, huang2021schrodinger, vargas2021solving}. In these applications, the entropy regularisation may be a desirable feature rather than an approximation, and the main source of error is the fact that the marginal distributions are typically intractable and often approximated by empirical versions. It is then desirable that as the number of samples increases, this error vanishes. \DIFdelbegin \DIFdel{\textcolor{orange}{A quantitative version of this statement, can then be used to establish for example that the differentiable particle filter proposed in \cite{corenflos2021differentiable}, based on regularised optimal transport, converges as the sample size increases, for any $\epsilon>0$.} -}%DIFDELCMD < - -%DIFDELCMD < %%% -\DIFdelend This is the question we study in this paper, in particular we study the stability of the IPFP and of the solution of the corresponding Schr\"odinger bridge problem, with respect to perturbations of the marginals. -% One particular motivation for studying this problem is the fact that in typical applications, one only has access to samples from the marginal distributions therefore solving the corresponding OT problem for the result empirical distributions. It is therefore important to know that the resulting transport plan is close to the solution of corresponding problem for the true marginal distributions.\arnaud{repetitive} - -For standard OT, a classical argument -using compactness and cyclical monotonicity guarantees a qualitative version of this result, see e.g.\ \cite[Theorem~5.23, Corollary~5.23]{villani2009optimal}. 
Quantitative versions of this result appeared much more recently, at least in the case of quadratic costs, in \cite{merigot2020quantitative}, \cite{li2020quantitative}, \cite{delalande2021quantitative}. In particular it is established that the optimal transport plans, or maps in the case of absolutely continuous measures, is H\"older continuous in the marginals, with exponent $1/2$ w.r.t.\ the marginals. -It is also known that the exponent $1/2$ is the best possible, see \cite{gigli2011holder}. - -For entropy-regularised OT and the static Schr\"odinger bridge problem, the first qualitative result appeared very recently in \cite{ghosal2021stability}, based on a version of cyclical monotonicity for entropy-regularised OT introduced by \cite{bernton2021entropic}. \DIFdelbegin \DIFdel{\textcolor{orange}{In the quantitative direction, \cite{luise2019sinkhorn} establish Lipschitz continuity of the potentials w.r.t.\ the marginals, measured in the total variation metric, which is too strong to capture the situation where the marginals are being approximated by empirical versions. For \emph{smooth} cost functions, \cite{luise2019sinkhorn} also establish that the sample complexity of learning the potentials is of order $n^2$, leveraging results from \cite{genevay2019sample} on the regularity of potentials and the duality between MMD type metrics and Sobolev spaces. However, if one is interested in learning the Sch\"odinger bridge the situation is more complicated; -the Wasserstein-1 distance between two couplings is lower bounded by the distance of the marginals and so the results by \cite{fournier2015rate} imply that the sample complexity of learning the Schr\"odinger bridge must scale at least as $n^{d}$.} -}%DIFDELCMD < - -%DIFDELCMD < %%% -\DIFdelend We present here the first, to the best of our knowledge, quantitative stability result for entropy-regularised OT. 
In particular, this follows from a stronger result, namely the uniform in time stability of IPFP, that is the Sinkhorn iterates, with respect to the marginal distributions. One interesting fact is that in contrast to the standard OT problem, the solution of the entropy-regularised problem is Lipschitz continuous, in the Wasserstein metric, w.r.t.\ the marginals. However, as the regularisation parameter $\epsilon$ vanishes, the Lipschitz constant blows up as expected by the H\"older continuity of the OT plan. - - -% Qualitative for Schr\"odinger bridge \cite{ghosal2021stability} - -% Non quantitative -- \cite{ghosal2021stability} - -% Theorem 28.9 non quantitative + OT and not regularised OT \cite{villani2009optimal}\george{I think this refers to stability wrt perturbations of the metric space, rather than of the marginals. Theorem 5.23 is for perturbations of marginals} - -% Holder OT \cite{gigli2011holder} - - -\section{Notation} -\label{sec:notation} -For a metric space $(\msz, d_\msz)$, we write \DIFdelbegin \DIFdel{$\diam_\msz$ }\DIFdelend \DIFaddbegin \DIFadd{$\mathfrak{d}_\msz$ }\DIFaddend for the diameter of $\msz$, that is \DIFdelbegin \DIFdel{$\diam_\msz:= \sup\{d_\msz(z,z'): z,z' \in \msz\}$}\DIFdelend \DIFaddbegin \DIFadd{$\mathfrak{d}_\msz:= \sup\{d_\msz(z,z'): z,z' \in \msz\}$}\DIFaddend . We also write $\Mens(\msz)$ to denote the space of Borel measures on $\msz$, $\Pens(\msz)$ for the subspace of Borel probability measures, and $\Pens_p(\msz)$ the collection of Borel probability measures with finite $p$-th moments. -For $\pi \in \Pens(\msx)$, we define the \DIFdelbegin \DIFdel{support }\DIFdelend \DIFaddbegin \DIFadd{support }\DIFaddend of $\pi$ as $$\mathsf{supp}(\pi):= \left\{A\in \mathcal{B}(\msx): \text{$A$ is closed, $\pi(A^\textsf{c})=0$} \right\}.$$ -For two metric spaces $(\msx, d_\msx)$, $(\msy, d_\msy)$, $\Mens(\msx\times \msy)$ and $\Pens(\msx\times\msy)$ are always defined with respect to the product $\sigma$-algebra. 
For a measure $\Pbb\in \Mens(\msx\times\msy)$, we will write $\Pbb_0$, $\Pbb_1$ to denote the first and second marginals respectively. -\DIFdelbegin \DIFdel{For $\mu\in \Pens(\msx), \nu \in \Pens(\msy)$, we let -}\begin{displaymath}\DIFdel{\mathcal{C}(\mu, \nu) := \{\mathbb{P}\in \Pens(\msx \times \msy): \mathbb{P}_0=\mu,\, \mathbb{P}_1=\nu\}.}\end{displaymath}%DIFAUXCMD -\DIFdelend - -For a function $f:\msx \mapsto \mathbb{R}^d$, we write $\|f\|_\infty:= \sup_{x\in \msx} \| f(x)\|$, -where $\|\cdot\|$ denotes the usual Euclidean norm. For a function $f:\msx \mapsto \msy$, we write $\Lip(f)$ for the Lipschitz constant of $f$, that is the best constant $C$ such that -$$\ddy(f(x), f(x')) \leq C \ddx(x,x'), \qquad x, x' \in \msx.$$ -Let -\begin{align} - &\Lip(\msx, \msy) := \{f: \msx \mapsto \msy: \Lip(f)<\infty\},\\ - &\Lip_1(\msx, \msy) := \{f: \msx \mapsto \msy: \Lip(f)\leq 1\}. -\end{align} -We also write $\rmc(\msx, \msy)$ for the class of continuous functions from $\msx$ to $\msy$. - -\section{Main results} -\label{sec:main-results} -Let $(\msx, \ddx)$ , $(\msy, \ddy)$ be two compact metric spaces and write $\mcx, \mcy$ for their respective Borel $\sigma$-algebras. We will use $d$ to denote the metric for both $\msx, \msy$ when the context allows. -Let -%DIF < $\diam_\msx = \sup \ensembleLigne{\ddx(x_0, x_1)}{x_0, x_1 \in \msx}$, -%DIF < $\diam_\msy = \sup \ensembleLigne{\ddy(y_0, y_1)}{y_0, y_1 \in \msy}$ and let -\DIFaddbegin \DIFadd{$\diam_\msx = \sup \ensembleLigne{\ddx(x_0, x_1)}{x_0, x_1 \in \msx}$, -$\diam_\msy = \sup \ensembleLigne{\ddy(y_0, y_1)}{y_0, y_1 \in \msy}$ and let -}\DIFaddend $\pi_0 \in \Pens(\msx), \pi_1 \in \Pens(\msy)$. 
We begin by recalling the -Iterative Proportional Fitting Procedure (IPFP) solving the following \schro -bridge problem -\begin{equation} - \label{eq:schrodinger_bridge} -\Pbb^\star = \argmin \ensembleLigne{\KLLigne{\Pbb}{\Qbb}}{\Pbb \in \Pens(\msx \times \msy) \eqsp , \Pbb_0 = \pi_0 \eqsp , \Pbb_1 = \pi_1} \eqsp , -\end{equation} -where $\Qbb \in \Pens(\msx \times \msy)$ is a reference measure -admitting a density w.r.t.\ $\rho_0 \otimes \rho_1$, with $\rho_0 \in \Pens(\msx)$ equivalent to $\pi_0$, and $\rho_1\in\Pens(\msy)$, equivalent to $\pi_1$; that is for any -$(x,y) \in \msx \times \msy$ -\begin{equation} - \label{eq:ref_measure} - \rmd \Qbb / \rmd (\rho_0 \otimes \rho_1)(x,y) = K(x,y) = \exp[-c(x,y)] \eqsp , -\end{equation} -where we assume that $c \in \Lip(\mcx\times\mcy, \mathbb{R})$\DIFdelbegin \DIFdel{. It }\DIFdelend \DIFaddbegin \DIFadd{; -it }\DIFaddend is easy to see that \Cref{eq:schrodinger_bridge} with the choice $c(x,y):= \|x-y\|^2/\epsilon$ is equivalent to \ref{eq:RegOTproblem}. The following proposition ensures -that we can assume without loss of generality that $\rho_0 = \pi_0$ and -$\rho_1 = \pi_1$. -\begin{proposition} - Let $\Pbb^\star$ solution of \eqref{eq:schrodinger_bridge} - with $\Qbb$ given by \eqref{eq:ref_measure} and $\hat{\Pbb}^\star$ the solution of \eqref{eq:schrodinger_bridge} - with $\Qbb$ such that for any -$(x,y) \in \msx \times \msy$ -\begin{equation} - \label{eq:ref_measure_duo} - \rmd \Qbb / \rmd (\pi_0 \otimes \pi_1)(x,y) = K(x,y) \DIFaddbegin \eqsp \DIFaddend . -\end{equation} -Then $\Pbb^\star = \hat{\Pbb}^\star$ -\end{proposition} -As a consequence, for the rest of this paper, we assume that $\rho_0 = \pi_0$ -and $\rho_1 = \pi_1$. 
In order to solve \eqref{eq:schrodinger_bridge} we
-consider the IPFP sequence which iteratively solves each half-bridge problem,
-\ie \ we define $(\Pbb^n)_{n \in \nset}$ such that for any $n \in \nset$
-\begin{align}
-  \label{eq:ipfp}
-  &\Pbb^{2n+1} = \argmin \DIFdelbegin %DIFDELCMD < \ensembleLigne{\KL{\Pbb}{\Pbb^{2n}}}{\Pbb \in \Pens(\msx \times \msy), \Pbb_0 = \pi_0} %%%
-\DIFdelend \DIFaddbegin \ensembleLigne{\KL{\Pbb}{\Pbb^{2n}}}{\Pbb \in \Pens(\msx \times \msy) \eqsp , \Pbb_0 = \pi_0} \DIFaddend \eqsp , \\
-  &\Pbb^{2n+2} = \argmin \ensembleLigne{\KL{\Pbb}{\Pbb^{2n+1}}}{\Pbb \in \Pens(\msx \times \msy) \eqsp , \Pbb_1 = \pi_1} \DIFaddbegin \eqsp \DIFaddend ,
-\end{align}
-with $\Pbb^0 = \Qbb$, where recall that $\Pbb_0, \Pbb_1$ denote the marginals of the joint distribution $\Pbb$. Our main result is a quantitative
-uniform stability estimate for the IPFP.
-
-\begin{theorem}
-  \label{thm:stability_ipfp}
-  For any $\pi_0, \hat{\pi}_0 \in \Pens(\msx)$,
-  $\pi_1, \hat{\pi}_1 \in \Pens(\msy)$ let $(\Pbb^{n})_{n\in \nset}$ and
-  $(\hat{\Pbb}^{n})_{n\in \nset}$ be the IPFP sequences with marginals
-  $(\pi_0, \pi_1)$ respectively $(\hat{\pi}_0, \hat{\pi}_1)$. Then for any $n \in \nset$ we have
-  \begin{equation}
-    \wassersteinD[1](\Pbb^n, \hat{\Pbb}^n) \leq C \defEns{\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)},
-  \end{equation}
-  with
-  \begin{equation}
-    C = \rme^{10 \normLigne{c}_\infty} \{1 + (2\Lip(c)+10) (\diam_\msx + \diam_\msy) \}.
-  \end{equation}
-\end{theorem}
-
-As an immediate consequence of \Cref{thm:stability_ipfp} and the fact that
-the IPFP sequence converges, we obtain the quantitative stability of the \schro bridge.
- -\begin{corollary} - \label{thm:stability_schro} - For any $\pi_0, \hat{\pi}_0 \in \Pens(\msx)$, - $\pi_1, \hat{\pi}_1 \in \Pens(\msy)$ let $\Pbb^\star$, respectively $\hat{\Pbb}^\star$, be - the \schro bridge with marginals $(\pi_0, \pi_1)$, respectively - $(\hat{\pi}_0, \hat{\pi}_1)$. Then for any - $n \in \nset$ we have - \begin{equation} - \wassersteinD[1](\Pbb^\star, \hat{\Pbb}^\star) \leq C \defEns{\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)}, - \end{equation} - with $C$ as in \Cref{thm:stability_ipfp}. -% \begin{equation} -% C = \rme^{10 \normLigne{c}_\infty} \{1 + (2\Lip(c)+10) (\diam_\msx + \diam_\msy) \}. -% \end{equation} -\end{corollary} - - -\DIFdelbegin %DIFDELCMD < \begin{remark} -%DIFDELCMD < %%% -\DIFdel{Although the constants are far from sharp, Lipschitz continuity in the marginals is the best one can hope. In particular this implies a sample complexity of $n^{d}$ for learning the Schr\"odinger bridge, see \mbox{%DIFAUXCMD -\cite{fournier2015rate}}\hspace{0pt}%DIFAUXCMD -. -}%DIFDELCMD < - -%DIFDELCMD < %%% -\DIFdel{For any $\mathbb{P}\in \mathcal{C}(\pi_0, \hat{\pi}_0)$, $\mathbb{P}'\in \mathcal{C}(\pi_1, \hat{\pi}_1)$ we have that -}\begin{eqnarray*} -\DIFdel{\wassersteinD[1](\mathbb{P}, \mathbb{P}') -}&\DIFdel{= \sup_{f \in Lip(\msx\times \msy)} \int f \left[ \rmd \mathbb{P}-\rmd \mathbb{P}' \right]\geq \sup_{f \in Lip(\msx)} \int f \left[ \rmd \mathbb{P}-\rmd \mathbb{P}' \right] = \wassersteinD[1](\pi_0, \hat{\pi}_0), -}\end{eqnarray*}%DIFAUXCMD -\DIFdel{and a similar calculation shows that $\wassersteinD[1](\mathbb{P}, \mathbb{P}')\geq \min\{\wassersteinD[1](\pi_0, \hat{\pi}_0), \wassersteinD[1](\pi_1, \hat{\pi}_1)\}$. - Therefore the sample complexity for learning the Schr\"odinger bridge cannot be better than $n^{d}$. 
-}%DIFDELCMD < \end{remark} -%DIFDELCMD < - -%DIFDELCMD < %%% -\DIFdelend % Note that solving -% \eqref{eq:schrodinger_bridge} is equivalent to solving -% \begin{equation} -% \textstyle{\Pbb^\star = \argmin \ensembleLigne{\int_{\msx \times \msy} c(x,y) \rmd \Pbb(x,y) + \KL{\Pbb}{\pi_0 \otimes \pi_1}}{\Pbb \in \Pens(\msx \times \msy) \eqsp , \Pbb_0 = \pi_0 \eqsp , \Pbb_1 = \pi_1} \eqsp . } -% \end{equation} - - -\section{Proof} -\label{sec:proof} -% \subsection{Sketch of proof} -% We first give a sketch of the proof to explain the main ideas. The first result we obtained was \Cref{thm:stability_schro} through the following argument: -% the solution to \Cref{eq:schrodinger_bridge} will take the form -% $\Pbb^\ast(\rmd x, \rmd y)= f(x) g(y) K(x,y)\rmd x \rmd y$, where $F=(f,g)$ are fixed points of the IPFP scheme. Abusing notation, let us write $\mathfrak{S}_{\mu,\nu}$ for the map generated by the IPFP iterations with respect to the measures $\mu,\nu$. For two measures $\mu,\nu$ let $F_{\mu,\nu}$ be the fixed of the map $\mathfrak{S}_{\mu,\nu}$. -% Let $\hat\mu, \hat\nu$ be approximations of $\mu,\nu$. -% We then have -% \begin{align} -% d_H( F_{\mu,\nu}, F_{\hat \mu, \hat\nu}) -% &=d_H( \mathfrak{S}_{\mu,\nu}F_{\mu,\nu}, \mathfrak{S}_{\hat \mu, \hat\nu}F_{\hat \mu, \hat\nu})\\ -% &\leq d_H( S_{\mu,\nu}F_{\mu,\nu}, \mathfrak{S}_{\hat \mu, \hat\nu}F_{\mu,\nu}) -% + d_H( \mathfrak{S}_{\hat \mu, \hat\nu}F_{\mu,\nu}, \mathfrak{S}_{\hat \mu, \hat\nu}F_{\hat \mu, \hat\nu}). -% \end{align} -% We know that $\mathfrak{S}_{\mu,\nu}$ is a contraction w.r.t.\ the Hilbert metric, for all $\mu,\nu$ so we have for $\kappa<1$ -% \begin{align} -% d_H( F_{\mu,\nu}, F_{\hat \mu, \hat\nu}) -% &\leq d_H( S_{\mu,\nu}F_{\mu,\nu}, \mathfrak{S}_{\hat \mu, \hat\nu}F_{\mu,\nu}) -% +\kappa d_H( F_{\mu,\nu}, F_{\hat \mu, \hat\nu})\\ -% d_H( F_{\mu,\nu}, F_{\hat \mu, \hat\nu}) & \leq \frac{1}{1-\kappa}d_H( S_{\mu,\nu}F_{\mu,\nu}, \mathfrak{S}_{\hat \mu, \hat\nu}F_{\mu,\nu}). 
-% \end{align} -% If we can now show that -% $$d_H( \mathfrak{S}_{\mu,\nu} F, \mathfrak{S}_{\hat\mu, \hat\nu} F)\leq C \left[\wassersteinD[1](\mu, \hat\mu)+\wassersteinD[1](\nu, \hat\nu)\right],$$ -% we can conclude that -% \begin{align} -% d_H( F_{\mu,\nu}, F_{\hat \mu, \hat\nu}) & \leq \frac{1}{1-\kappa} -% C \left[\wassersteinD[1](\mu, \hat\mu)+\wassersteinD[1](\nu, \hat\nu)\right]. -% \end{align} - -The proof is divided into four parts. First, we recall that the IPFP \DIFdelbegin \DIFdel{sequence -}\DIFdelend \DIFaddbegin \DIFadd{sequences -}\DIFaddend is associated with a sequence of potentials. In \Cref{sec:prop-extens-potent} we -show quantitative regularity and boundedness properties for these -potentials. The boundedness is due to a reparameterization by -\cite{carlier2020differential}. Then, in \Cref{sec:hilb-birkh-metr} we show a -contraction property as well as useful Lipschitz properties with respect to the -Hilbert--Birkhoff metric. We then show the uniform quantitative stability of the -potentials w.r.t.\ \DIFdelbegin \DIFdel{this }\DIFdelend \DIFaddbegin \DIFadd{the Hilbert--Birkhoff }\DIFaddend metric in -\Cref{sec:quant-unif-bounds}. Finally, in \Cref{sec:from-potent-prob} we show -how uniform quantitative bounds on the potentials translate into bounds onto -probability measures which concludes the proof. - -\subsection{Regularity properties of the potentials} -\label{sec:prop-extens-potent} - -In this section, we fix $\pi_0 \in \Pens(\msx)$ and $\pi_1 \in \Pens(\msy)$ and -let $(\Pbb^n)_{n \in \nset}$ the IPFP sequence given by \eqref{eq:ipfp}. The -IPFP sequence can be described by a corresponding sequence of (measurable) -potentials $(\tilde f_n, \tilde g_n)_{n \in \nset}$ such that for any $n \in \nset$, -$\tilde{f}_n: \ \msx \to \ooint{0,+\infty}$, -$\tilde{g}_n: \ \msy \to \ooint{0,+\infty}$ and $\tilde{f}_0 = \tilde{g}_0 = 1$. 
- -\begin{proposition} - For any $n \in \nset$ and $(x, y) \in \msx \times \msy$ we have - \begin{align} - &(\rmd \Pbb^{2n} / \rmd \pi_0 \otimes \pi_1)(x,y) = \tilde{f}_n(x) K(x,y) \tilde{g}_n(y) \DIFaddbegin \eqsp \DIFaddend , \\ - &(\rmd \Pbb^{2n+1} / \rmd \pi_0 \otimes \pi_1)(x,y) = \tilde{f}_{n+1}(x) K(x,y) \tilde{g}_n(y) \DIFaddbegin \eqsp \DIFaddend , \\ - &\DIFdelbegin \DIFdel{\textstyle{\tilde{f}_{n+1}(x) = \parenthese{\int_{\msy} K(x,y) \tilde{g}_n(y) \rmd \pi_1(y)}^{-1},} }\DIFdelend \DIFaddbegin \DIFadd{\textstyle{\tilde{f}_{n+1}(x) = \parenthese{\int_{\msy} K(x,y) \tilde{g}_n(y) \rmd \pi_1(y)}^{-1} \eqsp ,} }\DIFaddend \\ - &\DIFdelbegin \DIFdel{\textstyle{\tilde{g}_{n+1}(y) = \parenthese{\int_{\msx} K(x,y) \tilde{f}_{n+1}(x) \rmd \pi_0(x)}^{-1}.} - }\DIFdelend \DIFaddbegin \DIFadd{\textstyle{\tilde{g}_{n+1}(y) = \parenthese{\int_{\msx} K(x,y) \tilde{f}_{n+1}(x) \rmd \pi_0(x)}^{-1} \eqsp .} - }\DIFaddend \end{align} -\end{proposition} -For any $n \in \nset$, $a_n > 0$ and $(x,y) \in \msx \times \msy$ we have also -\begin{equation} - (\rmd \Pbb^{2n} / \rmd \pi_0 \otimes \pi_1)(x,y) = (a_n \tilde{f}_n(x)) K(x,y) (\tilde{g}_n(y)/a_n) \DIFaddbegin \eqsp \DIFaddend . -\end{equation} -In other words, we can rescale the potentials $\tilde{f}_n$ and $\tilde{g}_n$ and -still preserve the measure $\Pbb^{2n}$. This observation is at the core of the -work of \cite{carlier2020differential} which proves the geometric convergence of -the IPFP w.r.t.\ the $\mathrm{L}^p$ metric. For any $n \in \nset$, let -$\tilde{\varphi}_n = \log(f_n)$ and $\tilde{\Psi}_n = \log(g_n)$ and let -$a_n = \exp[-\int_{\msx} \tilde{\varphi}_n(x) \rmd \pi_0(x)]$. Finally, for any -$n \in \nset$, let $\varphi_n = \tilde{\varphi}_n + a_n$ and -$\Psi_n = \tilde{\Psi}_n - a_n$. In particular we have the following proposition. 

-
-\begin{proposition}
-  \label{prop:potential_rescale}
-  For any $n \in \nset$ and $(x,y) \in \msx \times \msy$ we have
-  \begin{align}
-    &\textstyle{\varphi_{n+1}(x) = -\log \left\{\int_{\msy} K(x,y) \exp[\Psi_n(y)] \rmd \pi_1(y)\right\} } \\
-    &\qquad \qquad \qquad \DIFdelbegin \DIFdel{\textstyle{+ \int_{\msx} \log \{ \int_{\msy} K(x,y) \exp[\Psi_n(y)] \rmd \pi_1(y) \} \rmd \pi_0(x), } }\DIFdelend \DIFaddbegin \DIFadd{\textstyle{+ \int_{\msx} \log \{ \int_{\msy} K(x,y) \exp[\Psi_n(y)] \rmd \pi_1(y) \} \rmd \pi_0(x) \eqsp , } }\DIFaddend \\
-    &\DIFdelbegin \DIFdel{\textstyle{\Psi_{n+1}(y) = -\log \left\{\int_{\msx} K(x,y) \exp[\varphi_{n+1}(x)] \rmd \pi_0(x)\right\},} }\DIFdelend \DIFaddbegin \DIFadd{\textstyle{\Psi_{n+1}(y) = -\log \left\{\int_{\msx} K(x,y) \exp[\varphi_{n+1}(x)] \rmd \pi_0(x)\right\} \eqsp ,} }\DIFaddend \\
-    &(\rmd \Pbb^{2n}/ \rmd \DIFdelbegin \DIFdel{(}\DIFdelend \pi_0 \otimes \pi_1)\DIFdelbegin \DIFdel{)}\DIFdelend (x,y) = \exp[\varphi_n(x) + \Psi_n(y)] K(x,y) \DIFaddbegin \eqsp \DIFaddend .
-  \end{align}
-\end{proposition}
-
-Recall that for any $(x, y) \in \msx \times \msy$ we have $K(x,y) = \exp[-c(x,y)]$.
-Using \cite[Lemma 3.1]{carlier2020differential} we have the following result.
-
-\begin{proposition}
-  \label{prop:bound_0}
-  For any $n \in \nset$ we have
-  $\max(\normLigne{\varphi_n}_{\infty}, \normLigne{\Psi_n}_{\infty}) \leq 3
-  \normLigne{c}_{\infty}$.
-\end{proposition}
-
-We now establish the Lipschitz property of these potentials under the assumption that the cost function $c$ is Lipschitz; this is automatically satisfied in the case where $c(x,y)=|x-y|^2/\epsilon$ and $\msx,\msy$ are compact, or when $c$ is a metric by the triangle inequality.
-
-
-% % $c \in\Lip(\mcx\times\mcy, \mathbb{R})$
-% \begin{proposition}
-% \label{prop:bound_1}
-% Assume that $c \in \rmc^1(\msx \times \msy, \rset)$.
Then, for any -% $n \in \nset$, -% \begin{equation} -% \max(\normLigne{\nabla \varphi_n}_\infty, \normLigne{\nabla \Psi_n}_\infty) \leq \norm{\nabla c}_\infty \exp[8 -% \normLigne{c}_\infty] \eqsp . -% \end{equation} -% \end{proposition} - -% \begin{proof} -% Using \Cref{prop:potential_rescale}, that -% $c \in \rmc^1(\msx \times \msy, \rset)$, $\msx$ and $\msy$ are compact spaces -% and the Lebesgue dominated convergence theorem we get that for any -% $n \in \nset$, $\varphi_n \in \rmc^1(\msx, \rset)$ and -% $\Psi_n \in \rmc^1(\msx, \rset)$. In addition, for any $x \in \msx$ and $n \in \nset$ we have -% \begin{equation} -% \textstyle{ -% \nabla \varphi_{n+1}(x) = \int_{\msy} \nabla_x c(x,y) K(x,y) \exp[\Psi_n(y)] \rmd \pi_1(y) / \int_{\msy} K(x,y) \exp[\Psi_n(y)] \rmd \pi_1(y) \eqsp .} -% \end{equation} -% For any $(x,y) \in \msx \times \msy$ and $n \in \nset$ we have -% \begin{align} -% K(x,y) \exp[\Psi_n(y)] &\geq \exp[-4 \normLigne{c}_\infty] \eqsp ,\\ -% \norm{\nabla_x c(x,y) K(x,y) \exp[\Psi_n(y)]} &\leq \norm{\nabla c}_\infty \exp[4 \normLigne{c}_\infty] \eqsp . -% \end{align} -% Therefore, we get that for any $x \in \msx$ and $n \in \nset$ we have -% $\normLigne{\nabla \varphi_{n+1}(x)} \leq \norm{\nabla c}_\infty \exp[8 -% \normLigne{c}_\infty]$. The proof that for any $y \in \msy$ and $n \in \nset$ we have -% $\normLigne{\nabla \Psi_{n+1}(y)} \leq \norm{\nabla c}_\infty \exp[8 -% \normLigne{c}_\infty]$, is similar. -% \end{proof} - -\begin{proposition} - \label{prop:bound_1} - Assume that $c \in\Lip(\mcx\times\mcy, \mathbb{R})$. Then, for any - $n \in \nset$, - \begin{equation} - \max\{ \Lip( \varphi_{n+1}), \Lip(\Psi_{n+1}) \} \leq \Lip( c). 

-  \end{equation}
-\end{proposition}
-
-\begin{proof}
-  Using \Cref{prop:potential_rescale} and the fact that
-  $c \in \Lip(\msx \times \msy, \rset)$,
-  we have that
-  %
-  \begin{align}
-    \lefteqn{\varphi_{n+1}(x)-\varphi_{n+1}(x')}\\
-    &=\log\left\{ \int_{\msy} K(x',y) \exp[\Psi_n(y)] \rmd \pi_1(y)/ \int_{\msy} K(x,y) \exp[\Psi_n(y)] \rmd \pi_1(y) \right\}\\
-    &= \log \left\{
-      \int_{\msy} \exp[-c(x,y) + c(x,y)-c(x',y)+\Psi_n(y)] \rmd \pi_1(y)/ \int_{\msy} \exp[-c(x,y)+\Psi_n(y)] \rmd \pi_1(y)\right\} \\
-    &\leq \log \left\{
-      \int_{\msy} \exp[-c(x,y) + \Lip(c)\ddx(x,x')+\Psi_n(y)] \rmd \pi_1(y)/ \int_{\msy} \exp[-c(x,y)+\Psi_n(y)] \rmd \pi_1(y)\right\} \\
-    &\leq \Lip(c)\ddx (x,x').
-  \end{align}
-  %
-  Similarly we obtain that $\varphi_{n+1}(x')-\varphi_{n+1}(x)\leq \Lip(c)\ddx (x,x')$, whence it follows that $\Lip(\varphi_{n+1})\leq \Lip(c)$.
-  %
-  Similarly we have that
-  \begin{align}
-    \lefteqn{\Psi_{n+1}(y')-\Psi_{n+1}(y)}\\
-    &= \log \left \{ \int_{\msx} K(x,y) \exp[\varphi_{n+1}(x)] \rmd \pi_0(x) /
-    \int_{\msx} K(x,y') \exp[\varphi_{n+1}(x)] \rmd \pi_0(x)
-    \right\}\\
-    &= \log \left \{ \int_{\msx} \exp[-c(x,y')+c(x,y')-c(x,y)+\varphi_{n+1}(x)] \rmd \pi_0(x) /
-    \int_{\msx} \exp[-c(x,y')+\varphi_{n+1}(x)] \rmd \pi_0(x)
-    \right\}\\
-    &\leq \log \left \{ \int_{\msx} \exp[-c(x,y')+\Lip(c)\ddy(y,y')+\varphi_{n+1}(x)] \rmd \pi_0(x) /
-    \int_{\msx} \exp[-c(x,y')+\varphi_{n+1}(x)] \rmd \pi_0(x)
-    \right\}\\
-    &\leq \Lip(c) \ddy(y,y').\qedhere
-  \end{align}
-\end{proof}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{remark}\label{rem:lipconstant}
-Notice that $\Lip(\varphi_{n+1}), \Lip(\Psi_{n+1})$ are independent of the functions $\varphi_n, \Psi_n$ and only depend on the properties of the kernel $K(\cdot, \cdot)$.
This fact will be useful when we will study the regularity properties of the potentials associated with the solution of \eqref{eq:schrodinger_bridge}. -\end{remark} -In this section the potentials $(\varphi_n)_{n \in \nset}$ and -$(\Psi_n)_{n \in \nset}$ are defined over $\msx$ and $\msy$ -respectively. However, they only need to be defined on the support of $\pi_0$ -and $\pi_1$ respectively. In what follows we derive similar regularity bounds -for the potentials associated with the \schro bridge, which as explained may only be defined on the supports of $\pi_0, \pi_1$. -\begin{proposition}\label{prop:extensions} - Suppose that $\phi, \psi$ are the potentials associated with the solution of \eqref{eq:schrodinger_bridge}, that is - $$\Pbb^\star(\rmd x, \rmd y) = \exp[\phi(x)+\psi(y)] K(x,y) \DIFdelbegin %DIFDELCMD < \rmd %%% -\DIFdelend \pi_0(\DIFaddbegin \rmd \DIFaddend x) \DIFdelbegin %DIFDELCMD < \rmd %%% -\DIFdelend \pi_1(\DIFaddbegin \rmd \DIFaddend y).$$ - Then there exist $\tilde{\phi}\in \Lip(\mcx,\rset)$, $\tilde{\psi}\in \Lip(\mcy, \rset)$, such that - $(\tilde\phi, \tilde\psi)$ also solve \eqref{eq:schrodinger_bridge}. In particular \DIFdelbegin \DIFdel{$\tilde{\phi}|_{\supp(\pi_0)} = \phi+a$, - $\tilde{\psi}|_{\supp(\pi_1)} = \psi-a$, - for some $a\in \mathbb{R}$, - }\DIFdelend \DIFaddbegin \DIFadd{$\tilde{\phi}|_{\supp(\pi_0)} = \phi+c$, - $\tilde{\psi}|_{\supp(\pi_1)} = \psi-c$, - }\DIFaddend $\|\tilde{\psi}\|_\infty, \|\tilde\phi\|_\infty\leq 3 \|c\|_\infty$, and - $$\Lip(\tilde{\phi}), \Lip(\tilde{\psi}) \leq \Lip(c).$$ -\end{proposition} -\begin{proof} - The potentials $(\phi, \psi)$ corresponding to the solution $\Pbb^\star$ of \Cref{eq:schrodinger_bridge} satisfy the Schr\"odinger equations - \begin{equation} - \DIFdelbegin %DIFDELCMD < \begin{split}\label{eq:schrodingersystem} -%DIFDELCMD < \exp[\phi(x)] \int_\mcy \exp[\psi(x)] K(x,y) \rmd \pi_1( y) &=1, \quad \text{for $\pi_0$-a.e. 
$x$}, \\
-%DIFDELCMD < \exp[\psi(x)] \int_\mcy \exp[\phi(x)] K(x,y) \rmd\pi_0( y) &=1, \quad \text{for $\pi_1$-a.e. $y$}.
-%DIFDELCMD < \end{split}%%%
-\DIFdelend \DIFaddbegin \begin{split}\label{eq:schrodingersystem}
-  \exp[\phi(x)] \int_\msy \exp[\psi(y)] K(x,y) \pi_1(\rmd y) &=1, \quad \text{for $\pi_0$-a.e. $x$}, \\
-  \exp[\psi(y)] \int_\msx \exp[\phi(x)] K(x,y) \pi_0(\rmd x) &=1, \quad \text{for $\pi_1$-a.e. $y$}.
-  \end{split}\DIFaddend
-  \end{equation}
-  First notice that \eqref{eq:schrodingersystem} characterizes the potentials solving \Cref{eq:schrodinger_bridge} up to an additive constant, in the sense that \DIFdelbegin \DIFdel{$(\phi+a, \psi-a)$ }\DIFdelend \DIFaddbegin \DIFadd{$(\phi+c, \psi-c)$ }\DIFaddend is also a solution for any \DIFdelbegin \DIFdel{$a\in \rset$}\DIFdelend \DIFaddbegin \DIFadd{$c\in \rset$}\DIFaddend .
-  Second, notice that a pair of potentials $(\phi, \psi)$ that solves \Cref{eq:schrodinger_bridge} is a fixed point of the IPFP. Indeed, given a solution $(\phi, \psi)$ and letting
-  \begin{align}
-    &\textstyle{\tilde\phi(x) := -\log \left\{\int_{\msy} K(x,y) \exp[\psi(y)] \rmd \pi_1(y)\right\} } \\
-    &\qquad \qquad \qquad \textstyle{+ \int_{\msx} \log \{ \int_{\msy} K(x,y) \exp[\psi(y)] \rmd \pi_1(y) \} \rmd \pi_0(x) \eqsp , \quad x\in \msx} \\
-    &\textstyle{\tilde\psi(y) := -\log \left\{\int_{\msx} K(x,y) \exp[\tilde\phi(x)] \rmd \pi_0(x)\right\} \eqsp , \quad y\in \msy,}
-  \end{align}
-  from \Cref{eq:schrodingersystem},
-  one can easily see that for $x\in \supp(\pi_0)$, \DIFdelbegin \DIFdel{$\phi'(x)=\phi(x)+a$, where
-  }\DIFdelend \DIFaddbegin \DIFadd{$\tilde\phi(x)=\phi(x)+c$, where
-  }\DIFaddend $$\DIFdelbegin \DIFdel{as}\DIFdelend \DIFaddbegin \DIFadd{c}\DIFaddend =\int_{\msx} \log \{ \int_{\msy} K(x,y) \exp[\psi(y)] \rmd \pi_1(y) \} \rmd \pi_0(x) \eqsp ,$$
-  and therefore also that \DIFdelbegin \DIFdel{$\psi'(y)=\psi(y)-a$ }\DIFdelend \DIFaddbegin \DIFadd{$\tilde\psi(y)=\psi(y)-c$ }\DIFaddend for all $y\in
\supp(\pi_1)$.
-
-  Notice however that $\tilde\phi, \tilde\psi$ are well-defined for all $x\in \msx, y\in \msy$ respectively. In addition, following \cite[Lemma~3.1]{carlier2021linear} it is easy to verify that $\|\tilde\phi\|_\infty, \|\tilde\psi\|_\infty\leq 3\|c\|_\infty$, where recall that in our context $\|c\|_\infty := \sup_{x\in \msx, y\in \msy} |c(x,y)|$.
-
-  The proof of the Lipschitz property is identical to that of \Cref{prop:bound_1}; see \Cref{rem:lipconstant}.
-\end{proof}
-
-
-\subsection{The Hilbert--Birkhoff metric, contraction and Lipschitz properties}
-\label{sec:hilb-birkh-metr}
-
-We now recall basic properties of the Hilbert--Birkhoff metric. We refer to
-\cite{lemmens2013birkhoff,kohlberg1982contraction,bushell1973hilbert} for a
-review of the Hilbert--Birkhoff metric. Let $\mse$ be a real vector space and
-$\msk$ a cone in this vector space, \ie \ \DIFdelbegin \DIFdel{$\msk$ is convex, }\DIFdelend $\msk \cap (-\msk) = \{0\}$\DIFdelbegin \DIFdel{and $\lambda \msk \subset \msk$ }\DIFdelend \DIFaddbegin \DIFadd{, }\DIFaddend for any
-$\lambda \geq 0$ \DIFaddbegin \DIFadd{$\lambda \msk \subset \msk$ and $\msk$ is convex}\DIFaddend . In what
-follows, we let $\msc$ be a part of the cone \ie \ for any $x, y \in \msc$ there
-exist $\alpha, \beta \geq 0$ such that $\alpha x - y \in \msk$ and
-$\beta y - x \in \msk$. In addition, assume that $\msc$ is convex and that
-for any $\lambda > 0$ $\lambda \msc \subset \msc$. In this case we have for
-any $x, y \in \msc$ that
-\begin{equation}
-  M(x,y) = \inf \ensembleLigne{\beta \geq 0}{\beta y - x \in \msk} > \DIFdelbegin \DIFdel{0.
-}\DIFdelend \DIFaddbegin \DIFadd{0 }\eqsp \DIFadd{.
-}\DIFaddend \end{equation}
-Similarly we define for any $x, y \in \msc$
-\begin{equation}
-  m(x,y) = \sup \ensembleLigne{\alpha \geq 0}{x - \alpha y \in \msk} \DIFaddbegin \eqsp \DIFaddend .
-\end{equation}
-Note that $m(x,y) = M(y,x)^{-1} > 0$.
Finally, the Hilbert--Birkhoff metric is
-defined for any $x, y \in \msc$ by
-\begin{equation}
-  d_H(x,y) = \log(M(x,y) / m(x,y)) \DIFaddbegin \eqsp \DIFaddend .
-\end{equation}
-By \cite[Lemma 2.1]{lemmens2013birkhoff}, $d_H$ is a metric on $\msc/\sim$, the
-space $\msc$ quotiented by the equivalence relation: $x \sim y$ if there exists
-$\lambda > 0$ such that $y = \lambda x$. In particular, if $\normLigne{\cdot}$
-is a norm on $\mse$ then letting
-$\tilde{\msc} = \ensembleLigne{x \in \msc}{\normLigne{x}=1}$, we have that
-$(\tmsc,d_H)$ is a metric space.
-
-Let $(\msv, \normLigne{\cdot})$ and $(\msv', \normLigne{\cdot}')$ be two normed
-real vector spaces and $\msk \subset \msv$, $\msk' \subset \msv'$ two cones. In
-addition, let $\msc$ and $\msc'$ be convex parts of $\msk$ and
-$\msk'$ respectively, such that for any $\lambda > 0$, $\lambda \msc \subset \msc$ and
-$\lambda \msc' \subset \msc'$. Let $u: \ \msv \to \msv'$ be a linear mapping
-such that $u(\msc) \subset \msc'$. The projective diameter of $u$ is given by
-\begin{equation}
-  \Delta(u) = \sup \ensembleLigne{d_H(u(x), u(y))}{x, y \in \tilde{\msc}} \DIFaddbegin \eqsp \DIFaddend .
-\end{equation}
-Similarly, we also define the Birkhoff contraction ratio of $u$
-\begin{equation}
-  \kappa(u) = \inf \ensembleLigne{\kappa}{d_H(u(x), u(y)) \leq \kappa d_H(x,y), \eqsp x,y \in \tmsc} \DIFaddbegin \eqsp \DIFaddend .
-\end{equation}
-Using the Birkhoff contraction theorem \citep{birkhoff1957extensions,bauer1965elementary,hopf1963inequality} we have that
-\begin{equation}
-  \label{eq:uno}
-  \kappa(u) \leq \tanh(\Delta(u)/4) \DIFaddbegin \eqsp \DIFaddend .
-\end{equation}
-
-In order to use the Birkhoff contraction theorem, we collect the following basic
-facts on cones in function spaces.
-\begin{proposition}
-  \label{prop:hilbert_birkhoff}
-  Let $\msx$ be a compact space.
$\msf = \coint{0,+\infty}^\msx$ is a cone and - $\tmsf = \rmc(\msx, \ooint{0,+\infty})$ is a convex part of $\msf$ such that - for any $\lambda > 0$, $\lambda \tmsf \subset \tmsf$. In addition, we have - that for any $f, g \in \tmsf$ - \begin{equation} - d_H(f,g) = \log(\normLigne{f/g}_{\infty}) + \log(\normLigne{g/f}_{\infty}) \DIFaddbegin \eqsp \DIFaddend . - \end{equation} -\end{proposition} - -In what follows, we introduce key mappings which allow us to compute the IPFP -potential $(f_n)_{n \in \nset}$ and $(g_n)_{n \in \nset}$, similarly to -\cite{chen2016entropic}. Recall that for any $n \in \nset$ we have -\begin{align} - \label{eq:potentials_rescale_form} - &\DIFdelbegin \DIFdel{\textstyle{f_{n+1}(x) = a_n \parenthese{\int_{\msy} K(x,y) g_n(y) \rmd \nu_1(y)}^{-1},} }\DIFdelend \DIFaddbegin \DIFadd{\textstyle{f_{n+1}(x) = a_n \parenthese{\int_{\msy} K(x,y) g_n(y) \rmd \nu_1(y)}^{-1} \eqsp ,} }\DIFaddend \\ - &\textstyle{a_n = \exp[\int_{\msx} \log \parenthese{\int_{\msy} K(x,y) g_n(y) \rmd \nu_1(y)} \rmd \pi_0(x)]} \DIFaddbegin \eqsp \DIFaddend , \\ - &\DIFdelbegin \DIFdel{\textstyle{g_{n+1}(y) = \parenthese{\int_{\msx} K(x,y) f_{n+1}(x) \rmd \pi_0(x)}^{-1}.} -}\DIFdelend \DIFaddbegin \DIFadd{\textstyle{g_{n+1}(y) = \parenthese{\int_{\msx} K(x,y) f_{n+1}(x) \rmd \pi_0(x)}^{-1} \eqsp .} -}\DIFaddend \end{align} -Let $\pi_0 \in \Pens(\msx)$ and $\nu_1 \in \Pens(\msy)$. 
We define -$\mce_{\pi_0}^x$ and $\mce_{\nu_1}^y$ such that for any -$f: \ \msx \to \coint{0,+\infty}$ and $g: \ \msy \to \coint{0,+\infty}$ we have -\begin{equation} - \DIFdelbegin \DIFdel{\textstyle{\mce_{\pi_0}^x(f)(y) = \int_{\msx} K(x,y) f(x) \rmd \pi_0(x) \eqsp , \quad \mce_{\nu_1}^y(g)(x) = \int_{\msy} K(x,y) g(y) \rmd \nu_1(y).} -}\DIFdelend \DIFaddbegin \DIFadd{\textstyle{\mce_{\pi_0}^x(f)(y) = \int_{\msx} K(x,y) f(x) \rmd \pi_0(x) \eqsp , \quad \mce_{\nu_1}^y(g)(x) = \int_{\msy} K(x,y) g(y) \rmd \nu_1(y) \eqsp .} -}\DIFaddend \end{equation} -The following proposition is a consequence of the Birkhoff contraction theorem, -see also \cite{chen2016entropic}. - -\begin{proposition} - \label{prop:birkhoff_contraction} - For any $\nu_0 \in \Pens(\msx)$ and $\nu_1 \in \Pens(\msy)$, $\mce_{\nu_0}^x(\rmc(\msx, \ooint{0,+\infty})) \subset \Lip(\msy, \ooint{0,+\infty})$ and $\mce_{\nu_1}^y(\rmc(\msy, \ooint{0,+\infty})) \subset \Lip(\msx, \ooint{0,+\infty})$. In addition, we have - \begin{equation} - \max(\kappa(\mce_{\nu_0}^x), \kappa(\mce_{\nu_1}^y)) \leq \tanh(\norm{c}_\infty) \DIFaddbegin \eqsp \DIFaddend . - \end{equation} -\end{proposition} - -\begin{proof} - Let $\nu_0 \in \Pens(\msx)$. Since - $K:\ \msx \times \msy \to \ooint{0,+\infty}$ is continuous and - $\msx \times \msy$ is compact we get that for any - $f \in \rmc(\msx, \ooint{0,+\infty})$, - $\mce_{\nu_0}^x(f) \in \rmc(\msy, \ooint{0,+\infty})$. In addition, let - $u \in \rmc(\msy, \ooint{0,+\infty})$ such that for any $y \in \msy$, - $u(y) = 1$. Then, we have that for any $f, g \in \rmc(\msx, \ooint{0,+\infty})$ - \begin{equation} - \label{eq:dos} - \Delta(\mce_{\nu_0}^x) \leq 2 \sup \ensembleLigne{d_H(\mce_{\nu_0}^x(f), u)}{f \in \rmc(\msx, \ooint{0,+\infty})} \DIFaddbegin \eqsp \DIFaddend . 

-  \end{equation}
-  In addition, using \Cref{prop:hilbert_birkhoff}, we have for any $f \in \rmc(\msx, \ooint{0,+\infty})$
-  \begin{equation}
-    \label{eq:tres}
-    d_H(\mce_{\nu_0}^x(f), u) = \log(\sup \ensembleLigne{\mce_{\nu_0}^x(f)(y)}{y \in \msy}) - \log(\inf \ensembleLigne{\mce_{\nu_0}^x(f)(y)}{y \in \msy}) \DIFaddbegin \eqsp \DIFaddend .
-  \end{equation}
-  For any $f \in \rmc(\msx, \ooint{0,+\infty})$ and $y \in \msy$ we have
-  \begin{equation}
-    \DIFdelbegin \DIFdel{\textstyle{\mce_{\nu_0}^x(f)(y) \geq \exp[-\norm{c}_\infty] \int_{\msx} f(x) \rmd \nu_0(x), \quad \mce_{\nu_0}^x(f)(y) \leq \exp[\norm{c}_\infty] \int_{\msx} f(x) \rmd \nu_0(x).}
-    }\DIFdelend \DIFaddbegin \DIFadd{\textstyle{\mce_{\nu_0}^x(f)(y) \geq \exp[-\norm{c}_\infty] \int_{\msx} f(x) \rmd \nu_0(x) \eqsp , \quad \mce_{\nu_0}^x(f)(y) \leq \exp[\norm{c}_\infty] \int_{\msx} f(x) \rmd \nu_0(x) \eqsp .}
-    }\DIFaddend \end{equation}
-  Combining this result with \eqref{eq:uno}, \eqref{eq:dos} and \eqref{eq:tres} we
-  get that $\kappa(\mce_{\nu_0}^x) \leq \tanh(\norm{c}_\infty)$. The proof that
-  $\kappa(\mce_{\nu_1}^y) \leq \tanh(\norm{c}_\infty)$ is similar.
-
-  Lipschitz continuity follows easily from the definitions of $\mce_{\nu_0}^x, \mce_{\nu_1}^y$ and the Lipschitz continuity of $K$.
-  In fact, for any function $f$, resp.\ $g$, that does not vanish $\nu_0$ a.e., resp.\ $\nu_1$-a.e., $y\mapsto \mce_{\nu_0}^x(f)(y)$, resp. $x\mapsto \mce_{\nu_1}^y(g)(x)$, is Lipschitz continuous.
-\end{proof}
-
-\begin{proposition}
-  \label{prop:bound_wass_mce}
-  Let $\nu_0, \hat{\nu}_0 \in \Pens(\msx)$ and
-  $\nu_1, \hat{\nu}_1 \in \Pens(\msy)$.
Then for any - $f \in \Lip(\msx, \ooint{0,+\infty})$ and - $g \in \Lip(\msy, \ooint{0,+\infty})$ we have - \begin{align} - &d_H(\mce^x_{\nu_0}(f), \mce^x_{\hat{\nu}_0}(f)) \leq 2 \DIFdelbegin \DIFdel{\textcolor{red}{\cancel{\norm{1/f}_\infty}} }\DIFdelend \norm{1/f}_\infty \DIFaddbegin \norm{1/f}\DIFadd{_\infty }\DIFaddend \left[\Lip(f) + \Lip(c) \norm{f}_\infty\right]\exp[2\norm{c}_\infty] \wassersteinD[1](\nu_0, \hat{\nu}_0) \DIFaddbegin \eqsp \DIFaddend , \\ - &d_H(\mce^y_{\nu_1}(g), \mce^y_{\hat{\nu}_1}(g)) \leq 2 \norm{1/g}_\infty \DIFdelbegin \DIFdel{\textcolor{red}{\cancel{\norm{1/f}_\infty}} }\DIFdelend \DIFaddbegin \norm{1/f}\DIFadd{_\infty }\DIFaddend \left[\Lip(g) + \Lip(c) \norm{g}_\infty\right]\exp[2\norm{c}_\infty]\DIFdelbegin \DIFdel{\textcolor{red}{\cancel{\exp[\norm{c}_\infty]}} }\DIFdelend \DIFaddbegin \DIFadd{\exp[\norm{c}_\infty] }\DIFaddend \wassersteinD[1](\nu_1, \hat{\nu}_1) - \DIFaddbegin \eqsp \DIFaddend . - \end{align} -\end{proposition} - -\begin{proof} - Let $f \in \Lip(\msx, \ooint{0,+\infty})$. We have - \begin{equation}\DIFdelbegin %DIFDELCMD < \label{eq:decomporatio} -%DIFDELCMD < %%% -\DIFdel{\textstyle{\mce_{\nu_0}^x(f)(y)/\mce_{\hat{\nu}_0}^x(f)(y) = 1 + \int_{\msx} K(x,y) f(x) \rmd (\nu_0 - \hat{\nu}_0)(x) / \int_{\msx} K(x,y) f(x) \rmd \hat{\nu}_0(x). } - }\DIFdelend - \DIFaddbegin \DIFadd{\textstyle{\mce_{\nu_0}^x(f)(y)/\mce_{\hat{\nu}_0}^x(f)(y) = 1 + \int_{\msx} K(x,y) f(x) \rmd (\nu_0 - \hat{\nu}_0)(x) / \int_{\msx} K(x,y) f(x) \rmd \hat{\nu}_0(x) \eqsp . } - }\DIFaddend \end{equation} -In addition, we have for any $x, x' \in \msx, , y \in \msy$ -\begin{align} - |K(x,y)f(x) - K(x',y) f(x')| - &\leq |K(x,y) f(x)- K(x',y)f(x)| + |K(x',y) f(x) - K(x',y) f(x')|\\ - &\leq \|f\|_\infty \Lip(K(y,\cdot)) \ddx(x,x') - + \|K(\cdot, \cdot)\|_\infty \Lip(f) \ddx(x,x'). 
-\end{align} -Since $K(x,y)=\exp[-c(x,y)]$, using the fact that for $|s|,|t| 2 \Lip(c) \exp[6 \normLigne{c}_\infty] (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ). -\end{equation} - Using that $\Pbb^{2n}(\msx \times \msy) =\hat{\Pbb}^{2n}(\msx \times \msy) =1$, we have - \begin{align} - \label{eq:diff_up} - &\textstyle{ \int_{\msx \times \msy} \{f_n(x) g_n(y) / (\hat{f}_n(x) \hat{g}_n(y)) - 1\}\hat{f}_n(x) \hat{g}_n(y)\rmd \hat{\pi}_0(x)\rmd \hat{\pi}_1(y) } \\ - &\qquad \qquad \qquad = \textstyle{\int_{\msx \times \msy} f_n(x) g_n(y) \rmd \hat{\pi}_0(x)\rmd \hat{\pi}_1(y)} -1 \\ - &\qquad \qquad \qquad = \textstyle{\int_{\msx \times \msy} f_n(x) g_n(y) \rmd \hat{\pi}_0(x)\rmd \hat{\pi}_1(y) - \int_{\msx \times \msy} f_n(x) g_n(y) \rmd \pi_0(x)\rmd \pi_1(y)} \eqsp . - \end{align} - In addition, using \Cref{prop:bound_0}, \Cref{prop:bound_1} and \eqref{eq:lipfn} we have - \begin{equation} - \Lip (f_n g_n) \leq \|f_n\|_\infty \Lip(g_n) + \|g_n\|_\infty \Lip(f_n) \leq 2 \Lip(c) \exp[6 \normLigne{c}_\infty] \eqsp . - \end{equation} -% \begin{equation} -% \norm{\nabla (f_n g_n)(x,y)} \leq \norm{\nabla f_n(x) g_n(y)} + \norm{f_n(x) \nabla g_n(y)} \leq 2 \norm{\nabla c}_\infty \exp[14 \normLigne{c}_\infty] \eqsp . -% \end{equation} -Combining this result and \eqref{eq:diff_up} we get that -\begin{align} - &\abs{\textstyle{ \int_{\msx \times \msy} \{f_n(x) g_n(y) / (\hat{f}_n(x) \hat{g}_n(y)) - 1\}\rmd \hat{\Pbb}^{2n}(x,y) }}\\ - & \qquad \leq 2 \Lip(c) \exp[6 \normLigne{c}_\infty] \wassersteinD[1](\pi_0 \otimes \pi_1, \hat{\pi}_0 \otimes \hat{\pi}_1) \eqsp . -\end{align} -In addition, we have that -$\wassersteinD[1](\pi_0 \otimes \pi_1, \hat{\pi}_0 \otimes \hat{\pi}_1)\leq -\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)$. 
-Hence, we get that -\begin{align} - &\abs{\textstyle{ \int_{\msx \times \msy} \{f_n(x) g_n(y) / (\hat{f}_n(x) \hat{g}_n(y)) - 1\}\rmd \hat{\Pbb}^{2n}(x,y) }}\\ - & \qquad \leq 2 \Lip(c) \exp[6 \normLigne{c}_\infty] (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ). -\end{align} -The above, combined with \eqref{eq:contradition} implies that there must exist values $x_n^+, x_n^- \in \mcx$ and $y_n^+, y_n^-\in \mcy$ such that -\begin{align} - f_n(x_n^+) g_n(y_n^+) / (\hat{f}_n(x_n^+) \hat{g}_n(y_n^+)) - 1 &> 2 \Lip(c) \exp[6 \normLigne{c}_\infty] (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) )\\ - f_n(x_n^-) g_n(y_n^-) / (\hat{f}_n(x_n^-) \hat{g}_n(y_n^-)) - 1 &< 2 \Lip(c) \exp[6 \normLigne{c}_\infty] (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ). -\end{align} -Since by \Cref{prop:bound_1}, $(x,y)\mapsto f_n(x) g_n(y) / (\hat{f}_n(x) \hat{g}_n(y)) - 1$ is continuous, by the intermediate value theorem, there will be a pair $(x_n^\dagger, y_n^\dagger)$, a convex combination of $(x_n^+, y_n^+)$ and $(x_n^-, y_n^-)$ such that the conclusion of the Lemma is true, arriving at the desired contradiction. -\end{proof} - -% \begin{lemma} -% \label{lemma:attain} -% Let $(f_n)_{n \in \nset}$ and $(\hat{f}_n)_{n \in \nset}$ be given by -% \eqref{eq:potentials_rescale_form} w.r.t.\ $\pi_0, \pi_1$ and -% $\hat{\pi}_0, \hat{\pi}_1$ respectively. 
-% Then, for any $n \in \nset$ there exist $x_n^+, x_n^- \in \msx$ and -% $y_n^+, y_n^- \in \msy$ such that -% \begin{align} -% &f_n(x_n^+) g_n(y_n^+)/ (\hat{f}_n(x_n^+) \hat{g}_n(y_n^+)) - 1 \leq 2 \norm{\nabla c}_\infty \rme^{14 \normLigne{c}_\infty} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ) \eqsp , \\ -% &1 - f_n(x_n^-) g_n(y_n^-)/ (\hat{f}_n(x_n^-) \hat{g}_n(y_n^-)) \leq 2 \norm{\nabla c}_\infty \rme^{14 \normLigne{c}_\infty} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ) \eqsp . -% \end{align} -% \end{lemma} - -% \begin{proof} -% Let $n \in \nset$. Using that $\Pbb^{2n}(\msx \times \msy) =\hat{\Pbb}^{2n}(\msx \times \msy) =1$, we have -% \begin{align} -% \label{eq:diff_up} -% &\textstyle{ \int_{\msx \times \msy} \{f_n(x) g_n(y) / (\hat{f}_n(x) \hat{g}_n(y)) - 1\}\hat{f}_n(x) \hat{g}_n(y)\rmd \hat{\pi}_0(x)\rmd \hat{\pi}_1(y) } \\ -% &\qquad \qquad \qquad = \textstyle{\int_{\msx \times \msy} f_n(x) g_n(y) \rmd \hat{\pi}_0(x)\rmd \hat{\pi}_1(y)} -1 \\ -% &\qquad \qquad \qquad = \textstyle{\int_{\msx \times \msy} f_n(x) g_n(y) \rmd \hat{\pi}_0(x)\rmd \hat{\pi}_1(y) - \int_{\msx \times \msy} f_n(x) g_n(y) \rmd \pi_0(x)\rmd \pi_1(y)} \eqsp . -% \end{align} -% In addition, using \Cref{prop:bound_0} and \Cref{prop:bound_1} we have for any -% $x \in \msx$ and $y \in \msy$ -% \begin{equation} -% \norm{\nabla (f_n g_n)(x,y)} \leq \norm{\nabla f_n(x) g_n(y)} + \norm{f_n(x) \nabla g_n(y)} \leq 2 \norm{\nabla c}_\infty \exp[14 \normLigne{c}_\infty] \eqsp . -% \end{equation} -% Combining this result and \eqref{eq:diff_up} we get that -% \begin{align} -% &\abs{\textstyle{ \int_{\msx \times \msy} \{f_n(x) g_n(y) / (\hat{f}_n(x) \hat{g}_n(y)) - 1\}\rmd \hat{\Pbb}^{2n}(x,y) }}\\ -% & \qquad \leq 2 \norm{\nabla c}_\infty \exp[14 \normLigne{c}_\infty] \wassersteinD[1](\pi_0 \otimes \pi_1, \hat{\pi}_0 \otimes \hat{\pi}_1) \eqsp . 
-% \end{align} -% In addition, we have that -% $\wassersteinD[1](\pi_0 \otimes \pi_1, \hat{\pi}_0 \otimes \hat{\pi}_1)\leq -% \wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)$. -% Hence, we get that -% \begin{align} -% &\abs{\textstyle{ \int_{\msx \times \msy} \{f_n(x) g_n(y) / (\hat{f}_n(x) \hat{g}_n(y)) - 1\}\rmd \hat{\Pbb}^{2n}(x,y) }}\\ -% & \qquad \leq 2 \norm{\nabla c}_\infty \exp[14 \normLigne{c}_\infty] (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ) \eqsp , -% \end{align} -% which concludes the proof. -% \end{proof} - - -Finally, as promised, we conclude this section by deriving bounds on -$\normLigne{f_n g_n - \hat{f}_n\hat{g}_n}_\infty$ combining -\Cref{thm:contract_hilbert} with \Cref{lemma:attain}. - -\begin{theorem} - \label{thm:contrat_infty} - Let $(f_n, g_n)_{n \in \nset}$ and $(\hat{f}_n, \hat g_n)_{n \in \nset}$ be given by - \eqref{eq:potentials_rescale_form} w.r.t.\ $\pi_0, \pi_1$ and - $\hat{\pi}_0, \hat{\pi}_1$ respectively. Then, for any $n \in \nset$ we have - \begin{equation} - \normLigne{f_ng_n -\hat{f}_n \hat{g}_n}_\infty \leq 10 \Lip(c) \rme^{10 \normLigne{c}_\infty} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ) \eqsp . - \end{equation} -\end{theorem} - -\begin{proof} - Let $n \in \nset$, $x \in \msx$ and $y \in \msy$. Using \Cref{prop:bound_0} - and the fact that for any $s,t \in \cball{0}{M}$ with $M \geq 0$ we have - $\abs{\rme^s - \rme^t} \leq \rme^M \abs{s-t}$ we get - \begin{equation} - \label{eq:bound_u} - \absLigne{f_n(x)g_n(y) - \hat{f}_n(x) \hat{g}_n(y)} \leq \rme^{6 \normLigne{c}_\infty} \absLigne{\log(f_n(x)g_n(y)/(\hat{f}_n(x)\hat{g}_n(y)))} -\end{equation} -Assume that $f_n(x)g_n(y)/(\hat{f}_n(x)\hat{g}_n(y)) \geq 1$. 
Then using that -for any $t > 0$, $\log(t) \leq t - 1$, \Cref{thm:contract_hilbert} and -\Cref{lemma:attain} we have, with $(x_n^\dagger, y_n^\dagger)$ from \Cref{lemma:attain}, -\begin{align} - &\absLigne{\log(f_n(x)g_n(y)/(\hat{f}_n(x)\hat{g}_n(y)))} = \log(f_n(x)g_n(y)/(\hat{f}_n(x)\hat{g}_n(y))) \\ - & \qquad \quad + \log(\hat{f}_n(x_n^\dagger)\hat{g}_n(y_n^\dagger)/(f_n(x_n^\dagger)g_n(y_n^\dagger))) + \log(f_n(x_n^\dagger)g_n(y_n^\dagger)/(\hat{f}_n(x_n^\dagger)\hat{g}_n(y_n^\dagger))) \\ - &\qquad \leq d_H(f_n g_n, \hat{f}_n \hat{g}_n) + \log(f_n(x_n^\dagger)g_n(y_n^\dagger)/(\hat{f}_n(x_n^\dagger)\hat{g}_n(y_n^\dagger))) \\ - &\qquad \leq d_H(f_ng_n, \hat{f}_n \hat{g}_n) + f_n(x_n^\dagger)g_n(y_n^\dagger)/(\hat{f}_n(x_n^\dagger)\hat{g}_n(y_n^\dagger)) - 1 \\ - & \qquad \leq 10 \Lip(c)\rme^{10 \normLigne{c}_\infty} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ) \eqsp . -\end{align} - -% \begin{align} -% &\absLigne{\log(f_n(x)g_n(y)/(\hat{f}_n(x)\hat{g}_n(y)))} = \log(f_n(x)g_n(y)/(\hat{f}_n(x)\hat{g}_n(y))) \\ -% & \qquad \quad + \log(\hat{f}_n(x_n^+)\hat{g}_n(y_n^+)/(f_n(x_n^+)g_n(y_n^+))) + \log(f_n(x_n^+)g_n(y_n^+)/(\hat{f}_n(x_n^+)\hat{g}_n(y_n^+))) \\ -% &\qquad \leq d_H(f_ng_n, \hat{f}_n \hat{g}_n) + \log(f_n(x_n^+)g_n(y_n^+)/(\hat{f}_n(x_n^+)\hat{g}_n(y_n^+))) \\ -% &\qquad \leq d_H(f_ng_n, \hat{f}_n \hat{g}_n) + f_n(x_n^+)g_n(y_n^+)/(\hat{f}_n(x_n^+)\hat{g}_n(y_n^+)) - 1 \\ -% & \qquad \leq 10 \normLigne{\nabla c}_\infty \rme^{16 \normLigne{c}_\infty} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ) \eqsp . -% \end{align} -Combining this result and \eqref{eq:bound_u} we get that -\begin{equation} - \absLigne{f_n(x)g_n(y) - \hat{f}_n(x) \hat{g}_n(y)} \leq 10 \Lip(c) \rme^{10 \normLigne{c}_\infty} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ) \eqsp . -\end{equation} -The proof in the case where $f_n(x)g_n(y)/(\hat{f}_n(x)\hat{g}_n(y)) \leq 1$ is similar. 
-\end{proof} -\subsection{From potentials to probability metrics} -\label{sec:from-potent-prob} - -Using \Cref{thm:contrat_infty} we are now ready to prove the following theorem. - -\begin{theorem} - \label{thm:stability_ipfp2} - For any $\pi_0, \hat{\pi}_0 \in \Pens(\msx)$, - $\pi_1, \hat{\pi}_1 \in \Pens(\msy)$ let $(\Pbb^{n})_{n\in \nset}$ and - $(\hat{\Pbb}^{n})_{n\in \nset}$ the IPFP sequence with marginals - $(\pi_0, \pi_1)$ respectively $(\hat{\pi}_0, \hat{\pi}_1)$. Then there exists - $C \geq 0$ such that for any $n \in \nset$ we have - \begin{equation} - \wassersteinD[1](\Pbb^n, \hat{\Pbb}^n) \leq C \defEns{\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)}, - \end{equation} - with - \begin{equation} - C = \rme^{10 \normLigne{c}_\infty} \{1 + (2\Lip(c)+10) (\diam_\msx + \diam_\msy) \}. - \end{equation} -\end{theorem} - -\begin{proof} - Let $n \in \nset$ and $\Psi\in \Lip(\mcx\times\mcy, \rset)$, that is $\Psi: \ \msx \times \msy \to \rset$ such that for any - $x_0,x_1 \in \msx$ and $y_0, y_1 \in \msy$ we have - \begin{equation} - |\Psi(x_0,y_0) - \Psi(x_1,y_1)| \leq \ddx(x_0,x_1)+\ddy(y_0,y_1). - \end{equation} - Without loss of generality, we may assume that there \DIFdelbegin \DIFdel{exist }\DIFdelend \DIFaddbegin \DIFadd{exists }\DIFaddend $\bar{x} \in \msx$ and - $\bar{y} \in \msy$ such that $\Psi(\bar{x}, \bar{y}) = 0$. Therefore, we have - that for any $x \in \msx$ and $y \in \msy$ - \begin{equation} - \label{eq:inf_Psi} - \normLigne{\Psi}_\infty = \sup \ensembleLigne{\abs{\Psi(x,y) - \Psi(\bar{x},\bar{y})}}{x \in \msx, \ y \in \msy} \leq \diam_\msx + \diam_\msy. - \end{equation} - %with $\diam_\msx = \sup \ensembleLigne{\ddx(x_0,x_1)}{x_0,x_1 \in \msx}$, - % $\diam_\msy = \sup \ensembleLigne{\ddx(y_0,y_1)}{y_0,y_1 \in \msy}$. 
- Using - this result, \Cref{prop:bound_0} and \Cref{prop:bound_1}, we get that for any - $x_0, x_1 \in \msx$ and $y_0, y_1 \in \msy$ we have - \begin{align} - \Lip(\Psi f_n g_n) - &\leq \Lip(\Psi) \|f_n\|_\infty \|g_n\|_\infty + - \Lip(f_n) \|\Psi\|_\infty \|g_n\|_\infty + \Lip(g_n) - \|\Psi \|_\infty \|f_n\|_\infty\\ - &\leq \exp[6\|c\|_\infty] + 2\Lip(c)(\diam_\msx+\diam_\msy)\exp[3\|c\|_\infty]. - \end{align} - -% \begin{align} -% &\abs{\Psi(x_0,y_0) f_n(x_0)g_n(y_0) - \Psi(x_1,y_1) f_n(x_1)g_n(y_1)} \\ -% & \qquad \qquad \leq 2 (\diam_\msx + \diam_\msy) \normLigne{\nabla c}_\infty \rme^{14 \normLigne{c}_\infty} + \rme^{6\normLigne{c}_\infty} \eqsp . -% \end{align} -Combining this result with \Cref{thm:contrat_infty} and the fact that $\wassersteinD[1](\pi_0 \otimes \pi_1, \hat{\pi}_0 \otimes \hat{\pi}_1)\leq -\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)$ we get that -\begin{align} -\label{eq:bound_wass_uno} -&\textstyle{\int_{\msx \times \msy} \Psi(x,y) f_n(x) g_n(y) \rmd \pi_0(x) \rmd \pi_1(y) - \int_{\msx \times \msy}\Psi(x,y) \hat f_n(x) \hat g_n(y) \rmd \hat{\pi}_0(x) \rmd \hat{\pi}_1(y) } \\ -&\leq \textstyle{\int_{\msx \times \msy} \Psi(x,y) f_n(x) g_n(y) \rmd \pi_0(x) \rmd \pi_1(y) - \int_{\msx \times \msy}\Psi(x,y) f_n(x) g_n(y) \rmd \hat{\pi}_0(x) \rmd \hat{\pi}_1(y) } \\ -&\qquad\qquad + \textstyle{\int_{\msx \times \msy} \Psi(x,y) \sup\|f_n g_n- \hat f_n \hat g_n\|_\infty \rmd \hat\pi_0(x) \rmd \hat\pi_1(y) }\\ -&\leq \Lip(\Psi f_n g_n) [\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ] -+ 10(\diam_\msx +\diam_\msy) \rme^{10 \normLigne{c}_\infty} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ) \\ -&\leq \left(\exp[6\|c\|_\infty] + 2\Lip(c)(\diam_\msx+\diam_\msy)\exp[3\|c\|_\infty]\right) [\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ]\\ -&\qquad + 10(\diam_\msx +\diam_\msy) \rme^{10 \normLigne{c}_\infty} [\wassersteinD[1](\pi_0, 
\hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1) ] \\ -& \leq \rme^{10 \normLigne{c}_\infty} \{1 + (2\Lip(c)+10) (\diam_\msx + \diam_\msy) \} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)). -\end{align} -% In addition, using \eqref{eq:inf_Psi} and \Cref{thm:contrat_infty} we get that -% \begin{align} -% \label{eq:bound_wass_duo} -% &\textstyle{\int_{\msx \times \msy} \Psi(x,y) f_n(x) g_n(y) \rmd \hat{\pi}_0(x) \rmd \hat{\pi}_1(y) - \int_{\msx \times \msy}\Psi(x,y) \hat{f}_n(x) \hat{g}_n(y) \rmd \hat{\pi}_0(x) \rmd \hat{\pi}_1(y) } \\ -% &\qquad \leq 10 (\diam_\msx + \diam_\msy)\normLigne{\nabla c}_\infty \rme^{19 \normLigne{c}_\infty} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)) \eqsp . -% \end{align} -% Combining \eqref{eq:bound_wass_uno} and \eqref{eq:bound_wass_duo} we get that -% \begin{align} -% &\textstyle{\int_{\msx \times \msy} \Psi(x,y) \rmd \Pbb^{2n}(x,y) - \int_{\msx \times \msy} \Psi(x,y) \rmd \hat{\Pbb}^{2n}(x,y)} \\ & \qquad \leq \rme^{8 \normLigne{c}_\infty} \{1 + 12 (\diam_\msx + \diam_\msy) \normLigne{\nabla c}_\infty \rme^{11 \normLigne{c}_\infty}\} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)) \eqsp . -% \end{align} -% Let $\Lipset$ the subset of functions from $\msx \times \msy$ to $\rset$ such -% that for any $\Psi \in \Lipset$, $x_0, x_1 \in \msx$ and $y_0, y_1 \in \msy$ we -% have -% \begin{equation} -% \abs{\Psi(x_0, y_0) - \Psi(x_1, y_1)} \leq \norm{(x_0,y_0) - (x_1,y_1)} \eqsp . 
-% \end{equation} -Letting -$\Lipset^\star = \ensembleLigne{\Psi \in \Lipset}{\Psi(\bar{x}, \bar{y}) =0}$, -we have that -\begin{align} - \wassersteinD[1](\Pbb^{2n}, \hat{\Pbb}^{2n}) &= \sup \ensembleLigne{\textstyle{\int_{\msx \times \msy} \Psi(x,y) \rmd \Pbb^{2n}(x,y) - \int_{\msx \times \msy} \Psi(x,y) \rmd \hat{\Pbb}^{2n}(x,y)}}{\Psi \in \Lipset} \\ - &= \sup \ensembleLigne{\textstyle{\int_{\msx \times \msy} \Psi(x,y) \rmd \Pbb^{2n}(x,y) - \int_{\msx \times \msy} \Psi(x,y) \rmd \hat{\Pbb}^{2n}(x,y)}}{\Psi \in \Lipset^\star} \\ - &\leq \rme^{10 \normLigne{c}_\infty} \{1 + (2\Lip(c)+10) (\diam_\msx + \diam_\msy) \} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)). -\end{align} -The proof that for any $n \in \nset$ we have -\begin{equation} - \wassersteinD[1](\Pbb^{2n+1}, \hat{\Pbb}^{2n+1}) \leq \rme^{10 \normLigne{c}_\infty} \{1 + (2\Lip(c)+10) (\diam_\msx + \diam_\msy) \} (\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)), -\end{equation} -is similar and left to the reader. -\end{proof} - -\begin{proof}[Proof of \Cref{thm:stability_schro}] -We know that $(f_n,g_n)$ converges in the Hilbert metric; we will now prove that the sequence of measures \DIFdelbegin \DIFdel{$\Pbb^n:=f_n g_n K \pi_0 \otimes \pi_1$ }\DIFdelend \DIFaddbegin \DIFadd{$\Pbb^n:=f_n g_n K \pi_0 \otimes \pi_1$ }\DIFaddend converges in Wasserstein distance. - -Let $(f_n, g_n)$ be an IPFP sequence. We know that $d_H(f_n, f_{n+1})\leq \kappa^n d_H(f_1,f_0)$ and $d_H(g_n, g_{n+1})\leq \kappa^n d_H(g_1,g_0)$. -Thus -\begin{align}d_H(f_{n+1}g_{n+1}, f_n g_n) -&= \log \sup_{x,y}\frac{f_{n+1}(x)g_{n+1}(y)}{f_n(x)g_n(y)} - \log \inf_{x,y}\frac{f_{n+1}(x)g_{n+1}(y)}{f_{n}(x)g_{n}(y)}\\ -&\leq \kappa^n \left[ d_H(f_1, f_0)+ d_H(g_1, g_0)\right]. -\end{align} -As explained earlier this is not enough on its own to control -$\|f_{n+1}g_{n+1}-f_n g_n\|_\infty$. -However, we can use the same technique as earlier. 
Recall that by definition -$$\iint (f_{n+1}(x)g_{n+1}(y)-f_n(x) g_n(y)) K(x,y) \DIFdelbegin %DIFDELCMD < \rmd %%% -\DIFdelend \pi_0(\DIFaddbegin \rmd \DIFaddend x) \DIFdelbegin %DIFDELCMD < \rmd %%% -\DIFdelend \pi_1(\DIFaddbegin \rmd \DIFaddend y) =0, $$ -and therefore either -$$f_{n+1}(x)g_{n+1}(y)-f_n(x) g_n(y)\equiv 0,$$ -identically on the support of $\pi_0\otimes \pi_1$, or it must take both negative and positive values on the support of $\pi_0\otimes \pi_1$. In the first case we conclude there exists $(x^\ast, y^\ast)$ such that -$$f_{n+1}(x^\ast)g_{n+1}(y^\ast) = f_n(x^\ast) g_n(x^\ast).$$ -In the second case, the same conclusion follows from \Cref{prop:bound_1} and the intermediate value theorem. -% , there will be -% $(x^\ast, y^\ast)$ such that -% $$f_{n+1}(x^\ast)g_{n+1}(y^\ast) = f_n(x^\ast) g_n(x^\ast).$$ -In either case, following the same reasoning as in the proof of \Cref{thm:contrat_infty} we have that -\begin{align} -| f_{n+1}(x)g_{n+1}(y) - f_n(x) g_n(y)| -&\leq \rme^{6\|c\|_\infty} -\absLigne{\log[f_{n+1}(x)g_{n+1}(y)/({f}_n(x){g}_n(y))]} -\end{align} -In addition, we have -\begin{align} -& \log\left(f_{n+1}(x)g_{n+1}(y)/ f_n(x) g_n(x)\right)\\ -&\leq \log\left(f_{n+1}(x^\ast)g_{n+1}(y^\ast)/f_{n}(x^\ast)g_{n}(y^\ast) -\right) + d_H(f_{n+1}g_{n+1}, f_n g_n)\\ -&\leq \kappa^n \left[ d_H(f_1, f_0)+ d_H(g_1, g_0)\right], -\end{align} -and therefore -$$\|f_{n+1}g_{n+1}-f_n g_n\|_\infty \leq \kappa^n \left[ d_H(f_1, f_0)+ d_H(g_1, g_0)\right].$$ -Let $\Psi \in \Lip_1(\msx\times \msy, \rset)$, -and without loss of generality we may assume that $\Psi(\bar x, \bar y)=0$ for a fixed pair $(\bar x, \bar y)\in \msx\times \msy$. 
-We then have -\begin{align} -&\int \Psi(x,y) f_{n+1}(x) g_{n+1}(y) K(x,y) \DIFdelbegin %DIFDELCMD < \rmd %%% -\DIFdelend \pi_0(\DIFaddbegin \rmd \DIFaddend x) \DIFdelbegin %DIFDELCMD < \rmd%%% -\DIFdelend \pi_1(\DIFaddbegin \rmd \DIFaddend y)\\ -&\quad - \int \Psi(x,y) f_{n}(x) g_{n}(y) K(x,y) \DIFdelbegin %DIFDELCMD < \rmd %%% -\DIFdelend \pi_0(\DIFaddbegin \rmd \DIFaddend x) \DIFdelbegin %DIFDELCMD < \rmd%%% -\DIFdelend \pi_1(\DIFaddbegin \rmd \DIFaddend y)\\ -&\quad \leq \int \|\Psi(x,y)\|_\infty \|f_{n+1} g_{n+1} - f_n g_n\|_\infty \|K\|_\infty \DIFdelbegin %DIFDELCMD < \rmd%%% -\DIFdelend \pi_0(\DIFaddbegin \rmd \DIFaddend x) \DIFdelbegin %DIFDELCMD < \rmd%%% -\DIFdelend \pi_1(\DIFaddbegin \rmd \DIFaddend y)\\ -&\quad \leq \kappa^n(\diam_\msx + \diam_\msy)\rme^{-\|c\|_\infty}\kappa^n \left[ d_H(f_1, f_0)+ d_H(g_1, g_0)\right]. -\end{align} -Taking the supremum over $\{\Psi\in \Lip_1(\msx\times\msy, \rset): \Psi(\bar x, \bar y) =0\}$, we have that -$$\wassersteinD[1](\Pbb^{n+1}, \Pbb^{n}) \leq \kappa^n(\diam_\msx + \diam_\msy)\rme^{-\|c\|_\infty}\kappa^n \left[ d_H(f_1, f_0)+ d_H(g_1, g_0)\right].$$ -By completeness of $(\Pens_1(\msx\times\msy), \wassersteinD[1])$ we have that $\Pbb^n$ converges in \DIFdelbegin \DIFdel{$(\Pens_1(\msx\times\msy), \wassersteinD[1])$ }\DIFdelend \DIFaddbegin \DIFadd{$(\Pens{P}_1(\msx\times\msy), \wassersteinD[1])$ }\DIFaddend to $\Pbb^\ast\in \Pens_1(\msx\times\msy, \rset)$. - -Similarly $\hat{\Pbb}^n \to \hat{\Pbb}^\ast\in\Pens_1(\msx\times\msy, \rset)$. Combining everything and applying \Cref{thm:stability_ipfp} we have -\begin{align} -\wassersteinD[1](\Pbb^\ast, \hat \Pbb^\ast) - &\leq \wassersteinD[1](\Pbb^\ast, \Pbb^n) - + \wassersteinD[1](\Pbb^n, \hat\Pbb^n)+ - \wassersteinD[1](\hat\Pbb^n, \hat\Pbb^\ast)\\ - &\leq C \defEns{\wassersteinD[1](\pi_0, \hat{\pi}_0) + \wassersteinD[1](\pi_1, \hat{\pi}_1)} + \wassersteinD[1](\Pbb^\ast, \Pbb^n)+\wassersteinD[1](\hat\Pbb^n, \hat\Pbb^\ast). 
-\end{align} -Letting $n\to \infty$ the result follows. -\end{proof} - -\bibliographystyle{apalike} -\bibliography{bibliography} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff --git a/doc/diff_nf_sde.tex b/doc/diff_nf_sde.tex deleted file mode 100644 index 4367d69..0000000 --- a/doc/diff_nf_sde.tex +++ /dev/null @@ -1,63 +0,0 @@ -\section{Difference between ODE and SDE likelihood computations} -\label{sec:diff-betw-ode} - -In this section, we show that the likelihood computation from -\cite{song2020score} does not coincide with the likelihood computation -obtained with the SDE model. We present our findings in the Riemannian setting -but our conclusions can be adapted to the Euclidean setting with arbitrary -forward dynamics. Recall that we consider a Brownian motion on the manifold as a forward process -$(\bfB_t^\M)_{t \in \ccint{0,T}}$ with $\{p_t\}_{t=0}^T$ the associated family -of densities. We have that for any $t \in \ccint{0,T}$ and $x \in \M$ -\begin{equation} - \label{eq:forward} - \partial_t p_t(x) = \tfrac{1}{2} \Delta p_t(x) = \dive(\tfrac{1}{2}p_t \nabla \log p_t )(x) . -\end{equation} - - -\paragraph{ODE model} -In the case of the ODE model we define $(\bfX_t)_{t \in \ccint{0,T}}$ such that -$\bfX_0$ has distribution $\pi$ and satisfies -$\rmd \bfX_t = \tfrac{1}{2} \nabla \log p_t(\bfX_t) \rmd t$. Note that the family of -densities $\{q_t\}_{t=0}^T$ associated with $(\bfX_t)_{t \in \ccint{0,T}}$ also -satisfies \cref{eq:forward}. Now, we consider -$(\bfhX_t)_{t \in \ccint{0,T}} = (\bfX_{T-t})_{t \in \ccint{0,T}}$ and note that it satisfies -\begin{equation} - \label{eq:backward_flow_appendix} - \rmd \bfhX_t = -\tfrac{1}{2} \nabla \log p_{T-t}(\bfhX_t) \rmd t . -\end{equation} -Finally, we consider $(\bfY_t^{\mathrm{ODE}})_{t \in \ccint{0,T}}$ which also satisfies -\cref{eq:backward_flow_appendix} and such that the distribution of $\bfY_0^{\mathrm{ODE}}$ is -$\piinv$. 
Denoting $\{q_t^{\mathrm{ODE}}\}_{t=0}^T$ the densities of -$(\bfY_t^{\mathrm{ODE}})_{t \in \ccint{0,T}}$ w.r.t. $\piinv$ we have for any $t \in \ccint{0,T}$ and $x \in \M$ -\begin{equation} - \label{eq:proba_flow_ode} - \partial_t q_t^{\mathrm{ODE}}(x) = \dive(q_t^{\mathrm{ODE}} -\tfrac{1}{2} \nabla\log p_{T-t} )(x) . -\end{equation} - -\paragraph{SDE model} -When sampling we consider a process $(\bfY^{\mathrm{SDE}}_t)_{t \in \ccint{0,T}}$ such that -$\bfY^{\mathrm{SDE}}_0$ has distribution $\piinv$ and whose family of densities -$\{q_t^{\mathrm{SDE}}\}_{t=0}^T$ satisfies for any $t \in \ccint{0,T}$ and $x \in \M$ -\begin{equation} - \label{eq:proba_flow_sde} - \partial_t q_t^{\mathrm{SDE}}(x) = -\dive(q_t^{\mathrm{SDE}} \nabla\log p_{T-t})(x) +\tfrac{1}{2}\Delta q_t^{\mathrm{SDE}}(x) = \dive(q_t^{\mathrm{SDE}}\{-\nabla\log p_{T-t} + \tfrac{1}{2}\nabla\log q_t^{\mathrm{SDE}}\})(x) . -\end{equation} -Hence, \cref{eq:proba_flow_ode} and \cref{eq:proba_flow_sde} do not agree, -except if $q_t^{\mathrm{SDE}} = q_t^{\mathrm{ODE}} = p_{T-t}$ which is the case if and only if $\bfY^{\mathrm{SDE}}_0$ and -$\bfY_0^{\mathrm{ODE}}$ have the same distribution as $\bfX_T$. Note that it is possible to -evaluate the likelihood of the SDE model using that -\begin{equation} - \partial_t \log q_t^{\mathrm{SDE}}(\bfY^{\mathrm{SDE}}_t) = \dive(-\nabla\log p_{T-t}(\bfY^{\mathrm{SDE}}_t) +\tfrac{1}{2}\nabla\log q_t^{\mathrm{SDE}}(\bfY^{\mathrm{SDE}}_t)) \rmd t . -\end{equation} -We can use the score approximation $\bm{s}_\theta(t,x)$ to approximate -$\nabla \log p_t(x)$ for any $t \in \ccint{0,T}$ and $x \in \M$. In order to -approximate $\nabla \log q_t^{\mathrm{SDE}}$, one can consider another neural network -$\bm{t}_\theta(t,x)$ approximating $\nabla \log q_t^{\mathrm{SDE}}(x)$ for any $t \in \ccint{0,T}$ -and $x \in \M$. This approximation can be obtained using the implicit score loss -presented in \Cref{sec:riem-score-appr}. 
- - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/diffusion.tex b/doc/diffusion.tex deleted file mode 100644 index b75fb40..0000000 --- a/doc/diffusion.tex +++ /dev/null @@ -1,9 +0,0 @@ -\section{Time reversal as a diffusion process} -\label{sec:time-reversal-as} - -Write rigorously the Haussman and Pardoux result - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main_arxiv" -%%% End: diff --git a/doc/eigen.tex b/doc/eigen.tex deleted file mode 100644 index e8d9ff1..0000000 --- a/doc/eigen.tex +++ /dev/null @@ -1,77 +0,0 @@ -\section{Eigenfunctions, eigenvalues of the Laplace-Beltrami operator} -\label{sec:eigenf-eigenv-lapl} - - -In this section, we recall the eigenfunctions and eigenvalues of the -Laplace-Beltrami operator in two specific cases: the $d$-dimensional torus and -the $d$-dimensional sphere. - -\paragraph{The case of the torus} -Let $\{b_i\}_{i=1}^d$ be a basis of $\rset^d$. We consider the associated -lattice on $\rset^d$, i.e. -$\Gamma = \ensembleLigne{\sum_{i=1}^d \alpha_i b_i}{\{\alpha_i\}_{i=1}^d \in - \zset^d}$. Finally, the associated $d$-dimensional torus is defined as -$\tset_\Gamma = \rset^d / \Gamma$. Denote -$\rmB = (b_1, \dots, b_d) \in \rset^{d \times d}$. Let -$\{\bar{b}_i\}_{i=1}^d \in (\rset^d)^d$ such that -$(\rmB^{-1})^\top = (\bar{b}_1, \dots, \bar{b}_d)$. We define -$\Gamma^\star = \ensembleLigne{\sum_{i=1}^d \alpha_i - \bar{b}_i}{\{\alpha_i\}_{i=1}^d \in \zset^d}$, the dual lattice. Note that for -any $x \in \Gamma$ and $y \in \Gamma^\star$ we have that -$\langle x, y \rangle \in \zset$ and that if $\{b_i\}_{i=1}^d$ is an orthonormal -basis then $\Gamma = \Gamma^\star$. The torus $\rset^d/\Gamma$ is a (flat) -compact Riemannian manifold. The set of eigenvalues of the Laplace-Beltrami -operator is given by -$\ensembleLigne{-4 \uppi^2 \normLigne{y}^2}{y \in \Gamma^\star}$. 
The -eigenfunctions of the Laplace-Beltrami operator are given by -$\ensembleLigne{x \mapsto \sin(2 \uppi \langle x, y \rangle)}{y \in - \Gamma^\star}$ and -$\ensembleLigne{x \mapsto \cos(2 \uppi \langle x, y \rangle)}{y \in - \Gamma^\star}$. - - -\paragraph{The case of the sphere} Next, we investigate the case of the -$d$-dimensional sphere \citep[see][]{saloff1994precise}. The set of eigenvalues of -the Laplace-Beltrami operator is given by -$\ensembleLigne{-k(k+d-1)}{k \in \nset}$. Note that $\lambda_k = k(k+d-1)$ has -multiplicity $d_k = (k+d-2)!/\{(d-1)!k\}(2k+d-1)$. The eigenfunctions of the -Laplace-Beltrami operator are known as the spherical harmonics and can be -defined in terms of Legendre polynomials. When investigating the heat kernel on -the $d$-dimensional sphere, we are interested in the product -$(x,y) \mapsto \sum_{\phi \in \Phi_n} \phi(x)\phi(y)$, where $\Phi_n$ is the set -of eigenfunctions associated with the eigenvalue $\lambda_n$ for $n \in -\nset$. This function can be described using the Gegenbauer polynomials -\cite[see][Theorem 2.9]{atkinson2012spherical}. More precisely, we have that for any -$n \in \nset$ and $x,y \in \mathbb{S}^d$ -\begin{align} - G_n(x,y) &= \textstyle{ \sum_{\phi \in \Phi_n} \phi(x) \phi(y)} \\ - &= \textstyle{n! \Gamma((d-1)/2) \sum_{k=0}^{\floor{n/2}} (-1)^k (1- \langle x,y \rangle^2)\langle x,y \rangle^{n-2k} / (4^k k! (n -2k)! \Gamma(k + (d-1)/2) ) ,} -\end{align} -where here $\Gamma: \ \rset_+ \to \rset$ is given for any $v > 0$ by -$\Gamma(v) = \int_0^{+\infty} t^{v-1} \rmd t^{-t} \rmd t$. In the special case -where $d=1$, then the heat kernel coincide with the wrapped Gaussian density and -can be easily evaluated. - -% with $\{\lambda_n\}_n$ and $\{\psi_n\}_n$ respectively the eigenvalues and eigenfunctions of the Laplace-Beltrami operator $\Delta_\mathcal{M}$. 
-% For instance with $\mathbb{S}^d$, we know \citep{borovitskiy2020Matern,devito2019Reproducing,zhao2018Exact} that $\lambda_n = n(n + d - 1)$ and $$\psi_n(x) \psi_n(y) = \frac{2n+d-1}{d-1} \frac{1}{A_{\mathbb{S}^n}} \mathcal{C}_n^{(d-1)/2}(x \cdot y)$$ where $\mathcal{C}_n^{(d-1)/2}$ are Gegenbauer polynomials. -% An exact sampling scheme exists for $\mathbb{S}^d$ \cite{mijatovic2020note} but it is non trivial to implement \footnote{https://github.com/konkam/ExactWrightFisher.jl}. - -% When $d=2$, then the eigenfunctions are the spherical harmonics and the Gegenbauer polynomials are the Legendre polynomials $P_n$, we thus get \citep{jammalamadaka2019Harmonic,mardia2000Directional}: -% $$p_t(x, y) = \sum^\infty_{n=0} e^{- n(n+1) \cdot t } ~\frac{2n + 1}{4 \pi} P_n(x \cdot y).$$ -% When $d=1$, the heat kernel and Wrapped normal density coincide which means one can easily sample $X_t|X_0$. -% Additionally, around $t \approx 0$, \cref{eq:heat_kernel} can be expended as -% $$p_t(x, y) = (4\pi t)^{-d/2} G(r)^{-1/2} \exp \left(-\frac{r^2}{4t}\right) + \mathcal{O}(1)$$ -% with $r=d_\mathcal{M}(x,y)$. Higher order expansions can be obtained -% \cite{rey2019diffusion,zhao2018Exact}. One could get an unbiased estimator of -% \cref{eq:heat_kernel} via the Russian roulette estimator -% $\sum_n \Delta_n = \mathbb{E}_{N \sim p} \left[ \sum^N_n -% \frac{\Delta_n}{\mathbb{P}(N \ge n)} \right]$, although what we care in -% practice about $\nabla_x \log p_t(x, y)$ where the $\log$ would bias the -% estimator. - - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/experimental_detail.tex b/doc/experimental_detail.tex deleted file mode 100644 index 6409d11..0000000 --- a/doc/experimental_detail.tex +++ /dev/null @@ -1,25 +0,0 @@ -\section{Experimental detail} -\label{sec:exp_detail} - -In what follows we describe the experimental settings used to generate results introduced in \cref{sec:experiments}. - -% say we use Jax and geomstats? 
-% plan on open sourcing? - -\paragraph{Architecture} -The architecture of the score network $s_\theta$ is given by a multilayer perceptron with 5 hidden layers with $512$ units each. -We use sinusoidal activation functions. -% set of divergence free for generating the vector field. - -% \paragraph{Loss} -% slide score matching with 1 sample for the Hutchinson estimator. - -\paragraph{Optimization} -All models are trained by the stochastic optimizer Adam \citep{kingma2015Adam} -with parameters $\beta_1=0.9$, $\beta_2=0.999$, batch-size of $512$ data-points and a learning rate set to $2e-4$. -% number of iterations -% annealing - -\paragraph{Likelihood evaluation} -We rely on the Dormand-Prince solver \citep{dormand1980family}, an adaptive Runge-Kutta 4(5) solver, with absolute and relative tolerance of $1e-5$ to compute approximate numerical solutions of the ODE. -% Models are trained on a cluster of GeForce RTX 2080 Ti GPU cards. \ No newline at end of file diff --git a/doc/experiments.tex b/doc/experiments.tex deleted file mode 100644 index 5f16fc5..0000000 --- a/doc/experiments.tex +++ /dev/null @@ -1,66 +0,0 @@ -\section{Experiments} -\label{sec:experiments} - -We evaluate the model on a collection of datasets, each containing an empirical distribution of occurrences of earth and climate science events on the surface of the earth. These events are: volcanic eruptions \cite{volcanoe_dataset}, earthquakes \cite{earthquake_dataset}, floods \citep{flood_dataset} and wild fires \citep{fire_dataset}. In each case the earth is approximated as a perfect sphere. We compare to previous baseline methods: Riemannian Continuous Normalizing Flows \citep{mathieu2020riemannian}, Moser Flows \citep{rozen2021moser} and a mixture of Kent distributions \citep{peel2001fitting}. The mixture of Kent distributions is optimised using an EM algorithm and the optimal number of components is selected on a validation set. 
-Additionally, we consider another score-based generative model: a standard SBGM on the 2D plane followed by the inverse stereographic projection which induces a density on the sphere \citep{gemici2016normalizing}. -More experimental details can be found in \cref{sec:exp_detail}. -We observe from \cref{tab:geoscience} that the RSBGM model outperforms all other methods in density estimation, in particular by a large margin on the volcanic eruptions dataset. -% Qualitatively, we see on \cref{fig:geoscience} that - -\begin{table}[h] - \centering - \begin{tabular}{lrrrr} - % \toprule - & \textbf{Volcano} & \textbf{Earthquake} & \textbf{Flood} & \textbf{Fire} \\ - \midrule - Mixture of Kent & $-0.95_{\pm 0.14}$ & $0.14_{\pm 0.13}$ & $0.73_{\pm 0.07}$ & $-1.18_{\pm 0.06}$ \\ - Riemannian CNF & $-0.97_{\pm 0.15}$ & $0.19_{\pm0.04}$ & $0.90_{\pm0.03}$ & $-0.66_{\pm0.05}$ \\ - Moser Flow & $-2.02_{\pm 0.42}$ & $-0.09_{\pm0.02}$ & $0.62_{\pm 0.04}$ & $-1.03_{\pm 0.03}$ \\ - Stereographic Score-Based & ${-4.37}_{\pm ???}$ & ${-0.05}_{\pm ???}$ & ${1.32}_{\pm ???}$ & $0.11_{\pm ???}$ \\ - Riemannian Score-Based & $\bm{-5.56}_{\pm0.26}$ & $\bm{-0.21}_{\pm0.03}$ & $\bm{0.52}_{\pm0.02}$ & $\bm{-1.24}_{\pm 0.07}$\\ - \midrule - Dataset size & 827 & 6120 & 4875 & 12809 \\ - \bottomrule - \end{tabular} - \caption{ - Negative log-likelihood scores for each method on the earth and climate science datasets. - Bold indicates statistically significant best method. - Means and standard deviations are computed over 5 different runs. 
- } - \label{tab:geoscience} -\end{table} - -\begin{figure}[t] -% \vspace{-0.8em} - \centering -\begin{subfigure}{.33\textwidth} - \includegraphics[width=\linewidth]{{pdf_earthquake_rsbgm}.png} - \put(-150,40){\rotatebox{90}{Stereographic}} -\end{subfigure}\hfil -\begin{subfigure}{.33\textwidth} - \includegraphics[width=\linewidth]{{pdf_earthquake_rsbgm}.png} -\end{subfigure}\hfil -\begin{subfigure}{.33\textwidth} - \includegraphics[width=\linewidth]{{pdf_earthquake_rsbgm}.png} -\end{subfigure}\hfil -\begin{subfigure}{.33\textwidth} - \includegraphics[width=\linewidth]{{pdf_earthquake_rsbgm}.png} - \put(-150,40){\rotatebox{90}{Riemannian}} - \put(-90,-10){Earthquake} -\end{subfigure}\hfil -\begin{subfigure}{.33\textwidth} - \includegraphics[width=\linewidth]{{pdf_earthquake_rsbgm}.png} - \put(-80,-10){Flood} -\end{subfigure}\hfil -\begin{subfigure}{.33\textwidth} - \includegraphics[width=\linewidth]{{pdf_earthquake_rsbgm}.png} - \put(-70,-10){Fire} -\end{subfigure} -\caption{ - Trained score-based generative models on earth sciences data. - The learned density is colored green-blue. - Blue and red dots represent training and testing datapoints, respectively. - } - \label{fig:geoscience} -% \vspace{-1.0em} -\end{figure} \ No newline at end of file diff --git a/doc/horizontal.tex b/doc/horizontal.tex deleted file mode 100644 index 2d0b70c..0000000 --- a/doc/horizontal.tex +++ /dev/null @@ -1,9 +0,0 @@ -\section{Horizontal lift and time-reversal} -\label{sec:horizontal-lift-time} - -Need to be worked out properly. Is it interesting in practice? Probably not. - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main_arxiv" -%%% End: diff --git a/doc/ideas.tex b/doc/ideas.tex deleted file mode 100644 index 0b3d3a0..0000000 --- a/doc/ideas.tex +++ /dev/null @@ -1,12 +0,0 @@ -\section{Random ideas to try} -\label{sec:random-ideas-try} - -\begin{itemize} -\item try the smooth denoising? 
-\item max likelihood -\item Gromov stuff laplace between manifolds -\end{itemize} -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main_arxiv" -%%% End: diff --git a/doc/images/pdf_earthquake_rsbgm.png b/doc/images/pdf_earthquake_rsbgm.png deleted file mode 100644 index 61b7d1b..0000000 Binary files a/doc/images/pdf_earthquake_rsbgm.png and /dev/null differ diff --git a/doc/implicit_loss.tex b/doc/implicit_loss.tex deleted file mode 100644 index be476a5..0000000 --- a/doc/implicit_loss.tex +++ /dev/null @@ -1,27 +0,0 @@ -\section{Proof of \cref{prop:implicit_der}} -\label{sec:implicit-losses} - - - -\begin{proof} - Let $t \in \ocint{0,T}$ and $s_t \in \rmc^\infty(\M)$. Using the - divergence theorem \cite[see][p.51]{lee2018introduction}, we have - \begin{align} - \ell_{t|s}(s_t) &\textstyle{= \int_{\M \times \M} \normLigne{\nabla \log p_{t|s}(x_t|x_s)}^2 \rmd \Pbb_{s,t}(x_s,x_t) + \int_\M \normLigne{s_t(x_t)}^2 \rmd \Pbb_{t}(x_t)} \\ - & \qquad \qquad \textstyle{- 2 \int_{\M \times \M} \langle \nabla \log p_{t|s}(x_t|x_s), s_t(x_t) \rangle \rmd \Pbb_{s,t}(x_s,x_t)} \\ - &=\textstyle{\int_{\M \times \M} \normLigne{\nabla \log p_{t|s}(x_t|x_s)}^2 \rmd \Pbb_{s,t}(x_s,x_t) + \int_\M \normLigne{s_t(x_t)}^2 \rmd \Pbb_{t}(x_t)} \\ - & \qquad \qquad \textstyle{- 2 \int_{\M \times \M} \langle \nabla \log p_{t|s}(x_t|x_s), s_t(x_t) \rangle p_{t|s}(x_t|x_s)p_s(x_s) \rmd (\piinv \otimes \piinv) (x_s, x_t) } \\ - &=\textstyle{\int_{\M \times \M} \normLigne{\nabla \log p_{t|s}(x_t|x_s)}^2 \rmd \Pbb_{s,t}(x_s,x_t) + \int_\M \normLigne{s_t(x_t)}^2 \rmd \Pbb_{t}(x_t)} \\ - & \qquad \qquad \textstyle{- 2 \int_{\M } \{\int_\M \langle \nabla p_{t|s}(x_t|x_s), s_t(x_t) \rangle \rmd \piinv(x_t)\} p_s(x_s) \rmd \piinv(x_s) } \\ - &=\textstyle{\int_{\M \times \M} \normLigne{\nabla \log p_{t|s}(x_t|x_s)}^2 \rmd \Pbb_{s,t}(x_s,x_t) + \int_\M \normLigne{s_t(x_t)}^2 \rmd \Pbb_{t}(x_t)} \\ - & \qquad \qquad \textstyle{ +2 \int_{\M } \{\int_\M \dive(s_t)(x_t)p_{t|s}(x_t|x_s) \rmd 
\piinv(x_t)\} p_s(x_s) \rmd \piinv(x_s) } , - \end{align} - - - which concludes the proof. - \end{proof} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/intro.tex b/doc/intro.tex deleted file mode 100644 index 92b2e31..0000000 --- a/doc/intro.tex +++ /dev/null @@ -1,146 +0,0 @@ -\section{Introduction} -\label{sec:introduction} - -Score-based Generative Modeling (SGM) is a recently developed approach to -generative modeling exhibiting state-of-the-art performances on various tasks -including image and audio synthesis -D\citep{song2019generative,song2020score,ho2020denoising,nichol2021improved,nichol2021beatgans}. These -models proceed as follows. We add noise to the data progressively using a -diffusion process targeting a reference Gaussian distribution. The corresponding -time-reversal process is also a diffusion whose drift depends on the logarithmic -gradients of the perturbed data distributions, i.e. the scores. The generative -model is obtained by approximating this time-reversal denoising diffusion by -initializing it at the reference Gaussian distribution and using neural networks -estimates of the scores obtained using score matching -\cite{hyvarinen2005estimation,vincent2011connection}. It can be shown rigorously -that the obtained final samples are approximately distributed according to the -data distribution \citep{debortoli2021neurips}. - -Until now, SGM has been applied to Euclidean data, i.e. data with flat -geometry. However, in a large number of scientific domains, the underlying -assumption is that the distributions of interest are supported on a Riemannian -manifold. 
These include, amongst others, protein modeling -\citep{boomsma2008generative,hamelryck2006sampling,mardia2008multivariate,shapovalov2011smoothed,mardia2007protein}, -cell development \citep{klimovskaia2020poincare}, image recognition -\citep{lui2012advances}, geological sciences -\citep{karpatne2018machine,peel2001fitting}, graph-structured and hierarchical -data \citep{roy2007learning,steyvers2005large}, robotics -\citep{feiten2013rigid,senanayake2018directional} and high-energy physics -\citep{brehmer2020flows}. The choice of a Riemannian metric is associated with a -description of the interactions between the points of the dataset and therefore -can be seen as a geometric prior. - -In this paper we introduce \emph{Riemannian Score-based Generative Models} -(RSGM), an extension of SGMs to compact Riemannian manifolds. Contrary to -classical SGMs which rely on forward and time-reversed diffusion processes -defined on an Euclidean space, we incorporate the geometry of the data in our -algorithm by defining our diffusion processes directly on the Riemannian -manifold. However, switching from the classical Euclidean setting to the -Riemannian is non-trivial. First, one must be able to define a noising process -on the manifold that converges to an easy-to-sample reference distribution. In -the setting of compact Riemannian manifolds, a natural choice is given by the -Brownian motion. Indeed, due to the compactness, this diffusion is geometrically -ergodic and targets the uniform distribution on the manifold \citep{he2013lower} -from which one can either sample exactly or approximately with high -accuracy. Second, we must identify the corresponding time-reversal process. We -show here that, as in the Euclidean case, this process is also a diffusion whose -infinitesimal generator is given by the generator of the forward process with an -extra term corresponding to the scores of the marginal distributions of the -Brownian diffusion initialized at the data distribution. 
Third, while score -matching ideas \citep{hyvarinen2005estimation,vincent2011connection} can be -easily used to estimate the score in the Euclidean case when the forward -dynamics is given by a Ornstein--Ulhenbeck or a Brownian motion, adapting these -ideas to the Riemmanian framework is complicated by the fact that the heat -kernel, i.e. the transition kernel of the Brownian motion, is typically only -available as an infinite sum through the Sturm-Liouville -decomposition. Similarly, diffusions on manifold cannot be sampled -exactly. Hence, we use geodesic random walks which converge to the diffusion of -interest in the limit of small stepsizes \citep{jorgensen1975central}. - -We further consider the following extensions of RSGMs. By using tools from -neural ODEs on manifolds -\citep{mathieu2020riemannian,falorsi2020neural,lou2020neural}, we show how we -can compute the likelihood of our model, generalizing the approach proposed in -the Euclidean case in -\citep{song2020score,durkan2021maximum,huang2021variational}. Finally, RGSMs -like standard SGMs are computationally expensive at generation time as they -require to run a discretized diffusion over many time steps. For speeding up -generation, it has been proposed in the Euclidean setting to solve instead a -Schr\"odinger Bridge (SB) problem -\citep{debortoli2021neurips,chen2021likelihood}, i.e. a dynamical version of -an entropy-regularized Optimal Transport (OT) problem between the data and the -easy-to-sample reference distribution. In particular, we generalize the -Diffusion Schr\"odinger Bridge (DSB) algorithm introduced in -\citep{debortoli2021neurips} to solve the SB problem on compact Riemmanian -manifolds. - -% We also investigate the connection between SGMs and entropy-regularized Optimal Transport -% (OT) \citep{debortoli2021neurips,vargas2021solving,chen2021likelihood}. 
More -% precisely, we define the Schr\"odinger Bridge (SB) problem on Riemannian -% manifolds, which corresponds to a dynamical version of regularized OT. We solve -% this problem with a procedure akin to Diffusion Schr\"odinger Bridge (DSB) -% introduced in \citep{debortoli2021neurips}. DSB allows to speed up -% significantly the sampling process in generative modeling and also permits to -% define diffusions interpolating between arbitrary distributions. We show that -% our Riemannian extension of DSB enjoys the same benefits. - -We validate our methodology by modelling a number of natural disaster occurrence datasets collected by \cite{mathieu2020riemannian}. We compare to three previous baselines, a mixture of Kent distributions \cite{peel2001fitting}, Riemannian Continuous Normalising Flows \cite{mathieu2020riemannian}, and Moser Flows \cite{rozen2021moser}. We also compare to using a standard standard Euclidean SGM by projecting the manifold onto Euclidean space and performing the flow there (e.g. projecting the sphere via the stereographic projection onto the 2D plane). We find in all cases that RSGMs outperform all baselines. - -% We validate our methodology \valentin{TO FILL}. (experiments) -% Code is available at ... - -The rest of the paper is organized as follows. We introduce the notation needed -in the rest of the paper in \cref{sec:notation}. We recall the basics of -standard Euclidean SGMs in \Cref{sec:eucl-sgm-riem}. In -\Cref{sec:score-appr-manif}, we present RGSMs, our extension of SGMs to compact -Riemannian manifolds. We discuss related works in -\Cref{sec:related-works} and assess the efficiency of our method in -\Cref{sec:experiments}. % Finally, we present an extension of our work to -% Schr\"odinger bridges in \Cref{sec:extension} and -Finally we summarize our contributions in \Cref{sec:conclusion}. - -\section{Notation} -\label{sec:notation} - -We consider a compact connected Riemannian manifold -$(\M, \langle \cdot, \cdot \rangle_\M)$. 
We denote by $\XM$ the set of vector -fields on $\M$ and $\XMdeux$ the section -$\Gamma(\M, \sqcup_{x \in \M} \mathcal{L}(\mathrm{T}_x \M))$, where $\mathcal{L}(\mathrm{T}_x \M)$ is the space of linear mappings on -$\mathrm{T}_x \M$. Let $(\bfM_t)_{t \in \ccint{0,T}}$ be a real-valued process -and $(\bfX_t)_{t \in \ccint{0,T}}$ be a $\M$-valued process with distribution -$\Pbb \in \Pens(\rmc(\ccint{0,T}, \M))$. $(\bfM_t)_{t \in \ccint{0,T}}$ is a -$\Pbb$-martingale if $(\bfM_t)_{t \in \ccint{0,T}}$ is a martingale w.r.t the -filtration $(\mcf_t)_{t \in \ccint{0,T}}$ where for any $t \in \ccint{0,T}$, -$\mcf_t = \sigma(\ensembleLigne{\bfX_s}{s \in \ccint{0,t}})$. In addition, for -any $\Pbb \in \Pens(\rmc(\ccint{0,T}, \M))$, we define $R(\Pbb)$ such that for -any $\msa \in \mcb{\rmc(\ccint{0,T}, \msx)}$ we have -$R(\Pbb)(\msa) = \Pbb(R(\msa))$, where -$R(\msa) = \ensembleLigne{t \mapsto \omega_{T-t}}{\omega \in \msa}$. In other -words, $R(\Pbb)$ is the path measure associated with the reverse process $\Pbb$. -When there is no ambiguity, we use the same notation for distributions and their -densities. - -Let $T > 0$ or $T=+\infty$, $b: \ \ccint{0,T} \to \XM$, $\Sigma: \ \ccint{0,T} \to \XMdeux$ -such that for any $t \in \ccint{0,T}$ and $x \in \M$, $\Sigma(t,x)$ is -symmetric, non-negative and denote $\sigma(t,x) = \Sigma^{1/2}(t,x)$. Let -$(\bfX_t)_{t \in \ccint{0,T}}$ a continuous process with distribution -$\Pbb \in \Pens(\rmc(\ccint{0,T}, \M))$ such that for any $f \in \rmc^2(\M)$ we -have that $(\bfM_t^{\bfX, f})_{t \in \ccint{0,T}}$ is a $\Pbb$-martingale where -for any $t \in \ccint{0,T}$ - \begin{equation} - \textstyle{ \bfM_t^{\bfX, f} = f(\bfX_t) - \int_0^t \{ \langle b(s, \bfX_s), \nabla f(\bfX_s) \rangle_\M + (1/2) \langle \Sigma(\bfX_s), \nabla^2 f(\bfX_s) \rangle_\M \} \rmd s . 
} - \end{equation} - Then, we say that $(\bfX_t)_{t \in \ccint{0,T}}$ is \emph{associated with} the - SDE $\rmd \bfX_t = b(t, \bfX_t) \rmd t + \sigma(t, \bfX_t) \rmd \bfB_t^\M$ - with infinitesimal generator - $\generator: \ \ccint{0,T} \times \rmc^2(\M) \to \rmc(\M)$ given for any - $t \in \ccint{0,T}$ by - $\generator_t( f) = \langle b, \nabla f \rangle_\M + (1/2) \langle \Sigma, - \nabla^2 f \rangle_\M$. Note that if $\Sigma = \Id$ then - $\langle \Sigma, \nabla^2 f \rangle_\M = \Delta f$, where $\Delta$ is the - Laplace-Beltrami operator. - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/intro_app.tex b/doc/intro_app.tex deleted file mode 100644 index a681ae7..0000000 --- a/doc/intro_app.tex +++ /dev/null @@ -1,18 +0,0 @@ -\section{Organization of the supplementary} -\label{sec:organ-suppl} - -In this supplementary we gather the proof of \cref{thm:time_reversal_manifold} -as well as additional derivations on score-based generative models and -Riemannian manifolds. In \cref{sec:prel-stoch-riem}, we recall basics on -stochastic Riemannian geometry following \cite{hsu2002stochastic}. In -\cref{sec:diff-betw-ode}, we highlight differences between ODE and SDE -models for likelihood computation. In \cref{sec:eigenf-eigenv-lapl}, we recall -some basic facts about eigenvalues and eigenfunctions of the Laplace-Beltrami -operator on the $d$-dimensional sphere and torus. In \cref{sec:time-reversal}, -we present the extension of the time-reversal formula to manifold and prove -\cref{thm:time_reversal_manifold}. 
- -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/isometric.tex b/doc/isometric.tex deleted file mode 100644 index adaeab2..0000000 --- a/doc/isometric.tex +++ /dev/null @@ -1,21 +0,0 @@ -\section{Isometric embedding and time-reversal} -\label{sec:isom-embedd-time} - -\subsection{Continuous time-reversal} -\label{sec:cont-time-revers} - -This is similar to the Euclidean stuff - -\subsection{Discretisation and approximation results} -\label{sec:discr-appr-rates} - -\begin{enumerate} -\item discretisation -\item dont talk about score approximation here (later section) -\item result on the approximation similar to Theorem 1 -\end{enumerate} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main_arxiv" -%%% End: diff --git a/doc/main.aux b/doc/main.aux deleted file mode 100644 index f1da09d..0000000 --- a/doc/main.aux +++ /dev/null @@ -1,614 +0,0 @@ -\relax -\providecommand\hyper@newdestlabel[2]{} -\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} -\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined -\global\let\oldcontentsline\contentsline -\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} -\global\let\oldnewlabel\newlabel -\gdef\newlabel#1#2{\newlabelxx{#1}#2} -\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} -\AtEndDocument{\ifx\hyper@anchor\@undefined -\let\contentsline\oldcontentsline -\let\newlabel\oldnewlabel -\fi} -\fi} -\global\let\hyper@last\relax -\gdef\HyperFirstAtBeginDocument#1{#1} -\providecommand\HyField@AuxAddToFields[1]{} -\providecommand\HyField@AuxAddToCoFields[2]{} -\citation{song2019generative,song2020score,ho2020denoising,nichol2021improved,nichol2021beatgans} -\citation{hyvarinen2005estimation,vincent2011connection} -\citation{debortoli2021neurips} -\citation{boomsma2008generative,hamelryck2006sampling,mardia2008multivariate,shapovalov2011smoothed,mardia2007protein} -\citation{klimovskaia2020poincare} -\citation{lui2012advances} 
-\citation{karpatne2018machine,peel2001fitting} -\citation{roy2007learning,steyvers2005large} -\citation{feiten2013rigid,senanayake2018directional} -\citation{brehmer2020flows} -\citation{he2013lower} -\citation{hyvarinen2005estimation,vincent2011connection} -\citation{jorgensen1975central} -\providecommand\csxdef[2]{} -\@writefile{toc}{\providecommand\autonum@processReference[2]{}} -\@writefile{lof}{\providecommand\autonum@processReference[2]{}} -\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent } -\newlabel{sec:introduction}{{1}{1}{Introduction}{section.1}{}} -\newlabel{sec:introduction@cref}{{[section][1][]1}{[1][1][]1}} -\citation{mathieu2020riemannian,falorsi2020neural,lou2020neural} -\citation{song2020score,durkan2021maximum,huang2021variational} -\citation{debortoli2021neurips,chen2021likelihood} -\citation{debortoli2021neurips} -\citation{mathieu2020riemannian} -\citation{peel2001fitting} -\citation{mathieu2020riemannian} -\citation{rozen2021moser} -\citation{song2020score,song2019generative,debortoli2021neurips} -\csxdef {autonum@sec:notationReferenced}{} -\@writefile{toc}{\contentsline {section}{\numberline {2}Notation}{2}{section.2}\protected@file@percent } -\newlabel{sec:notation}{{2}{2}{Notation}{section.2}{}} -\newlabel{sec:notation@cref}{{[section][2][]2}{[1][2][]2}} -\@writefile{toc}{\contentsline {section}{\numberline {3}Euclidean Score-based Generative Modeling}{2}{section.3}\protected@file@percent } -\newlabel{sec:eucl-sgm-riem}{{3}{2}{Euclidean Score-based Generative Modeling}{section.3}{}} -\newlabel{sec:eucl-sgm-riem@cref}{{[section][3][]3}{[1][2][]2}} -\citation{cattiaux2021time,haussmann1986time} -\citation{song2020score,song2020improved,song2020denoising,jolicoeur2020adversarial,jolicoeur2021gotta,nichol2021beatgans} -\citation{bao2022analyticdpm,watson2021learning} -\citation{song2020score} -\citation{ho2020denoising,huang2021variational,durkan2021maximum} 
-\citation{cattiaux2021time} -\newlabel{eq:forward_SDE}{{1}{3}{Euclidean Score-based Generative Modeling}{equation.3.1}{}} -\newlabel{eq:forward_SDE@cref}{{[equation][1][]1}{[1][3][]3}} -\newlabel{eq:backward_SDE}{{2}{3}{Euclidean Score-based Generative Modeling}{equation.3.2}{}} -\newlabel{eq:backward_SDE@cref}{{[equation][2][]2}{[1][3][]3}} -\csxdef {autonum@eq:forward_SDEReferenced}{} -\csxdef {autonum@eq:backward_SDEReferenced}{} -\csxdef {autonum@eq:backward_SDEReferenced}{} -\@writefile{toc}{\contentsline {section}{\numberline {4}Riemannian Score-based Generative Modeling}{3}{section.4}\protected@file@percent } -\newlabel{sec:score-appr-manif}{{4}{3}{Riemannian Score-based Generative Modeling}{section.4}{}} -\newlabel{sec:score-appr-manif@cref}{{[section][4][]4}{[1][3][]3}} -\csxdef {autonum@tab:differenceReferenced}{} -\citation{hsu2002stochastic} -\citation{hsu2002stochastic} -\citation{urakawa2006convergence} -\citation{saloff1994precise} -\citation{song2020score,song2019generative} -\citation{jorgensen1975central} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces \small Differences between SGM on Euclidean spaces and RSGM on compact Riemannian manifolds.\relax }}{4}{table.caption.1}\protected@file@percent } -\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}} -\newlabel{tab:difference}{{1}{4}{\small Differences between SGM on Euclidean spaces and RSGM on compact Riemannian manifolds.\relax }{table.caption.1}{}} -\newlabel{tab:difference@cref}{{[table][1][]1}{[1][3][]4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Brownian motion on compact Riemannian manifolds}{4}{subsection.4.1}\protected@file@percent } -\newlabel{sec:brown-moti-comp}{{4.1}{4}{Brownian motion on compact Riemannian manifolds}{subsection.4.1}{}} -\newlabel{sec:brown-moti-comp@cref}{{[subsection][1][4]4.1}{[1][3][]4}} -\@writefile{toc}{\contentsline {paragraph}{Brownian motion and 
uniform distribution}{4}{section*.2}\protected@file@percent } -\csxdef {autonum@eq:forward_SDEReferenced}{} -\csxdef {autonum@sec:notationReferenced}{} -\csxdef {autonum@sec:brown-moti-manifReferenced}{} -\newlabel{prop:brownian_conv}{{2}{4}{Convergence of Brownian motion \cite [Proposition 2.6]{urakawa2006convergence}}{proposition.2}{}} -\newlabel{prop:brownian_conv@cref}{{[proposition][2][]2}{[1][4][]4}} -\csxdef {autonum@prop:brownian_convReferenced}{} -\@writefile{toc}{\contentsline {paragraph}{Sampling from diffusions}{4}{section*.3}\protected@file@percent } -\citation{lee2013smooth} -\citation{jorgensen1975central} -\citation{gunther1991isometric} -\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces \small Geodesic Random Walk (GRW)\relax }}{5}{algorithm.1}\protected@file@percent } -\newlabel{alg:grw}{{1}{5}{\small Geodesic Random Walk (GRW)\relax }{algorithm.1}{}} -\newlabel{alg:grw@cref}{{[algorithm][1][]1}{[1][5][]5}} -\csxdef {autonum@sec:metr-conn-tensReferenced}{} -\newlabel{eq:generic}{{3}{5}{Sampling from diffusions}{equation.4.3}{}} -\newlabel{eq:generic@cref}{{[equation][3][]3}{[1][5][]5}} -\csxdef {autonum@eq:genericReferenced}{} -\newlabel{thm:grw_diffusion}{{4}{5}{Convergence of geodesic random walk \cite [Theorem 2.1]{jorgensen1975central}}{theorem.4}{}} -\newlabel{thm:grw_diffusion@cref}{{[theorem][4][]4}{[1][5][]5}} -\csxdef {autonum@thm:jorgensen_appendixReferenced}{} -\csxdef {autonum@thm:grw_diffusionReferenced}{} -\csxdef {autonum@alg:grwReferenced}{} -\csxdef {autonum@alg:grwReferenced}{} -\csxdef {autonum@sec:stoch-diff-equatReferenced}{} -\citation{saloff1994precise} -\citation{jones2008Manifold} -\citation{saloff1994precise} -\citation{bismut1984large} -\citation{chen2021logarithmic} -\citation{lee2018introduction} -\citation{lee2006riemannian} -\citation{cattiaux2021time} -\citation{haussmann1986time} -\citation{cattiaux2021time} -\@writefile{toc}{\contentsline {paragraph}{Heat kernel on compact Riemannian 
manifolds}{6}{section*.4}\protected@file@percent } -\newlabel{eq:infinite_sum}{{4}{6}{Heat kernel on compact Riemannian manifolds}{equation.4.4}{}} -\newlabel{eq:infinite_sum@cref}{{[equation][4][]4}{[1][6][]6}} -\csxdef {autonum@eq:infinite_sumReferenced}{} -\csxdef {autonum@sec:eigenf-eigenv-laplReferenced}{} -\@writefile{tdo}{\contentsline {todo}{{\bf VDB:} experiment: quality o the approximation as a function of $J$ and $t$. On the same graph put the Varadhan approx}{6}{section*.5}\protected@file@percent } -\pgfsyspdfmark {pgfid1}{21446816}{31303498} -\pgfsyspdfmark {pgfid4}{36739180}{31339049} -\pgfsyspdfmark {pgfid5}{38401010}{31091875} -\newlabel{eq:varadhan}{{5}{6}{Heat kernel on compact Riemannian manifolds}{equation.4.5}{}} -\newlabel{eq:varadhan@cref}{{[equation][5][]5}{[1][6][]6}} -\csxdef {autonum@sec:riem-score-apprReferenced}{} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}A manifold time-reversal formula}{6}{subsection.4.2}\protected@file@percent } -\newlabel{sec:time-revers-form}{{4.2}{6}{A manifold time-reversal formula}{subsection.4.2}{}} -\newlabel{sec:time-revers-form@cref}{{[subsection][2][4]4.2}{[1][6][]6}} -\csxdef {autonum@sec:time-reversalReferenced}{} -\csxdef {autonum@eq:backward_SDEReferenced}{} -\newlabel{thm:time_reversal_manifold}{{5}{6}{Reverse diffusion}{theorem.5}{}} -\newlabel{thm:time_reversal_manifold@cref}{{[theorem][5][]5}{[1][6][]6}} -\newlabel{eq:time_reversal_manifold}{{6}{6}{Reverse diffusion}{equation.4.6}{}} -\newlabel{eq:time_reversal_manifold@cref}{{[equation][6][]6}{[1][6][]6}} -\citation{hutchinson1989stochastic} -\citation{song2020score,song2020improved,song2020denoising,ho2020denoising} -\citation{goto2021approximated,schiela2020sqp} -\citation{zhu2020riemannian,sato2019riemannian} -\csxdef {autonum@sec:time-reversalReferenced}{} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\csxdef {autonum@thm:grw_diffusionReferenced}{} 
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Score approximation on Riemannian manifolds}{7}{subsection.4.3}\protected@file@percent } -\newlabel{sec:riem-score-appr}{{4.3}{7}{Score approximation on Riemannian manifolds}{subsection.4.3}{}} -\newlabel{sec:riem-score-appr@cref}{{[subsection][3][4]4.3}{[1][7][]7}} -\csxdef {autonum@eq:time_reversal_manifoldReferenced}{} -\@writefile{toc}{\contentsline {paragraph}{Score-matching and loss functions}{7}{section*.6}\protected@file@percent } -\newlabel{prop:implicit_der}{{7}{7}{}{proposition.7}{}} -\newlabel{prop:implicit_der@cref}{{[proposition][7][]7}{[1][7][]7}} -\csxdef {autonum@sec:brown-moti-compReferenced}{} -\csxdef {autonum@sec:brown-moti-compReferenced}{} -\csxdef {autonum@eq:varadhanReferenced}{} -\citation{hutchinson1989stochastic} -\citation{rozen2021moser} -\citation{rozen2021moser} -\citation{falorsi2020neural} -\citation{song2020score} -\citation{mathieu2020riemannian} -\csxdef {autonum@tab:sm_lossesReferenced}{} -\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces \small Riemannian score matching losses.\relax }}{8}{table.caption.7}\protected@file@percent } -\newlabel{tab:sm_losses}{{2}{8}{\small Riemannian score matching losses.\relax }{table.caption.7}{}} -\newlabel{tab:sm_losses@cref}{{[table][2][]2}{[1][8][]8}} -\@writefile{toc}{\contentsline {paragraph}{Parametric family of vector fields}{8}{section*.8}\protected@file@percent } -\@writefile{tdo}{\contentsline {todo}{{\bf VDB:} I think this is needed? 
Maybe not}{8}{section*.9}\protected@file@percent } -\pgfsyspdfmark {pgfid6}{22327234}{25121870} -\pgfsyspdfmark {pgfid9}{36739180}{25157421} -\pgfsyspdfmark {pgfid10}{38401010}{24910247} -\@writefile{tdo}{\contentsline {todo}{{\bf EM:} We do not discuss NN architectural choices for $\{s_\theta ^i\}_i$ but can do for the next iteration.}{8}{section*.10}\protected@file@percent } -\pgfsyspdfmark {pgfid11}{4736286}{14723858} -\pgfsyspdfmark {pgfid14}{36739180}{14759409} -\pgfsyspdfmark {pgfid15}{38401010}{14512235} -\csxdef {autonum@sec:time-revers-formReferenced}{} -\csxdef {autonum@sec:brown-moti-compReferenced}{} -\csxdef {autonum@alg:rsgmReferenced}{} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Likelihood computation}{8}{subsection.4.4}\protected@file@percent } -\newlabel{sec:likel-comp}{{4.4}{8}{Likelihood computation}{subsection.4.4}{}} -\newlabel{sec:likel-comp@cref}{{[subsection][4][4]4.4}{[1][8][]8}} -\citation{mathieu2020riemannian} -\citation{brehmer2020flows,kalatzis2021multi,caterini2021Rectangular} -\citation{navarro2017multivariate} -\citation{collett1981Discriminating} -\citation{mathieu2019continuous,nagano2019wrapped} -\citation{fisher1953dispersion} -\citation{kent1982fisher} -\citation{peel2001fitting,mardia2008multivariate} -\citation{papamakarios2019normalizing} -\citation{falorsi2019reparameterizing} -\citation{bose2020latent} -\@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces \small Computation of the loss\relax }}{9}{algorithm.2}\protected@file@percent } -\newlabel{alg:rsgm}{{2}{9}{\small Computation of the loss\relax }{algorithm.2}{}} -\newlabel{alg:rsgm@cref}{{[algorithm][2][]2}{[1][8][]9}} -\csxdef {autonum@alg:grwReferenced}{} -\newlabel{eq:backward_flow}{{7}{9}{Likelihood computation}{equation.4.7}{}} -\newlabel{eq:backward_flow@cref}{{[equation][7][]7}{[1][9][]9}} -\csxdef {autonum@eq:backward_flowReferenced}{} -\csxdef {autonum@sec:diff-betw-odeReferenced}{} -\@writefile{toc}{\contentsline 
{section}{\numberline {5}Related work}{9}{section.5}\protected@file@percent } -\newlabel{sec:related-works}{{5}{9}{Related work}{section.5}{}} -\newlabel{sec:related-works@cref}{{[section][5][]5}{[1][9][]9}} -\@writefile{toc}{\contentsline {paragraph}{Parametric family of distributions.}{9}{section*.11}\protected@file@percent } -\@writefile{tdo}{\contentsline {todo}{{\bf VDB:} this is the same thing as push forward of Euclidean NF?}{9}{section*.12}\protected@file@percent } -\pgfsyspdfmark {pgfid16}{17103517}{9561644} -\pgfsyspdfmark {pgfid19}{36739180}{9597195} -\pgfsyspdfmark {pgfid20}{38401010}{9350021} -\citation{falorsi2020neural,mathieu2020riemannian,falorsi2021Continuous} -\citation{grathwohl2019Scalable} -\citation{rozen2021moser} -\citation{sei2013jacobian} -\citation{ambrosio2003Optimal} -\citation{rezende2021Implicit,cohen2021riemannian} -\citation{rezende2020Normalizing} -\citation{volcanoe_dataset} -\citation{earthquake_dataset} -\citation{flood_dataset} -\citation{fire_dataset} -\citation{mathieu2020riemannian} -\citation{rozen2021moser} -\citation{peel2001fitting} -\citation{gemici2016normalizing} -\@writefile{toc}{\contentsline {paragraph}{Push-forward of Euclidean normalizing flows.}{10}{section*.13}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Neural ODE on manifolds.}{10}{section*.14}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Optimal transport on manifolds.}{10}{section*.15}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {6}Experiments}{10}{section.6}\protected@file@percent } -\newlabel{sec:experiments}{{6}{10}{Experiments}{section.6}{}} -\newlabel{sec:experiments@cref}{{[section][6][]6}{[1][10][]10}} -\csxdef {autonum@sec:exp_detailReferenced}{} -\csxdef {autonum@tab:geoscienceReferenced}{} -\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Negative log-likelihood scores for each method on the earth and climate science datasets. 
Bold indicates statistically significant best method. Means and standard deviations are computed over 5 different runs. \relax }}{10}{table.caption.16}\protected@file@percent } -\newlabel{tab:geoscience}{{3}{10}{Negative log-likelihood scores for each method on the earth and climate science datasets. Bold indicates statistically significant best method. Means and standard deviations are computed over 5 different runs. \relax }{table.caption.16}{}} -\newlabel{tab:geoscience@cref}{{[table][3][]3}{[1][10][]10}} -\citation{ungar2005Einstein} -\citation{kawar2021snips,kawar2021stochastic,lee2021priorgrad,sinha2021d2c,batzolis2021conditional,chung2021come} -\bibstyle{apalike} -\bibdata{bibliography} -\bibcite{ambrosio2003Optimal}{{1}{2003}{{Ambrosio}}{{}}} -\bibcite{atkinson2012spherical}{{2}{2012}{{Atkinson and Han}}{{}}} -\bibcite{bao2022analyticdpm}{{3}{2022}{{Bao et~al.}}{{}}} -\bibcite{batzolis2021conditional}{{4}{2021}{{Batzolis et~al.}}{{}}} -\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Trained score-based generative models on earth sciences data. The learned density is colored green-blue. Blue and red dots represent training and testing datapoints, respectively. \relax }}{11}{figure.caption.17}\protected@file@percent } -\newlabel{fig:geoscience}{{1}{11}{Trained score-based generative models on earth sciences data. The learned density is colored green-blue. Blue and red dots represent training and testing datapoints, respectively. 
\relax }{figure.caption.17}{}} -\newlabel{fig:geoscience@cref}{{[figure][1][]1}{[1][10][]11}} -\@writefile{toc}{\contentsline {section}{\numberline {7}Discussion and limitations}{11}{section.7}\protected@file@percent } -\newlabel{sec:conclusion}{{7}{11}{Discussion and limitations}{section.7}{}} -\newlabel{sec:conclusion@cref}{{[section][7][]7}{[1][10][]11}} -\@writefile{tdo}{\contentsline {todo}{{\bf VDB:} universal ?}{11}{section*.18}\protected@file@percent } -\pgfsyspdfmark {pgfid21}{17244633}{20058558} -\pgfsyspdfmark {pgfid24}{36739180}{20094109} -\pgfsyspdfmark {pgfid25}{38401010}{19846935} -\@writefile{tdo}{\contentsline {todo}{{\bf EM:} Should write a paragraph in the app on the conditional extension}{11}{section*.19}\protected@file@percent } -\pgfsyspdfmark {pgfid26}{28404717}{14946750} -\pgfsyspdfmark {pgfid29}{36739180}{14982301} -\pgfsyspdfmark {pgfid30}{38401010}{14735127} -\bibcite{bismut1984large}{{5}{1984}{{Bismut}}{{}}} -\bibcite{boomsma2008generative}{{6}{2008}{{Boomsma et~al.}}{{}}} -\bibcite{bose2020latent}{{7}{2020}{{Bose et~al.}}{{}}} -\bibcite{flood_dataset}{{8}{2017}{{Brakenridge}}{{}}} -\bibcite{brehmer2020flows}{{9}{2020}{{Brehmer and Cranmer}}{{}}} -\bibcite{caterini2021Rectangular}{{10}{2021}{{Caterini et~al.}}{{}}} -\bibcite{cattiaux2021time}{{11}{2021}{{Cattiaux et~al.}}{{}}} -\bibcite{chen2021likelihood}{{12}{2021a}{{Chen et~al.}}{{}}} -\bibcite{chen2021logarithmic}{{13}{2021b}{{Chen et~al.}}{{}}} -\bibcite{chen2016entropic}{{14}{2016}{{Chen et~al.}}{{}}} -\bibcite{chung2021come}{{15}{2021}{{Chung et~al.}}{{}}} -\bibcite{cohen2021riemannian}{{16}{2021}{{Cohen et~al.}}{{}}} -\bibcite{collett1981Discriminating}{{17}{1981}{{Collett and Lewis}}{{}}} -\bibcite{debortoli2021neurips}{{18}{2021}{{De~Bortoli et~al.}}{{}}} -\bibcite{nichol2021beatgans}{{19}{2021}{{Dhariwal and Nichol}}{{}}} -\bibcite{dormand1980family}{{20}{1980}{{Dormand and Prince}}{{}}} -\bibcite{durkan2021maximum}{{21}{2021}{{Durkan and Song}}{{}}} 
-\bibcite{fire_dataset}{{22}{2020}{{EOSDIS}}{{}}} -\bibcite{falorsi2021Continuous}{{23}{2021}{{Falorsi}}{{}}} -\bibcite{falorsi2019reparameterizing}{{24}{2019}{{Falorsi et~al.}}{{}}} -\bibcite{falorsi2020neural}{{25}{2020}{{Falorsi and Forr{\'e}}}{{}}} -\bibcite{federer2014geometric}{{26}{2014}{{Federer}}{{}}} -\bibcite{feiten2013rigid}{{27}{2013}{{Feiten et~al.}}{{}}} -\bibcite{fisher1953dispersion}{{28}{1953}{{Fisher}}{{}}} -\bibcite{garcia2021brenier}{{29}{2021}{{Garc{\'\i }a-Zelada and Huguet}}{{}}} -\bibcite{gemici2016normalizing}{{30}{2016}{{Gemici et~al.}}{{}}} -\bibcite{goto2021approximated}{{31}{2021}{{Goto and Sato}}{{}}} -\bibcite{grathwohl2019Scalable}{{32}{2019}{{Grathwohl et~al.}}{{}}} -\bibcite{gunther1991isometric}{{33}{1991}{{Gunther}}{{}}} -\bibcite{hamelryck2006sampling}{{34}{2006}{{Hamelryck et~al.}}{{}}} -\bibcite{haussmann1986time}{{35}{1986}{{Haussmann and Pardoux}}{{}}} -\bibcite{he2013lower}{{36}{2013}{{He}}{{}}} -\bibcite{ho2020denoising}{{37}{2020}{{Ho et~al.}}{{}}} -\bibcite{hsu2002stochastic}{{38}{2002}{{Hsu}}{{}}} -\bibcite{huang2021variational}{{39}{2021}{{Huang et~al.}}{{}}} -\bibcite{hutchinson1989stochastic}{{40}{1989}{{Hutchinson}}{{}}} -\bibcite{hyvarinen2005estimation}{{41}{2005}{{Hyv{\"a}rinen and Dayan}}{{}}} -\bibcite{ikeda1989sto}{{42}{1989}{{Ikeda and Watanabe}}{{}}} -\bibcite{jolicoeur2021gotta}{{43}{2021a}{{Jolicoeur-Martineau et~al.}}{{}}} -\bibcite{jolicoeur2020adversarial}{{44}{2021b}{{Jolicoeur-Martineau et~al.}}{{}}} -\bibcite{jones2008Manifold}{{45}{2008}{{Jones et~al.}}{{}}} -\bibcite{jorgensen1975central}{{46}{1975}{{J{\o }rgensen}}{{}}} -\bibcite{kalatzis2021multi}{{47}{2021}{{Kalatzis et~al.}}{{}}} -\bibcite{karpatne2018machine}{{48}{2018}{{Karpatne et~al.}}{{}}} -\bibcite{kawar2021snips}{{49}{2021a}{{Kawar et~al.}}{{}}} -\bibcite{kawar2021stochastic}{{50}{2021b}{{Kawar et~al.}}{{}}} -\bibcite{kent1982fisher}{{51}{1982}{{Kent}}{{}}} -\bibcite{kingma2015Adam}{{52}{2015}{{Kingma and Ba}}{{}}} 
-\bibcite{klimovskaia2020poincare}{{53}{2020}{{Klimovskaia et~al.}}{{}}} -\bibcite{kloeden:platen:2011}{{54}{2011}{{Kloeden and Platen}}{{}}} -\bibcite{kobayashi1963foundations}{{55}{1963}{{Kobayashi and Nomizu}}{{}}} -\bibcite{kolar2013natural}{{56}{2013}{{Kol{\'a}r et~al.}}{{}}} -\bibcite{kurtz1995stratonovich}{{57}{1995}{{Kurtz et~al.}}{{}}} -\bibcite{lee2010introduction}{{58}{2010}{{Lee}}{{}}} -\bibcite{lee2006riemannian}{{59}{2006}{{Lee}}{{}}} -\bibcite{lee2013smooth}{{60}{2013}{{Lee}}{{}}} -\bibcite{lee2018introduction}{{61}{2018}{{Lee}}{{}}} -\bibcite{lee2021priorgrad}{{62}{2021}{{Lee et~al.}}{{}}} -\bibcite{leobacher2021existence}{{63}{2021}{{Leobacher and Steinicke}}{{}}} -\bibcite{leonard2012schrodinger}{{64}{2012a}{{L{\'e}onard}}{{}}} -\bibcite{leonard2012girsanov}{{65}{2012b}{{L{\'e}onard}}{{}}} -\bibcite{leonard2014reciprocal}{{66}{2014}{{L{\'e}onard et~al.}}{{}}} -\bibcite{li1986large}{{67}{1986}{{Li}}{{}}} -\bibcite{lou2020neural}{{68}{2020}{{Lou et~al.}}{{}}} -\bibcite{lui2012advances}{{69}{2012}{{Lui}}{{}}} -\bibcite{mardia2008multivariate}{{70}{2008}{{Mardia et~al.}}{{}}} -\bibcite{mardia2007protein}{{71}{2007}{{Mardia et~al.}}{{}}} -\bibcite{mathieu2019continuous}{{72}{2019}{{Mathieu et~al.}}{{}}} -\bibcite{mathieu2020riemannian}{{73}{2020}{{Mathieu and Nickel}}{{}}} -\bibcite{nagano2019wrapped}{{74}{2019}{{Nagano et~al.}}{{}}} -\bibcite{navarro2017multivariate}{{75}{2017}{{Navarro et~al.}}{{}}} -\bibcite{earthquake_dataset}{{76}{NGDC/WDS}{{}}{{, 2022a}}} -\bibcite{volcanoe_dataset}{{77}{NGDC/WDS}{{}}{{, 2022b}}} -\bibcite{nichol2021improved}{{78}{2021}{{Nichol and Dhariwal}}{{}}} -\bibcite{nutz2022stability}{{79}{2022}{{Nutz and Wiesel}}{{}}} -\bibcite{papamakarios2019normalizing}{{80}{2019}{{Papamakarios et~al.}}{{}}} -\bibcite{peel2001fitting}{{81}{2001}{{Peel et~al.}}{{}}} -\bibcite{peyre2019computational}{{82}{2019}{{Peyr{\'e} and Cuturi}}{{}}} -\bibcite{revuz1999continuous}{{83}{1999}{{Revuz and Yor}}{{}}} 
-\bibcite{rezende2020Normalizing}{{84}{2020}{{Rezende et~al.}}{{}}} -\bibcite{rezende2021Implicit}{{85}{2021}{{Rezende and Racani{\`e}re}}{{}}} -\bibcite{roy2007learning}{{86}{2007}{{Roy et~al.}}{{}}} -\bibcite{rozen2021moser}{{87}{2021}{{Rozen et~al.}}{{}}} -\bibcite{saloff1994precise}{{88}{1994}{{Saloff-Coste}}{{}}} -\bibcite{sato2019riemannian}{{89}{2019}{{Sato et~al.}}{{}}} -\bibcite{schiela2020sqp}{{90}{2020}{{Schiela and Ortiz}}{{}}} -\bibcite{schrodinger1932theorie}{{91}{1932}{{Schr{\"o}dinger}}{{}}} -\bibcite{sei2013jacobian}{{92}{2013}{{Sei}}{{}}} -\bibcite{senanayake2018directional}{{93}{2018}{{Senanayake and Ramos}}{{}}} -\bibcite{shapovalov2011smoothed}{{94}{2011}{{Shapovalov and Dunbrack~Jr}}{{}}} -\bibcite{sinha2021d2c}{{95}{2021}{{Sinha et~al.}}{{}}} -\bibcite{sinkhorn1967diagonal}{{96}{1967}{{Sinkhorn}}{{}}} -\bibcite{song2020denoising}{{97}{2020}{{Song et~al.}}{{}}} -\bibcite{song2019generative}{{98}{2019}{{Song and Ermon}}{{}}} -\bibcite{song2020improved}{{99}{2020}{{Song and Ermon}}{{}}} -\bibcite{song2020score}{{100}{2021}{{Song et~al.}}{{}}} -\bibcite{steyvers2005large}{{101}{2005}{{Steyvers and Tenenbaum}}{{}}} -\bibcite{ungar2005Einstein}{{102}{2005}{{Ungar}}{{}}} -\bibcite{urakawa2006convergence}{{103}{2006}{{Urakawa}}{{}}} -\bibcite{vargas2021solving}{{104}{2021}{{Vargas et~al.}}{{}}} -\bibcite{vincent2011connection}{{105}{2011}{{Vincent}}{{}}} -\bibcite{watson2021learning}{{106}{2021}{{Watson et~al.}}{{}}} -\bibcite{zhu2020riemannian}{{107}{2020}{{Zhu and Sato}}{{}}} -\citation{hsu2002stochastic} -\citation{hsu2002stochastic,lee2018introduction,lee2006riemannian} -\citation{lee2010introduction,lee2013smooth} -\citation{lee2013smooth} -\citation{lee2018introduction} -\@writefile{toc}{\contentsline {section}{\numberline {S1}Organization of the supplementary}{1}{appendix.A}\protected@file@percent } -\newlabel{sec:organ-suppl}{{S1}{1}{Organization of the supplementary}{appendix.A}{}} 
-\newlabel{sec:organ-suppl@cref}{{[appendix][1][2147483647]S1}{[1][1][]1}} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\csxdef {autonum@sec:prel-stoch-riemReferenced}{} -\csxdef {autonum@sec:diff-betw-odeReferenced}{} -\csxdef {autonum@sec:eigenf-eigenv-laplReferenced}{} -\csxdef {autonum@sec:time-reversalReferenced}{} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\@writefile{toc}{\contentsline {section}{\numberline {S2}Preliminaries on stochastic Riemannian geometry}{1}{appendix.B}\protected@file@percent } -\newlabel{sec:prel-stoch-riem}{{S2}{1}{Preliminaries on stochastic Riemannian geometry}{appendix.B}{}} -\newlabel{sec:prel-stoch-riem@cref}{{[appendix][2][2147483647]S2}{[1][1][]1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {S2.1}Tensor field, metric, connection and transport}{1}{subsection.B.1}\protected@file@percent } -\newlabel{sec:metr-conn-tens}{{S2.1}{1}{Tensor field, metric, connection and transport}{subsection.B.1}{}} -\newlabel{sec:metr-conn-tens@cref}{{[subappendix][1][2147483647,2]S2.1}{[1][1][]1}} -\@writefile{toc}{\contentsline {paragraph}{Tensor field and Riemannian metric}{1}{section*.21}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Connection}{1}{section*.22}\protected@file@percent } -\citation{lee2018introduction} -\citation{lee2018introduction} -\citation{gunther1991isometric} -\citation{leobacher2021existence} -\citation{leobacher2021existence} -\citation{kloeden:platen:2011} -\citation{revuz1999continuous} -\@writefile{toc}{\contentsline {paragraph}{Parallel transport, geodesics and exponential mapping}{2}{section*.23}\protected@file@percent } -\newlabel{eq:parallel_transport}{{S1}{2}{Parallel transport, geodesics and exponential mapping}{equation.B.1}{}} -\newlabel{eq:parallel_transport@cref}{{[equation][1][2147483647]S1}{[1][2][]2}} -\csxdef {autonum@eq:parallel_transportReferenced}{} -\csxdef {autonum@sec:frame-bundle-orthReferenced}{} -\@writefile{toc}{\contentsline 
{paragraph}{Orthogonal projection}{2}{section*.24}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {S2.2}Stochastic Differential Equations on manifolds}{2}{subsection.B.2}\protected@file@percent } -\newlabel{sec:stoch-diff-equat}{{S2.2}{2}{Stochastic Differential Equations on manifolds}{subsection.B.2}{}} -\newlabel{sec:stoch-diff-equat@cref}{{[subappendix][2][2147483647,2]S2.2}{[1][2][]2}} -\@writefile{toc}{\contentsline {paragraph}{Stratanovitch integral}{2}{section*.25}\protected@file@percent } -\citation{kurtz1995stratonovich} -\citation{revuz1999continuous} -\citation{hsu2002stochastic} -\citation{hsu2002stochastic} -\citation{kolar2013natural} -\citation{kobayashi1963foundations} -\@writefile{toc}{\contentsline {paragraph}{SDEs on manifolds}{3}{section*.26}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {S2.3}Frame bundle and orthonormal frame bundle}{3}{subsection.B.3}\protected@file@percent } -\newlabel{sec:frame-bundle-orth}{{S2.3}{3}{Frame bundle and orthonormal frame bundle}{subsection.B.3}{}} -\newlabel{sec:frame-bundle-orth@cref}{{[subappendix][3][2147483647,2]S2.3}{[1][3][]3}} -\citation{kolar2013natural} -\citation{hsu2002stochastic} -\citation{gunther1991isometric} -\citation{hsu2002stochastic} -\citation{hsu2002stochastic} -\citation{hsu2002stochastic} -\newlabel{eq:horizontal_lift}{{S2}{4}{Frame bundle and orthonormal frame bundle}{equation.B.2}{}} -\newlabel{eq:horizontal_lift@cref}{{[equation][2][2147483647]S2}{[1][4][]4}} -\csxdef {autonum@eq:horizontal_liftReferenced}{} -\@writefile{toc}{\contentsline {subsection}{\numberline {S2.4}Horizontal lift and stochastic development}{4}{subsection.B.4}\protected@file@percent } -\newlabel{sec:horiz-lift-stoch}{{S2.4}{4}{Horizontal lift and stochastic development}{subsection.B.4}{}} -\newlabel{sec:horiz-lift-stoch@cref}{{[subappendix][4][2147483647,2]S2.4}{[1][4][]4}} -\citation{federer2014geometric} -\citation{lee2018introduction} 
-\citation{gunther1991isometric} -\citation{hsu2002stochastic} -\citation{hsu2002stochastic} -\@writefile{toc}{\contentsline {subsection}{\numberline {S2.5}Brownian motion on manifolds}{5}{subsection.B.5}\protected@file@percent } -\newlabel{sec:brown-moti-manif}{{S2.5}{5}{Brownian motion on manifolds}{subsection.B.5}{}} -\newlabel{sec:brown-moti-manif@cref}{{[subappendix][5][2147483647,2]S2.5}{[1][5][]5}} -\@writefile{toc}{\contentsline {paragraph}{Gradient, divergence and Laplace operators}{5}{section*.27}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Alternatives definitions of Brownian motion}{5}{section*.28}\protected@file@percent } -\newlabel{prop:intrinsic_brownian}{{S11}{5}{Intrinsic view of Brownian motion}{proposition.11}{}} -\newlabel{prop:intrinsic_brownian@cref}{{[proposition][11][2147483647]S11}{[1][5][]5}} -\citation{gunther1991isometric} -\citation{hsu2002stochastic} -\citation{ikeda1989sto} -\citation{jorgensen1975central} -\citation{jorgensen1975central} -\newlabel{prop:extrinsic_brownian}{{S12}{6}{Extrinsic view of Brownian motion}{proposition.12}{}} -\newlabel{prop:extrinsic_brownian@cref}{{[proposition][12][2147483647]S12}{[1][6][]6}} -\csxdef {autonum@prop:extrinsic_brownianReferenced}{} -\csxdef {autonum@eq:horizontal_liftReferenced}{} -\csxdef {autonum@prop:intrinsic_brownianReferenced}{} -\@writefile{toc}{\contentsline {paragraph}{Brownian motion and random walks}{6}{section*.29}\protected@file@percent } -\citation{jorgensen1975central} -\citation{urakawa2006convergence} -\citation{he2013lower} -\citation{li1986large} -\citation{song2020score} -\newlabel{thm:jorgensen_appendix}{{S14}{7}{Convergence of geodesic random walks}{theorem.14}{}} -\newlabel{thm:jorgensen_appendix@cref}{{[theorem][14][2147483647]S14}{[1][7][]7}} -\@writefile{toc}{\contentsline {paragraph}{Convergence of Brownian motion}{7}{section*.30}\protected@file@percent } -\newlabel{prop:brownian_conv_repeat}{{S15}{7}{Concergence of Brownian 
motion}{proposition.15}{}} -\newlabel{prop:brownian_conv_repeat@cref}{{[proposition][15][2147483647]S15}{[1][7][]7}} -\citation{saloff1994precise} -\citation{atkinson2012spherical} -\@writefile{toc}{\contentsline {section}{\numberline {S3}Difference between ODE and SDE likelihood computations}{8}{appendix.C}\protected@file@percent } -\newlabel{sec:diff-betw-ode}{{S3}{8}{Difference between ODE and SDE likelihood computations}{appendix.C}{}} -\newlabel{sec:diff-betw-ode@cref}{{[appendix][3][2147483647]S3}{[1][7][]8}} -\newlabel{eq:forward}{{S3}{8}{Difference between ODE and SDE likelihood computations}{equation.C.3}{}} -\newlabel{eq:forward@cref}{{[equation][3][2147483647]S3}{[1][8][]8}} -\@writefile{toc}{\contentsline {paragraph}{ODE model}{8}{section*.31}\protected@file@percent } -\csxdef {autonum@eq:forwardReferenced}{} -\newlabel{eq:backward_flow_appendix}{{S4}{8}{ODE model}{equation.C.4}{}} -\newlabel{eq:backward_flow_appendix@cref}{{[equation][4][2147483647]S4}{[1][8][]8}} -\csxdef {autonum@eq:backward_flow_appendixReferenced}{} -\newlabel{eq:proba_flow_ode}{{S5}{8}{ODE model}{equation.C.5}{}} -\newlabel{eq:proba_flow_ode@cref}{{[equation][5][2147483647]S5}{[1][8][]8}} -\@writefile{toc}{\contentsline {paragraph}{SDE model}{8}{section*.32}\protected@file@percent } -\newlabel{eq:proba_flow_sde}{{S6}{8}{SDE model}{equation.C.6}{}} -\newlabel{eq:proba_flow_sde@cref}{{[equation][6][2147483647]S6}{[1][8][]8}} -\csxdef {autonum@eq:proba_flow_odeReferenced}{} -\csxdef {autonum@eq:proba_flow_sdeReferenced}{} -\@writefile{toc}{\contentsline {section}{\numberline {S4}Eigenfunctions, eigenvalues of the Laplace-Beltrami operator}{8}{appendix.D}\protected@file@percent } -\newlabel{sec:eigenf-eigenv-lapl}{{S4}{8}{Eigenfunctions, eigenvalues of the Laplace-Beltrami operator}{appendix.D}{}} -\newlabel{sec:eigenf-eigenv-lapl@cref}{{[appendix][4][2147483647]S4}{[1][8][]8}} -\@writefile{toc}{\contentsline {paragraph}{The case of the torus}{8}{section*.33}\protected@file@percent } 
-\@writefile{toc}{\contentsline {paragraph}{The case of the sphere}{8}{section*.34}\protected@file@percent } -\citation{cattiaux2021time} -\citation{cattiaux2021time} -\citation{haussmann1986time} -\citation{cattiaux2021time} -\citation{haussmann1986time} -\citation{garcia2021brenier} -\citation{haussmann1986time} -\citation{haussmann1986time} -\@writefile{toc}{\contentsline {section}{\numberline {S5}Time-reversal formula: extension to compact Riemannian manifolds}{9}{appendix.E}\protected@file@percent } -\newlabel{sec:time-reversal}{{S5}{9}{Time-reversal formula: extension to compact Riemannian manifolds}{appendix.E}{}} -\newlabel{sec:time-reversal@cref}{{[appendix][5][2147483647]S5}{[1][9][]9}} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\csxdef {autonum@sec:informal-derivationReferenced}{} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\csxdef {autonum@sec:proof-crefthm:tReferenced}{} -\@writefile{toc}{\contentsline {subsection}{\numberline {S5.1}Informal derivation}{9}{subsection.E.1}\protected@file@percent } -\newlabel{sec:informal-derivation}{{S5.1}{9}{Informal derivation}{subsection.E.1}{}} -\newlabel{sec:informal-derivation@cref}{{[subappendix][1][2147483647,5]S5.1}{[1][9][]9}} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\newlabel{eq:martingale_forward}{{S7}{9}{Informal derivation}{equation.E.7}{}} -\newlabel{eq:martingale_forward@cref}{{[equation][7][2147483647]S7}{[1][9][]9}} -\newlabel{eq:time_reversal_manifold_haussman}{{S8}{9}{Informal derivation}{equation.E.8}{}} -\newlabel{eq:time_reversal_manifold_haussman@cref}{{[equation][8][2147483647]S8}{[1][9][]9}} -\csxdef {autonum@eq:time_reversal_manifold_haussmanReferenced}{} -\newlabel{eq:time_reversal_manifold_haussman}{{S9}{9}{Informal derivation}{equation.E.9}{}} -\newlabel{eq:time_reversal_manifold_haussman@cref}{{[equation][9][2147483647]S9}{[1][9][]9}} -\citation{haussmann1986time} -\citation{lee2018introduction} -\citation{cattiaux2021time} 
-\citation{cattiaux2021time} -\citation{cattiaux2021time} -\citation{cattiaux2021time} -\citation{cattiaux2021time} -\newlabel{eq:first_der}{{S10}{10}{Informal derivation}{equation.E.10}{}} -\newlabel{eq:first_der@cref}{{[equation][10][2147483647]S10}{[1][9][]10}} -\newlabel{eq:backward_kolmogorov}{{S11}{10}{Informal derivation}{equation.E.11}{}} -\newlabel{eq:backward_kolmogorov@cref}{{[equation][11][2147483647]S11}{[1][9][]10}} -\csxdef {autonum@sec:proof-crefthm:tReferenced}{} -\csxdef {autonum@eq:backward_kolmogorovReferenced}{} -\newlabel{eq:def_h}{{S12}{10}{Informal derivation}{equation.E.12}{}} -\newlabel{eq:def_h@cref}{{[equation][12][2147483647]S12}{[1][10][]10}} -\csxdef {autonum@eq:def_hReferenced}{} -\csxdef {autonum@eq:martingale_forwardReferenced}{} -\csxdef {autonum@eq:first_derReferenced}{} -\csxdef {autonum@eq:time_reversal_manifold_haussmanReferenced}{} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\@writefile{toc}{\contentsline {subsection}{\numberline {S5.2}Proof of \autonum@processReference {autonum@referencecrefOld}{forcsvlist}{thm:time_reversal_manifold}}{10}{subsection.E.2}\protected@file@percent } -\newlabel{sec:proof-crefthm:t}{{S5.2}{10}{Proof of \cref {thm:time_reversal_manifold}}{subsection.E.2}{}} -\newlabel{sec:proof-crefthm:t@cref}{{[subappendix][2][2147483647,5]S5.2}{[1][10][]10}} -\csxdef {autonum@sec:diff-proc-stochReferenced}{} -\csxdef {autonum@sec:girs-theory-compReferenced}{} -\csxdef {autonum@sec:concluding-proofReferenced}{} -\citation{leonard2014reciprocal} -\citation{cattiaux2021time} -\citation{cattiaux2021time} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {S5.2.1}Diffusion processes and integration by part formula}{11}{subsubsection.E.2.1}\protected@file@percent } -\newlabel{sec:diff-proc-stoch}{{S5.2.1}{11}{Diffusion processes and integration by part formula}{subsubsection.E.2.1}{}} 
-\newlabel{sec:diff-proc-stoch@cref}{{[subsubappendix][1][2147483647,5,2]S5.2.1}{[1][10][]11}} -\newlabel{thm:ibp_cattiaux}{{S16}{11}{}{theorem.16}{}} -\newlabel{thm:ibp_cattiaux@cref}{{[theorem][16][2147483647]S16}{[1][11][]11}} -\csxdef {autonum@thm:ibp_cattiauxReferenced}{} -\citation{gunther1991isometric} -\citation{leonard2012girsanov} -\citation{leonard2012girsanov} -\citation{cattiaux2021time} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {S5.2.2}Girsanov theory on compact Riemannian manifolds}{12}{subsubsection.E.2.2}\protected@file@percent } -\newlabel{sec:girs-theory-comp}{{S5.2.2}{12}{Girsanov theory on compact Riemannian manifolds}{subsubsection.E.2.2}{}} -\newlabel{sec:girs-theory-comp@cref}{{[subsubappendix][2][2147483647,5,2]S5.2.2}{[1][11][]12}} -\newlabel{prop:girsanov_manifold}{{S17}{12}{}{proposition.17}{}} -\newlabel{prop:girsanov_manifold@cref}{{[proposition][17][2147483647]S17}{[1][12][]12}} -\@writefile{tdo}{\contentsline {todo}{{\bf VDB:} prove that for the projection this is okay}{12}{section*.35}\protected@file@percent } -\pgfsyspdfmark {pgfid31}{25895937}{13726040} -\pgfsyspdfmark {pgfid34}{36739180}{13761591} -\pgfsyspdfmark {pgfid35}{38401010}{13514417} -\newlabel{eq:KL_ineq}{{S13}{12}{Girsanov theory on compact Riemannian manifolds}{equation.E.13}{}} -\newlabel{eq:KL_ineq@cref}{{[equation][13][2147483647]S13}{[1][12][]12}} -\@writefile{tdo}{\contentsline {todo}{{\bf VDB:} dire que c'est okay pour le delta et pour le gradient si on prend u bar = u circ p}{12}{section*.36}\protected@file@percent } -\pgfsyspdfmark {pgfid36}{23020441}{4843052} -\pgfsyspdfmark {pgfid39}{36739180}{4878603} -\pgfsyspdfmark {pgfid40}{38401010}{4631429} -\citation{cattiaux2021time} -\citation{lee2018introduction} -\citation{debortoli2021neurips} -\citation{schrodinger1932theorie,leonard2012schrodinger,chen2016entropic,debortoli2021neurips} -\csxdef {autonum@eq:KL_ineqReferenced}{} -\newlabel{eq:KL_ineq}{{S14}{13}{Girsanov theory on compact 
Riemannian manifolds}{equation.E.14}{}} -\newlabel{eq:KL_ineq@cref}{{[equation][14][2147483647]S14}{[1][13][]13}} -\newlabel{prop:hyp_317}{{S18}{13}{}{proposition.18}{}} -\newlabel{prop:hyp_317@cref}{{[proposition][18][2147483647]S18}{[1][13][]13}} -\csxdef {autonum@prop:girsanov_manifoldReferenced}{} -\csxdef {autonum@thm:ibp_cattiauxReferenced}{} -\newlabel{prop:cattiaux_spec}{{S19}{13}{}{proposition.19}{}} -\newlabel{prop:cattiaux_spec@cref}{{[proposition][19][2147483647]S19}{[1][13][]13}} -\newlabel{eq:equalitu}{{S15}{13}{}{equation.E.15}{}} -\newlabel{eq:equalitu@cref}{{[equation][15][2147483647]S15}{[1][13][]13}} -\csxdef {autonum@prop:hyp_317Referenced}{} -\csxdef {autonum@thm:ibp_cattiauxReferenced}{} -\csxdef {autonum@eq:equalituReferenced}{} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {S5.2.3}Concluding the proof}{13}{subsubsection.E.2.3}\protected@file@percent } -\newlabel{sec:concluding-proof}{{S5.2.3}{13}{Concluding the proof}{subsubsection.E.2.3}{}} -\newlabel{sec:concluding-proof@cref}{{[subsubappendix][3][2147483647,5,2]S5.2.3}{[1][13][]13}} -\csxdef {autonum@prop:cattiaux_specReferenced}{} -\csxdef {autonum@thm:time_reversal_manifoldReferenced}{} -\citation{leonard2012schrodinger,chen2016entropic,vargas2021solving,debortoli2021neurips,chen2021likelihood} -\citation{sinkhorn1967diagonal,peyre2019computational} -\citation{nutz2022stability} -\citation{debortoli2021neurips,vargas2021solving,chen2021likelihood} -\citation{debortoli2021neurips} -\citation{debortoli2021neurips} -\citation{cattiaux2021time} -\@writefile{toc}{\contentsline {section}{\numberline {S6}Schr\"odinger Bridges on Manifolds}{14}{appendix.F}\protected@file@percent } -\newlabel{sec:extension}{{S6}{14}{Schr\"odinger Bridges on Manifolds}{appendix.F}{}} -\newlabel{sec:extension@cref}{{[appendix][6][2147483647]S6}{[1][13][]14}} -\@writefile{toc}{\contentsline {paragraph}{Dynamical Schr\"odinger bridges}{14}{section*.37}\protected@file@percent } 
-\@writefile{toc}{\contentsline {paragraph}{Riemannian Diffusion Schr\"odinger Bridge}{14}{section*.38}\protected@file@percent } -\newlabel{prop:continuous_schro}{{S20}{14}{}{proposition.20}{}} -\newlabel{prop:continuous_schro@cref}{{[proposition][20][2147483647]S20}{[1][14][]14}} -\citation{lee2018introduction} -\citation{lee2018introduction} -\citation{kingma2015Adam} -\citation{dormand1980family} -\newlabel{prop:loss_implicit_explicit}{{S21}{15}{}{proposition.21}{}} -\newlabel{prop:loss_implicit_explicit@cref}{{[proposition][21][2147483647]S21}{[1][14][]15}} -\csxdef {autonum@prop:implicit_derReferenced}{} -\@writefile{toc}{\contentsline {section}{\numberline {S7}Proof of \autonum@processReference {autonum@referencecrefOld}{forcsvlist}{prop:implicit_der}}{15}{appendix.G}\protected@file@percent } -\newlabel{sec:implicit-losses}{{S7}{15}{Proof of \cref {prop:implicit_der}}{appendix.G}{}} -\newlabel{sec:implicit-losses@cref}{{[appendix][7][2147483647]S7}{[1][15][]15}} -\@writefile{toc}{\contentsline {section}{\numberline {S8}Experimental detail}{15}{appendix.H}\protected@file@percent } -\newlabel{sec:exp_detail}{{S8}{15}{Experimental detail}{appendix.H}{}} -\newlabel{sec:exp_detail@cref}{{[appendix][8][2147483647]S8}{[1][15][]15}} -\csxdef {autonum@sec:experimentsReferenced}{} -\@writefile{toc}{\contentsline {paragraph}{Architecture}{15}{section*.39}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Optimization}{15}{section*.40}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Likelihood evaluation}{15}{section*.41}\protected@file@percent } diff --git a/doc/main.bbl b/doc/main.bbl deleted file mode 100644 index 45a9af8..0000000 --- a/doc/main.bbl +++ /dev/null @@ -1,613 +0,0 @@ -\begin{thebibliography}{} - -\bibitem[Ambrosio, 2003]{ambrosio2003Optimal} -Ambrosio, L. (2003). -\newblock Optimal transport maps in {{Monge-Kantorovich}} problem. -\newblock {\em arXiv preprint arXiv:0304389v1}. 
- -\bibitem[Atkinson and Han, 2012]{atkinson2012spherical} -Atkinson, K. and Han, W. (2012). -\newblock {\em Spherical Harmonics and Approximations on the Unit Sphere: An - Introduction}, volume 2044. -\newblock Springer Science \& Business Media. - -\bibitem[Bao et~al., 2022]{bao2022analyticdpm} -Bao, F., Li, C., Zhu, J., and Zhang, B. (2022). -\newblock Analytic-dpm: an analytic estimate of the optimal reverse variance in - diffusion probabilistic models. -\newblock {\em arXiv preprint arXiv:2201.06503}. - -\bibitem[Batzolis et~al., 2021]{batzolis2021conditional} -Batzolis, G., Stanczuk, J., Sch{\"o}nlieb, C.-B., and Etmann, C. (2021). -\newblock Conditional image generation with score-based diffusion models. -\newblock {\em arXiv preprint arXiv:2111.13606}. - -\bibitem[Bismut, 1984]{bismut1984large} -Bismut, J.-M. (1984). -\newblock Large deviations and the {M}alliavin calculus. -\newblock {\em Birkhauser Prog. Math.}, 45. - -\bibitem[Boomsma et~al., 2008]{boomsma2008generative} -Boomsma, W., Mardia, K.~V., Taylor, C.~C., Ferkinghoff-Borg, J., Krogh, A., and - Hamelryck, T. (2008). -\newblock A generative, probabilistic model of local protein structure. -\newblock {\em Proceedings of the National Academy of Sciences}, - 105(26):8932--8937. - -\bibitem[Bose et~al., 2020]{bose2020latent} -Bose, J., Smofsky, A., Liao, R., Panangaden, P., and Hamilton, W. (2020). -\newblock Latent variable modelling with hyperbolic normalizing flows. -\newblock In {\em International Conference on Machine Learning}, pages - 1045--1055. PMLR. - -\bibitem[Brakenridge, 2017]{flood_dataset} -Brakenridge, G. (2017). -\newblock Global active archive of large flood events. -\newblock http://floodobservatory.colorado.edu/Archives/index.html. - -\bibitem[Brehmer and Cranmer, 2020]{brehmer2020flows} -Brehmer, J. and Cranmer, K. (2020). -\newblock Flows for simultaneous manifold learning and density estimation. -\newblock {\em arXiv preprint arXiv:2003.13913}. 
- -\bibitem[Caterini et~al., 2021]{caterini2021Rectangular} -Caterini, A.~L., {Loaiza-Ganem}, G., Pleiss, G., and Cunningham, J.~P. (2021). -\newblock Rectangular flows for manifold learning. -\newblock {\em arXiv preprint arXiv:2106.01413}. - -\bibitem[Cattiaux et~al., 2021]{cattiaux2021time} -Cattiaux, P., Conforti, G., Gentil, I., and L{\'e}onard, C. (2021). -\newblock Time reversal of diffusion processes under a finite entropy - condition. -\newblock {\em arXiv preprint arXiv:2104.07708}. - -\bibitem[Chen et~al., 2021a]{chen2021likelihood} -Chen, T., Liu, G.-H., and Theodorou, E.~A. (2021a). -\newblock Likelihood training of {S}chr\"odinger bridge using forward-backward - sdes theory. -\newblock {\em arXiv preprint arXiv:2110.11291}. - -\bibitem[Chen et~al., 2021b]{chen2021logarithmic} -Chen, X., Li, X.~M., and Wu, B. (2021b). -\newblock Logarithmic heat kernels: estimates without curvature restrictions. -\newblock {\em arXiv preprint arXiv:2106.02746}. - -\bibitem[Chen et~al., 2016]{chen2016entropic} -Chen, Y., Georgiou, T., and Pavon, M. (2016). -\newblock Entropic and displacement interpolation: a computational approach - using the {H}ilbert metric. -\newblock {\em SIAM Journal on Applied Mathematics}, 76(6):2375--2396. - -\bibitem[Chung et~al., 2021]{chung2021come} -Chung, H., Sim, B., and Ye, J.~C. (2021). -\newblock Come-closer-diffuse-faster: Accelerating conditional diffusion models - for inverse problems through stochastic contraction. -\newblock {\em arXiv preprint arXiv:2112.05146}. - -\bibitem[Cohen et~al., 2021]{cohen2021riemannian} -Cohen, S., Amos, B., and Lipman, Y. (2021). -\newblock Riemannian convex potential maps. -\newblock {\em arXiv preprint arXiv:2106.10272}. - -\bibitem[Collett and Lewis, 1981]{collett1981Discriminating} -Collett, D. and Lewis, T. (1981). -\newblock Discriminating {{Between}} the {{Von Mises}} and {{Wrapped Normal - Distributions}}. -\newblock {\em Australian Journal of Statistics}, 23(1):73--79. 
- -\bibitem[De~Bortoli et~al., 2021]{debortoli2021neurips} -De~Bortoli, V., Thornton, J., Heng, J., and Doucet, A. (2021). -\newblock Diffusion {S}chr{\"o}dinger bridge with applications to score-based - generative modeling. -\newblock In {\em Advances in Neural Information Processing Systems}. - -\bibitem[Dhariwal and Nichol, 2021]{nichol2021beatgans} -Dhariwal, P. and Nichol, A. (2021). -\newblock Diffusion models beat {GAN} on image synthesis. -\newblock {\em arXiv preprint arXiv:2105.05233}. - -\bibitem[Dormand and Prince, 1980]{dormand1980family} -Dormand, R.~J. and Prince, J.~P. (1980). -\newblock A family of embedded {{Runge}}-{{Kutta}} formulae. -\newblock {\em Journal of Computational and Applied Mathematics}, pages 19--26. - -\bibitem[Durkan and Song, 2021]{durkan2021maximum} -Durkan, C. and Song, Y. (2021). -\newblock On maximum likelihood training of score-based generative models. -\newblock {\em arXiv preprint arXiv:2101.09258}. - -\bibitem[EOSDIS, 2020]{fire_dataset} -EOSDIS (2020). -\newblock Land, atmosphere near real-time capability for eos (lance) system - operated by nasa’s earth science data and information system (esdis). -\newblock - https://earthdata.nasa.gov/earth-observation-data/near-real-time/firms/active-fire-data. - -\bibitem[Falorsi, 2021]{falorsi2021Continuous} -Falorsi, L. (2021). -\newblock Continuous normalizing flows on manifolds. -\newblock {\em arXiv:2104.14959}. - -\bibitem[Falorsi et~al., 2019]{falorsi2019reparameterizing} -Falorsi, L., de~Haan, P., Davidson, T.~R., and Forr{\'e}, P. (2019). -\newblock Reparameterizing distributions on lie groups. -\newblock In {\em The 22nd International Conference on Artificial Intelligence - and Statistics}, pages 3244--3253. PMLR. - -\bibitem[Falorsi and Forr{\'e}, 2020]{falorsi2020neural} -Falorsi, L. and Forr{\'e}, P. (2020). -\newblock Neural ordinary differential equations on manifolds. -\newblock {\em arXiv preprint arXiv:2006.06663}. 
- -\bibitem[Federer, 2014]{federer2014geometric} -Federer, H. (2014). -\newblock {\em Geometric Measure Theory}. -\newblock Springer. - -\bibitem[Feiten et~al., 2013]{feiten2013rigid} -Feiten, W., Lang, M., and Hirche, S. (2013). -\newblock Rigid motion estimation using mixtures of projected gaussians. -\newblock In {\em Proceedings of the 16th International Conference on - Information Fusion}, pages 1465--1472. IEEE. - -\bibitem[Fisher, 1953]{fisher1953dispersion} -Fisher, R.~A. (1953). -\newblock Dispersion on a sphere. -\newblock {\em Proceedings of the Royal Society of London. Series A. - Mathematical and Physical Sciences}, 217(1130):295--305. - -\bibitem[Garc{\'\i}a-Zelada and Huguet, 2021]{garcia2021brenier} -Garc{\'\i}a-Zelada, D. and Huguet, B. (2021). -\newblock Brenier--{S}chr{\"o}dinger problem on compact manifolds with - boundary. -\newblock {\em Stochastic Analysis and Applications}, pages 1--29. - -\bibitem[Gemici et~al., 2016]{gemici2016normalizing} -Gemici, M.~C., Rezende, D., and Mohamed, S. (2016). -\newblock Normalizing flows on {R}iemannian manifolds. -\newblock {\em arXiv preprint arXiv:1611.02304}. - -\bibitem[Goto and Sato, 2021]{goto2021approximated} -Goto, J. and Sato, H. (2021). -\newblock Approximated logarithmic maps on {R}iemannian manifolds and their - applications. -\newblock {\em JSIAM Letters}, 13:17--20. - -\bibitem[Grathwohl et~al., 2019]{grathwohl2019Scalable} -Grathwohl, W., Chen, R. T.~Q., Bettencourt, J., and Duvenaud, D. (2019). -\newblock Scalable reversible generative models with free-form continuous - dynamics. -\newblock In {\em International Conference on Learning Representations}. - -\bibitem[Gunther, 1991]{gunther1991isometric} -Gunther, M. (1991). -\newblock Isometric embeddings of {R}iemannian manifolds, {K}yoto, 1990. -\newblock In {\em Proc. Intern. Congr. Math.}, pages 1137--1143. Math. Soc. - Japan. - -\bibitem[Hamelryck et~al., 2006]{hamelryck2006sampling} -Hamelryck, T., Kent, J.~T., and Krogh, A. (2006). 
-\newblock Sampling realistic protein conformations using local structural bias. -\newblock {\em PLoS Computational Biology}, 2(9):e131. - -\bibitem[Haussmann and Pardoux, 1986]{haussmann1986time} -Haussmann, U.~G. and Pardoux, E. (1986). -\newblock Time reversal of diffusions. -\newblock {\em The Annals of Probability}, 14(4):1188--1205. - -\bibitem[He, 2013]{he2013lower} -He, Y. (2013). -\newblock A lower bound for the first eigenvalue in the laplacian operator on - compact riemannian manifolds. -\newblock {\em Journal of Geometry and Physics}, 71:73--84. - -\bibitem[Ho et~al., 2020]{ho2020denoising} -Ho, J., Jain, A., and Abbeel, P. (2020). -\newblock Denoising diffusion probabilistic models. -\newblock {\em Advances in Neural Information Processing Systems}. - -\bibitem[Hsu, 2002]{hsu2002stochastic} -Hsu, E.~P. (2002). -\newblock {\em Stochastic Analysis on Manifolds}. -\newblock Number~38. American Mathematical Society. - -\bibitem[Huang et~al., 2021]{huang2021variational} -Huang, C.-W., Lim, J.~H., and Courville, A. (2021). -\newblock A variational perspective on diffusion-based generative models and - score matching. -\newblock {\em arXiv preprint arXiv:2106.02808}. - -\bibitem[Hutchinson, 1989]{hutchinson1989stochastic} -Hutchinson, M.~F. (1989). -\newblock A stochastic estimator of the trace of the influence matrix for - laplacian smoothing splines. -\newblock {\em Communications in Statistics-Simulation and Computation}, - 18(3):1059--1076. - -\bibitem[Hyv{\"a}rinen and Dayan, 2005]{hyvarinen2005estimation} -Hyv{\"a}rinen, A. and Dayan, P. (2005). -\newblock Estimation of non-normalized statistical models by score matching. -\newblock {\em Journal of Machine Learning Research}, 6(4). - -\bibitem[Ikeda and Watanabe, 1989]{ikeda1989sto} -Ikeda, N. and Watanabe, S. (1989). -\newblock {\em Stochastic Differential Equations and Diffusion Processes}, - volume~24 of {\em North-Holland Mathematical Library}. 
-\newblock North-Holland Publishing Co., Amsterdam; Kodansha, Ltd., Tokyo, - second edition. - -\bibitem[Jolicoeur-Martineau et~al., 2021a]{jolicoeur2021gotta} -Jolicoeur-Martineau, A., Li, K., Pich{\'e}-Taillefer, R., Kachman, T., and - Mitliagkas, I. (2021a). -\newblock Gotta go fast when generating data with score-based models. -\newblock {\em arXiv preprint arXiv:2105.14080}. - -\bibitem[Jolicoeur-Martineau et~al., 2021b]{jolicoeur2020adversarial} -Jolicoeur-Martineau, A., Pich{\'e}-Taillefer, R., Tachet~des Combes, R., and - Mitliagkas, I. (2021b). -\newblock Adversarial score matching and improved sampling for image - generation. -\newblock {\em International Conference on Learning Representations}. - -\bibitem[Jones et~al., 2008]{jones2008Manifold} -Jones, P.~W., Maggioni, M., and Schul, R. (2008). -\newblock Manifold {{Parametrizations}} by {{Eigenfunctions}} of the - {{Laplacian}} and {{Heat Kernels}}. -\newblock {\em Proceedings of the National Academy of Sciences of the United - States of America}, 105(6):1803--1808. - -\bibitem[J{\o}rgensen, 1975]{jorgensen1975central} -J{\o}rgensen, E. (1975). -\newblock The central limit problem for geodesic random walks. -\newblock {\em Zeitschrift f{\"u}r Wahrscheinlichkeitstheorie und verwandte - Gebiete}, 32(1-2):1--64. - -\bibitem[Kalatzis et~al., 2021]{kalatzis2021multi} -Kalatzis, D., Ye, J.~Z., Wohlert, J., and Hauberg, S. (2021). -\newblock Multi-chart flows. -\newblock {\em arXiv preprint arXiv:2106.03500}. - -\bibitem[Karpatne et~al., 2018]{karpatne2018machine} -Karpatne, A., Ebert-Uphoff, I., Ravela, S., Babaie, H.~A., and Kumar, V. - (2018). -\newblock Machine learning for the geosciences: Challenges and opportunities. -\newblock {\em IEEE Transactions on Knowledge and Data Engineering}, - 31(8):1544--1554. - -\bibitem[Kawar et~al., 2021a]{kawar2021snips} -Kawar, B., Vaksman, G., and Elad, M. (2021a). -\newblock Snips: Solving noisy inverse problems stochastically. 
-\newblock {\em arXiv preprint arXiv:2105.14951}. - -\bibitem[Kawar et~al., 2021b]{kawar2021stochastic} -Kawar, B., Vaksman, G., and Elad, M. (2021b). -\newblock Stochastic image denoising by sampling from the posterior - distribution. -\newblock {\em arXiv preprint arXiv:2101.09552}. - -\bibitem[Kent, 1982]{kent1982fisher} -Kent, J.~T. (1982). -\newblock The {F}isher-{B}ingham distribution on the sphere. -\newblock {\em Journal of the Royal Statistical Society: Series B - (Methodological)}, 44(1):71--80. - -\bibitem[Kingma and Ba, 2015]{kingma2015Adam} -Kingma, D.~P. and Ba, J. (2015). -\newblock Adam: {{A Method}} for {{Stochastic Optimization}}. -\newblock {\em arXiv:1412.6980 [cs]}. - -\bibitem[Klimovskaia et~al., 2020]{klimovskaia2020poincare} -Klimovskaia, A., Lopez-Paz, D., Bottou, L., and Nickel, M. (2020). -\newblock Poincar{\'e} maps for analyzing complex hierarchies in single-cell - data. -\newblock {\em Nature communications}, 11(1):1--9. - -\bibitem[Kloeden and Platen, 2011]{kloeden:platen:2011} -Kloeden, P. and Platen, E. (2011). -\newblock {\em Numerical Solution of Stochastic Differential Equations}. -\newblock Stochastic Modelling and Applied Probability. Springer Berlin - Heidelberg. - -\bibitem[Kobayashi and Nomizu, 1963]{kobayashi1963foundations} -Kobayashi, S. and Nomizu, K. (1963). -\newblock {\em Foundations of Differential Geometry}, volume~1. -\newblock New York, London. - -\bibitem[Kol{\'a}r et~al., 2013]{kolar2013natural} -Kol{\'a}r, I., Michor, P.~W., and Slov{\'a}k, J. (2013). -\newblock {\em Natural Operations in Differential Geometry}. -\newblock Springer Science \& Business Media. - -\bibitem[Kurtz et~al., 1995]{kurtz1995stratonovich} -Kurtz, T.~G., Pardoux, {\'E}., and Protter, P. (1995). -\newblock Stratonovich stochastic differential equations driven by general - semimartingales. -\newblock In {\em Annales de l'IHP Probabilit{\'e}s et statistiques}, - volume~31, pages 351--377. - -\bibitem[Lee, 2010]{lee2010introduction} -Lee, J. 
(2010). -\newblock {\em Introduction to Topological Manifolds}, volume 202. -\newblock Springer Science \& Business Media. - -\bibitem[Lee, 2006]{lee2006riemannian} -Lee, J.~M. (2006). -\newblock {\em Riemannian Manifolds: An Introduction to Curvature}, volume 176. -\newblock Springer Science \& Business Media. - -\bibitem[Lee, 2013]{lee2013smooth} -Lee, J.~M. (2013). -\newblock Smooth manifolds. -\newblock In {\em Introduction to Smooth Manifolds}, pages 1--31. Springer. - -\bibitem[Lee, 2018]{lee2018introduction} -Lee, J.~M. (2018). -\newblock {\em Introduction to Riemannian manifolds}. -\newblock Springer. - -\bibitem[Lee et~al., 2021]{lee2021priorgrad} -Lee, S.-g., Kim, H., Shin, C., Tan, X., Liu, C., Meng, Q., Qin, T., Chen, W., - Yoon, S., and Liu, T.-Y. (2021). -\newblock Priorgrad: Improving conditional denoising diffusion models with - data-driven adaptive prior. -\newblock {\em arXiv preprint arXiv:2106.06406}. - -\bibitem[Leobacher and Steinicke, 2021]{leobacher2021existence} -Leobacher, G. and Steinicke, A. (2021). -\newblock Existence, uniqueness and regularity of the projection onto - differentiable manifolds. -\newblock {\em Annals of Global Analysis and Geometry}, 60(3):559--587. - -\bibitem[L{\'e}onard, 2012a]{leonard2012schrodinger} -L{\'e}onard, C. (2012a). -\newblock From the {S}chr{\"o}dinger problem to the {M}onge--{K}antorovich - problem. -\newblock {\em Journal of Functional Analysis}, 262(4):1879--1920. - -\bibitem[L{\'e}onard, 2012b]{leonard2012girsanov} -L{\'e}onard, C. (2012b). -\newblock Girsanov theory under a finite entropy condition. -\newblock In {\em S{\'e}minaire de Probabilit{\'e}s XLIV}, pages 429--465. - Springer. - -\bibitem[L{\'e}onard et~al., 2014]{leonard2014reciprocal} -L{\'e}onard, C., R{\oe}lly, S., Zambrini, J.-C., et~al. (2014). -\newblock Reciprocal processes: a measure-theoretical point of view. -\newblock {\em Probability Surveys}, 11:237--269. - -\bibitem[Li, 1986]{li1986large} -Li, P. (1986). 
-\newblock Large time behavior of the heat equation on complete manifolds with - non-negative ricci curvature. -\newblock {\em Annals of Mathematics}, 124(1):1--21. - -\bibitem[Lou et~al., 2020]{lou2020neural} -Lou, A., Lim, D., Katsman, I., Huang, L., Jiang, Q., Lim, S.-N., and De~Sa, C. - (2020). -\newblock Neural manifold ordinary differential equations. -\newblock {\em arXiv preprint arXiv:2006.10254}. - -\bibitem[Lui, 2012]{lui2012advances} -Lui, Y.~M. (2012). -\newblock Advances in matrix manifolds for computer vision. -\newblock {\em Image and Vision Computing}, 30(6-7):380--388. - -\bibitem[Mardia et~al., 2008]{mardia2008multivariate} -Mardia, K.~V., Hughes, G., Taylor, C.~C., and Singh, H. (2008). -\newblock A multivariate von {M}ises distribution with applications to - bioinformatics. -\newblock {\em Canadian Journal of Statistics}, 36(1):99--109. - -\bibitem[Mardia et~al., 2007]{mardia2007protein} -Mardia, K.~V., Taylor, C.~C., and Subramaniam, G.~K. (2007). -\newblock Protein bioinformatics and mixtures of bivariate von mises - distributions for angular data. -\newblock {\em Biometrics}, 63(2):505--512. - -\bibitem[Mathieu et~al., 2019]{mathieu2019continuous} -Mathieu, E., Lan, C.~L., Maddison, C.~J., Tomioka, R., and Teh, Y.~W. (2019). -\newblock Continuous hierarchical representations with poincar$\backslash$'e - variational auto-encoders. -\newblock {\em arXiv preprint arXiv:1901.06033}. - -\bibitem[Mathieu and Nickel, 2020]{mathieu2020riemannian} -Mathieu, E. and Nickel, M. (2020). -\newblock Riemannian continuous normalizing flows. -\newblock {\em arXiv preprint arXiv:2006.10605}. - -\bibitem[Nagano et~al., 2019]{nagano2019wrapped} -Nagano, Y., Yamaguchi, S., Fujita, Y., and Koyama, M. (2019). -\newblock A wrapped normal distribution on hyperbolic space for gradient-based - learning. -\newblock In {\em International Conference on Machine Learning}, pages - 4693--4702. PMLR. 
- -\bibitem[Navarro et~al., 2017]{navarro2017multivariate} -Navarro, A.~K., Frellsen, J., and Turner, R.~E. (2017). -\newblock The multivariate generalised von mises distribution: inference and - applications. -\newblock In {\em Thirty-First AAAI Conference on Artificial Intelligence}. - -\bibitem[(NGDC/WDS), 2022a]{earthquake_dataset} -(NGDC/WDS), N. G. D. C. . W. D.~S. (2022a). -\newblock Ncei/wds global significant earthquake database. -\newblock - https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.ngdc.mgg.hazards:G012153. - -\bibitem[(NGDC/WDS), 2022b]{volcanoe_dataset} -(NGDC/WDS), N. G. D. C. . W. D.~S. (2022b). -\newblock Ncei/wds global significant volcanic eruptions database. -\newblock - https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.ngdc.mgg.hazards:G10147. - -\bibitem[Nichol and Dhariwal, 2021]{nichol2021improved} -Nichol, A. and Dhariwal, P. (2021). -\newblock Improved denoising diffusion probabilistic models. -\newblock {\em arXiv preprint arXiv:2102.09672}. - -\bibitem[Nutz and Wiesel, 2022]{nutz2022stability} -Nutz, M. and Wiesel, J. (2022). -\newblock Stability of {S}chr\"odinger potentials and convergence of - {S}inkhorn's algorithm. -\newblock {\em arXiv preprint arXiv:2201.10059}. - -\bibitem[Papamakarios et~al., 2019]{papamakarios2019normalizing} -Papamakarios, G., Nalisnick, E., Rezende, D.~J., Mohamed, S., and - Lakshminarayanan, B. (2019). -\newblock Normalizing flows for probabilistic modeling and inference. -\newblock {\em arXiv preprint arXiv:1912.02762}. - -\bibitem[Peel et~al., 2001]{peel2001fitting} -Peel, D., Whiten, W.~J., and McLachlan, G.~J. (2001). -\newblock Fitting mixtures of kent distributions to aid in joint set - identification. -\newblock {\em Journal of the American Statistical Association}, - 96(453):56--63. - -\bibitem[Peyr{\'e} and Cuturi, 2019]{peyre2019computational} -Peyr{\'e}, G. and Cuturi, M. (2019). -\newblock Computational optimal transport. 
-\newblock {\em Foundations and Trends{\textregistered} in Machine Learning}, - 11(5-6):355--607. - -\bibitem[Revuz and Yor, 1999]{revuz1999continuous} -Revuz, D. and Yor, M. (1999). -\newblock {\em Continuous Martingales and {B}rownian Motion}, volume 293 of - {\em Grundlehren der Mathematischen Wissenschaften [Fundamental Principles of - Mathematical Sciences]}. -\newblock Springer-Verlag, Berlin, third edition. - -\bibitem[Rezende et~al., 2020]{rezende2020Normalizing} -Rezende, D.~J., Papamakarios, G., Racani{\`e}re, S., Albergo, M.~S., Kanwar, - G., Shanahan, P.~E., and Cranmer, K. (2020). -\newblock Normalizing flows on tori and spheres. -\newblock {\em arXiv:2002.02428}. - -\bibitem[Rezende and Racani{\`e}re, 2021]{rezende2021Implicit} -Rezende, D.~J. and Racani{\`e}re, S. (2021). -\newblock Implicit {R}iemannian concave potential maps. -\newblock {\em arXiv:2110.01288}. - -\bibitem[Roy et~al., 2007]{roy2007learning} -Roy, D.~M., Kemp, C., Mansinghka, V., and B~Tenenbaum, J. (2007). -\newblock Learning annotated hierarchies from relational data. - -\bibitem[Rozen et~al., 2021]{rozen2021moser} -Rozen, N., Grover, A., Nickel, M., and Lipman, Y. (2021). -\newblock Moser flow: Divergence-based generative modeling on manifolds. -\newblock {\em Advances in Neural Information Processing Systems}, 34. - -\bibitem[Saloff-Coste, 1994]{saloff1994precise} -Saloff-Coste, L. (1994). -\newblock Precise estimates on the rate at which certain diffusions tend to - equilibrium. -\newblock {\em Mathematische Zeitschrift}, 217(1):641--677. - -\bibitem[Sato et~al., 2019]{sato2019riemannian} -Sato, H., Kasai, H., and Mishra, B. (2019). -\newblock Riemannian stochastic variance reduced gradient algorithm with - retraction and vector transport. -\newblock {\em SIAM Journal on Optimization}, 29(2):1444--1472. - -\bibitem[Schiela and Ortiz, 2020]{schiela2020sqp} -Schiela, A. and Ortiz, J. (2020). -\newblock An {SQP} method for equality constrained optimization on manifolds. 
-\newblock {\em arXiv preprint arXiv:2005.06844}. - -\bibitem[Schr{\"o}dinger, 1932]{schrodinger1932theorie} -Schr{\"o}dinger, E. (1932). -\newblock Sur la th{\'e}orie relativiste de l'{\'e}lectron et - l'interpr{\'e}tation de la m{\'e}canique quantique. -\newblock {\em Annales de l'Institut Henri Poincar{\'e}}, 2(4):269--310. - -\bibitem[Sei, 2013]{sei2013jacobian} -Sei, T. (2013). -\newblock A {J}acobian inequality for gradient maps on the sphere and its - application to directional statistics. -\newblock {\em Communications in Statistics-Theory and Methods}, - 42(14):2525--2542. - -\bibitem[Senanayake and Ramos, 2018]{senanayake2018directional} -Senanayake, R. and Ramos, F. (2018). -\newblock Directional grid maps: modeling multimodal angular uncertainty in - dynamic environments. -\newblock In {\em 2018 IEEE/RSJ International Conference on Intelligent Robots - and Systems (IROS)}, pages 3241--3248. IEEE. - -\bibitem[Shapovalov and Dunbrack~Jr, 2011]{shapovalov2011smoothed} -Shapovalov, M.~V. and Dunbrack~Jr, R.~L. (2011). -\newblock A smoothed backbone-dependent rotamer library for proteins derived - from adaptive kernel density estimates and regressions. -\newblock {\em Structure}, 19(6):844--858. - -\bibitem[Sinha et~al., 2021]{sinha2021d2c} -Sinha, A., Song, J., Meng, C., and Ermon, S. (2021). -\newblock D2c: Diffusion-denoising models for few-shot conditional generation. -\newblock {\em arXiv preprint arXiv:2106.06819}. - -\bibitem[Sinkhorn, 1967]{sinkhorn1967diagonal} -Sinkhorn, R. (1967). -\newblock Diagonal equivalence to matrices with prescribed row and column sums. -\newblock {\em The American Mathematical Monthly}, 74(4):402--405. - -\bibitem[Song et~al., 2020]{song2020denoising} -Song, J., Meng, C., and Ermon, S. (2020). -\newblock Denoising diffusion implicit models. -\newblock {\em arXiv preprint arXiv:2010.02502}. - -\bibitem[Song and Ermon, 2019]{song2019generative} -Song, Y. and Ermon, S. (2019). 
-\newblock Generative modeling by estimating gradients of the data distribution. -\newblock In {\em Advances in Neural Information Processing Systems}. - -\bibitem[Song and Ermon, 2020]{song2020improved} -Song, Y. and Ermon, S. (2020). -\newblock Improved techniques for training score-based generative models. -\newblock In {\em Advances in Neural Information Processing Systems}. - -\bibitem[Song et~al., 2021]{song2020score} -Song, Y., Sohl{-}Dickstein, J., Kingma, D.~P., Kumar, A., Ermon, S., and Poole, - B. (2021). -\newblock Score-based generative modeling through stochastic differential - equations. -\newblock In {\em International Conference on Learning Representations}. - -\bibitem[Steyvers and Tenenbaum, 2005]{steyvers2005large} -Steyvers, M. and Tenenbaum, J.~B. (2005). -\newblock The large-scale structure of semantic networks: Statistical analyses - and a model of semantic growth. -\newblock {\em Cognitive science}, 29(1):41--78. - -\bibitem[Ungar, 2005]{ungar2005Einstein} -Ungar, A. (2005). -\newblock Einstein's special relativity: {{Unleashing}} the power of its - hyperbolic geometry. -\newblock {\em Computers \& Mathematics with Applications}, 49(2):187--221. - -\bibitem[Urakawa, 2006]{urakawa2006convergence} -Urakawa, H. (2006). -\newblock Convergence rates to equilibrium of the heat kernels on compact - {R}iemannian manifolds. -\newblock {\em Indiana University mathematics journal}, pages 259--288. - -\bibitem[Vargas et~al., 2021]{vargas2021solving} -Vargas, F., Thodoroff, P., Lawrence, N.~D., and Lamacraft, A. (2021). -\newblock Solving {S}chr{\"o}dinger bridges via maximum likelihood. -\newblock {\em arXiv preprint arXiv:2106.02081}. - -\bibitem[Vincent, 2011]{vincent2011connection} -Vincent, P. (2011). -\newblock A connection between score matching and denoising autoencoders. -\newblock {\em Neural Computation}, 23(7):1661--1674. - -\bibitem[Watson et~al., 2021]{watson2021learning} -Watson, D., Ho, J., Norouzi, M., and Chan, W. (2021). 
-\newblock Learning to efficiently sample from diffusion probabilistic models. -\newblock {\em arXiv preprint arXiv:2106.03802}. - -\bibitem[Zhu and Sato, 2020]{zhu2020riemannian} -Zhu, X. and Sato, H. (2020). -\newblock Riemannian conjugate gradient methods with inverse retraction. -\newblock {\em Computational Optimization and Applications}, 77(3):779--810. - -\end{thebibliography} diff --git a/doc/main.blg b/doc/main.blg deleted file mode 100644 index 0316bed..0000000 --- a/doc/main.blg +++ /dev/null @@ -1,51 +0,0 @@ -This is BibTeX, Version 0.99d (TeX Live 2019/Debian) -Capacity: max_strings=200000, hash_size=200000, hash_prime=170003 -The top-level auxiliary file: main.aux -The style file: apalike.bst -Database file #1: bibliography.bib -Warning--there's a number but no series in hsu2002stochastic -Warning--can't use both volume and number fields in kobayashi1963foundations -Warning--can't use both volume and number fields in kurtz1995stratonovich -Warning--empty journal in roy2007learning -You've used 107 entries, - 1935 wiz_defined-function locations, - 1304 strings with 26159 characters, -and the built_in function-call counts, 40733 in all, are: -= -- 3981 -> -- 1696 -< -- 79 -+ -- 597 -- -- 562 -* -- 3316 -:= -- 7161 -add.period$ -- 330 -call.type$ -- 107 -change.case$ -- 806 -chr.to.int$ -- 102 -cite$ -- 111 -duplicate$ -- 1613 -empty$ -- 2784 -format.name$ -- 727 -if$ -- 7924 -int.to.chr$ -- 6 -int.to.str$ -- 0 -missing$ -- 114 -newline$ -- 537 -num.names$ -- 321 -pop$ -- 754 -preamble$ -- 1 -purify$ -- 815 -quote$ -- 0 -skip$ -- 1240 -stack$ -- 0 -substring$ -- 2515 -swap$ -- 224 -text.length$ -- 20 -text.prefix$ -- 0 -top$ -- 0 -type$ -- 620 -warning$ -- 4 -while$ -- 315 -width$ -- 0 -write$ -- 1351 -(There were 4 warnings) diff --git a/doc/main.out b/doc/main.out deleted file mode 100644 index fd04cd6..0000000 --- a/doc/main.out +++ /dev/null @@ -1,19 +0,0 @@ -\BOOKMARK [1][-]{section.1}{Introduction}{}% 1 -\BOOKMARK [1][-]{section.2}{Notation}{}% 2 
-\BOOKMARK [1][-]{section.3}{Euclidean Score-based Generative Modeling}{}% 3 -\BOOKMARK [1][-]{section.4}{Riemannian Score-based Generative Modeling}{}% 4 -\BOOKMARK [2][-]{subsection.4.1}{Brownian motion on compact Riemannian manifolds}{section.4}% 5 -\BOOKMARK [2][-]{subsection.4.2}{A manifold time-reversal formula}{section.4}% 6 -\BOOKMARK [2][-]{subsection.4.3}{Score approximation on Riemannian manifolds}{section.4}% 7 -\BOOKMARK [2][-]{subsection.4.4}{Likelihood computation}{section.4}% 8 -\BOOKMARK [1][-]{section.5}{Related work}{}% 9 -\BOOKMARK [1][-]{section.6}{Experiments}{}% 10 -\BOOKMARK [1][-]{section.7}{Discussion and limitations}{}% 11 -\BOOKMARK [1][-]{appendix.A}{Organization of the supplementary}{}% 12 -\BOOKMARK [1][-]{appendix.B}{Preliminaries on stochastic Riemannian geometry}{}% 13 -\BOOKMARK [1][-]{appendix.C}{Difference between ODE and SDE likelihood computations}{}% 14 -\BOOKMARK [1][-]{appendix.D}{Eigenfunctions, eigenvalues of the Laplace-Beltrami operator}{}% 15 -\BOOKMARK [1][-]{appendix.E}{Time-reversal formula: extension to compact Riemannian manifolds}{}% 16 -\BOOKMARK [1][-]{appendix.F}{Schr\366dinger Bridges on Manifolds}{}% 17 -\BOOKMARK [1][-]{appendix.G}{Proof of prop:implicitder}{}% 18 -\BOOKMARK [1][-]{appendix.H}{Experimental detail}{}% 19 diff --git a/doc/main.pdf b/doc/main.pdf deleted file mode 100644 index 9f2575d..0000000 Binary files a/doc/main.pdf and /dev/null differ diff --git a/doc/main.synctex.gz b/doc/main.synctex.gz deleted file mode 100644 index 0d5d8d2..0000000 Binary files a/doc/main.synctex.gz and /dev/null differ diff --git a/doc/main.tex b/doc/main.tex deleted file mode 100644 index 7c1b793..0000000 --- a/doc/main.tex +++ /dev/null @@ -1,160 +0,0 @@ -\documentclass[11pt,a4paper]{article} -\usepackage{tmlr} - -\usepackage[utf8]{inputenc} % allow utf-8 input -\usepackage[T1]{fontenc} % use 8-bit T1 fonts -\usepackage{hyperref} % hyperlinks -\usepackage{url} % simple URL typesetting -\usepackage{booktabs} % 
professional-quality tables -\usepackage{amsfonts} % blackboard math symbols -\usepackage{nicefrac} % compact symbols for 1/2, etc. -\usepackage{microtype} % microtypography -\usepackage{xcolor} % colors -\usepackage{tikz} -% \usepackage{caption} -\usepackage{float} -\usetikzlibrary{arrows.meta} -\usetikzlibrary{calc} -\input{preamble/header} -\input{preamble/def} -\usepackage{comment} -\usepackage{authblk} -\usepackage{cancel} - -% \usepackage[commands]{MJH} -% \usepackage{bibspacing} -\setlength{\bibsep}{2pt} -\makeatletter -\renewcommand\AB@affilsepx{, \protect\Affilfont} -\makeatother -% \usepackage{showlabels} -\providecommand{\keywords}[1] -{ - \small - \textbf{\textit{Keywords---}} #1 -} -% \hypersetup{colorlinks,citecolor=blue!50!black} -\newcommand{\appendixhead}{ - \centerline{\textbf{\LARGE Supplementary to: }\vspace{0.15in}} - \centerline{\textbf{\LARGE Riemannian Score-Based Generative Modeling}\vspace{0.25in}} - } -\usepackage{xcolor} -\colorlet{linkcolor}{blue!70!black} -\hypersetup{ - colorlinks, - linkcolor={red!50!black}, - citecolor={blue!50!black}, - urlcolor={blue!80!black} -} -% \hypersetup{ -% colorlinks=true, % false: boxed links; true: colored links -% linkcolor=linkcolor, % color of internal links (change box color with linkbordercolor) -% citecolor=linkcolor, % color of links to bibliography -% filecolor=linkcolor, % color of file links -% urlcolor=linkcolor % color of external links -% } -\usepackage[font={small}]{caption, subcaption} -\graphicspath{{images/}} - -\title{Riemannian Score-Based Generative Modeling} - -% The \author macro works with any number of authors. There are two commands -% used to separate the names and addresses of multiple authors: \And and \AND. -% -% Using \And between authors leaves it to LaTeX to determine where to break the -% lines. Using \AND forces a line break at that point. 
So, if LaTeX puts 3 of 4 -% authors names on the first line, and the last on the second line, try using -% \AND instead of \And before the third author name. - -% \author{George, Valentin and Arnaud} - -\author{Valentin De Bortoli, Arnaud Doucet, Michael Hutchinson, \'Emile Mathieu, Yee Whye Teh, James Thornton} -\affil{Oxford University} -% \affil[1]{deligian@stats.ox.ac.uk} -% \affil[2]{valentin.debortoli@gmail.com} -% \affil[3]{doucet@stats.ox.ac.uk} - - -\begin{document} - -\maketitle - -\begin{abstract} - \small - Score-based generative models (SGMs) are a novel class of generative models demonstrating remarkable empirical performance. One uses a diffusion to add progressively Gaussian noise to the data, while the generative model is a ``denoising'' process obtained by approximating the time-reversal of this ``noising'' diffusion. However, current SGMs make the underlying assumption that the data is supported on a Euclidean manifold with flat geometry. This prevents the use of these models for applications in - robotics, geoscience or protein modeling which rely on distributions defined - on Riemannian manifolds. To overcome this issue, we introduce \emph{Riemannian - Score-based Generative Models} (RSGMs) which extend current SGMs to the - setting of compact Riemannian manifolds. %RGSMs rely on the extension of results on time-reversal of diffusions to non-Euclidean geometry. -We also show how RSGMs can be accelerated by solving a Schr\"odinger bridge problem on manifolds. We illustrate our -% approach with synthetic examples on the sphere. -approach with earth and climate science data. 
-\end{abstract} -\keywords{Diffusion processes, Generative modeling, Riemannian manifold, Score-based generative models, Schr\"odinger bridge} - -%\tableofcontents - -\input{intro} - -\input{prel_main} - -\input{score_approx} - -\input{rel_work} - -\input{experiments} - -\input{conclusion} - -\bibliographystyle{apalike} -\bibliography{bibliography} - -\newpage -\appendixhead -\appendix - -\theoremstyle{plain} -\newtheorem{unlemma}{Lemma S} -\newtheorem{unproposition}{Proposition S} -\newtheorem{uncorollary}{Corollary S} -\newtheorem{untheorem}{Theorem S} - -\setcounter{equation}{0} -\setcounter{figure}{0} -\setcounter{table}{0} -\setcounter{page}{1} -\makeatletter -\renewcommand{\theequation}{S\arabic{equation}} -\renewcommand{\thefigure}{S\arabic{figure}} -\renewcommand{\thetheorem}{S\arabic{theorem}} -\renewcommand{\thedefinition}{S\arabic{definition}} -\renewcommand{\thelemma}{S\arabic{lemma}} -\renewcommand{\thesection}{S\arabic{section}} -\renewcommand{\theremark}{S\arabic{remark}} -\renewcommand{\theproposition}{S\arabic{proposition}} -\renewcommand{\thecorollary}{S\arabic{corollary}} -\setcounter{tocdepth}{1} - - -\input{intro_app} - -\input{prel} - -\input{diff_nf_sde} - -\input{eigen} - -\input{time_reversal} - -\input{schrodinger} - -\input{implicit_loss} - -\input{experimental_detail} - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff --git a/doc/preamble/def.tex b/doc/preamble/def.tex deleted file mode 100644 index 3658075..0000000 --- a/doc/preamble/def.tex +++ /dev/null @@ -1,1158 +0,0 @@ -\def\rmB{\mathrm{B}} -\def\ellim{\ell^{\mathrm{im}}} -\def\piinv{p_{\textup{ref}}} -\def\piinvb{\bar{\pi}_{\mathrm{inv}}} -% \def\pizero{\pi_0} -\def\piinv{p_{\textup{ref}}} -\def\pizero{p_0} - -\newcommand{\mjh}[1]{\textcolor{blue}{#1}} - - -\newcommand{\mY}{\bm{Y}} -\newcommand{\mX}{\bm{X}} -\newcommand{\mW}{\bm{W}} -\newcommand{\mZ}{\bm{Z}} -\newcommand{\mz}{\bm{z}} -\newcommand{\mB}{\bm{B}} 
-\newcommand{\vf}[1]{\bm{v}(\#1)} - -\newcommand{\grad}{\mathrm{grad}} -\newcommand{\dive}{\mathrm{div}} - -\newcommand{\prodM}[2]{\langle #1, #2 \rangle_\M} -\newcommand{\XM}{\mathcal{X}(\mathcal{M})} -\newcommand{\XMdeux}{\mathcal{X}^2(\mathcal{M})} -\newcommand{\Xgamma}{\mathcal{X}(\gamma)} -\newcommand{\TM}{\mathrm{T}\mathcal{M}} -\newcommand{\FM}{\mathrm{F}\mathcal{M}} -\newcommand{\OM}{\mathrm{O}\mathcal{M}} -\newcommand{\TMstar}{\mathrm{T}^\star\mathcal{M}} -\newcommand{\detLigne}[1]{\det(#1)} -\def\hlf{\hat{\ell}^f} -\def\hlb{\hat{\ell}^b} -\def\Ent{\mathrm{H}} -\def\lyap{V_{p,t,x_t}} -\def\lyapp{V_{p}} -\def\carrechamp{\Upsilon} -\def\carrechampb{\bar{\Upsilon}} - -\def\contspace{\mathcal{C}} -\def\pdata{p_{\textup{data}}} -\def\qdata{q_{\textup{data}}} -\def\pprior{p_{\textup{prior}}} - -\def\for{\mathrm{f}} -\def\back{\mathrm{b}} -\def\lf{\ell^{\mathrm{f}}} -\def\lb{\ell^{\mathrm{b}}} -\def\sf{s^{\mathrm{f}}} -\def\sb{s^{\mathrm{b}}} - -\def\Tcal{\mathcal{T}} -\def\bfpi{\bm{\pi}} -\def\bfnu{\bm{\nu}} - -% \def\Pens{\mathscr{P}} -\def\Pens{\mathcal{P}} -\def\Mens{\mathscr{M}} -\def\pif{\overrightarrow{\pi}} -\def\lambdabff{\overrightarrow{\bm{\lambda}}} -\def\lambdabfb{\overleftarrow{\bm{\lambda}}} -\newcommand{\mail}[1]{\footnote{Email: \href{mailto:#1}{\textcolor{black}{#1}}}} -\def\Phif{\overrightarrow{\Phi}} -\def\Phib{\overleftarrow{\Phi}} -\def\scoref{\overrightarrow{\mathrm{S}}} -\def\scoreb{\overleftarrow{\mathrm{S}}} -\def\netf{\overrightarrow{\mathrm{NN}}} -\def\netb{\overleftarrow{\mathrm{NN}}} -\newcommand{\schro}{Schr\"{o}dinger\xspace} -\newcommand{\Cweakapp}{\ttd} -\def\ttfp{\Cweakapp_{p}} -\def\ttfpun{\Cweakapp_{p,1}} -\def\ttfpdeux{\Cweakapp_{p,2}} -\def\ttfptrois{\Cweakapp_{p,3}} -\def\ttfpquatre{\Cweakapp_{p,4}} -\def\ttamin{\mathtt{a}} -\def\ttfun{\Cweakapp_4} -\def\ttfdeux{\Cweakapp_5} -\def\btta{\bar{\mathtt{A}}} -\def\bfb{\mathbf{b}} -\def\bfsigma{\pmb{\sigma}} -\def\KuLo{Kurdyka-\L ojasiewicz} -\newcommand{\tta}{\mathtt{A}} 
-\newcommand{\ttb}{\mathtt{B}} -\newcommand{\ttc}{\mathtt{C}} -\newcommand{\ttd}{\mathtt{D}} -\def\tte{\mathtt{E}} -\newcommand{\ttM}{\mathtt{M}} -\def\boundLSig{\Lip\eta} - -\newcommand{\Capprox}{\tta} -\newcommand{\Ctech}{\ttc} -\newcommand{\Cstrong}{\ttb} -\newcommand{\Cconv}{\ttc} -\newcommand{\Cweak}{C} - -\def\conj{\varkappa} -\def\mtta{\mathtt{a}} -\def\explog{\vareps} -\newcommand{\note}[1]{\textcolor{red}{#1}} -\def\Cbeta{\Cweak_{\beta, \explog}} -\def\Aar{\Capprox_{\alpha, r}} -\def\xo{x_0} -\def\Db{\Ctech} -\def\intk{\int_{k\gua}^{(k+1)\gua}} -\newcommandx\ctun[1][1=T]{\Capprox_{#1,1}} -\def\btun{\mathtt{B}_1} -\def\btdeux{\mathtt{B}_2} -\def\dtun{\mathtt{D}_1} -\def\cttun{\tilde{\Capprox}_{T,1}} -\def\dtdeux{\mathtt{D}_2} -\def\ctdeux{\Capprox_{T,2}} -\def\cttrois{\Capprox_{T,3}} -\def\ctquatre{\Capprox_{T,4}} -\def\ctcinq{\Capprox_{T,5}} -\def\ctsix{\Capprox_{T,6}} -\def\ctsept{\Capprox_{T,7}} -\def\cthuit{\Capprox_{T,8}} -\def\ctneuf{\Capprox_{T,9}} -\def\gfun{\mathbb{G}} -\def\hash{\sharp} -\def\Cconvcontun{\Cconv_{1,\alpha}^{(c)}} -\def\Cconvcontdeux{\Cconv_{2,\alpha}^{(c)}} -\def\Cconvconttrois{\Cconv_{3,\alpha}^{(c)}} -\def\Cconvdiscun{\Cconv_{1,\alpha}^{(d)}} -\def\Cconvdiscdeux{\Cconv_{2,\alpha}^{(d)}} -\def\Cconvdisctrois{\Cconv_{3,\alpha}^{(d)}} -\def\Cconvcont{\Phibf_{\alpha}^{(c)}} -\def\Cconvdisc{\Phibf_{\alpha}^{(d)}} -\def\Csham{\Cconv_1} -\def\Cshamd{\Cconv_2} -\def\Cshama{\Cconv_{\alpha}} -\def\Cshamamoins{\Cshama^-} -\def\Cshamaplus{\Cshama^+} -\def\Ccont{\Cconv^{(c)}} -\def\Cdisc{\Cconv^{(d)}} -\def\Cconvk{{\Cconv^{(a)}_k}} -%\def\Cconvdun{\Cconv^{(b)}_1} -%\def\Cconvddeux{\Cconv^{(b)}_2} -\def\Cconvdtrois{\Cconv^{(b)}} -\def\Cconvdun{(\gamma\eta/2)} -\def\Cconvddeux{(\gamma/2)} -\def\Cshamdisc{\Cconv_{0}} -\def\Cshamt{\tilde{\Cconv}_{\alpha}} -\def\Psial{\Psibf_{\alpha}} -\def\Cstrongcont{\Cstrong_1} -\def\Cstrongcontf{\Cstrong_2} -\def\Cstrongdisc{\Cstrong_3} -\def\Cstrongdiscf{\Cstrong_4} -\def\Cstrongloj{\Cstrong_5} 
-\def\Cstronglojdisc{\Cstrong_6} -\def\Cstrongtilde{\tilde{\Cstrong}} -\def\maxnorm{C} -\newcommand{\pinv}{^{-1}} -\newcommand{\st}{^{\star}} -\newcommand{\gb}{\gamma^{\beta}} -\newcommand{\tr}{^{\top}} -\def\scrE{\mathscr{E}} -\def\scrV{\mathscr{V}} -\def\scrF{\mathscr{F}} -\newcommand{\rref}[1]{\tup{\Cref{#1}}} -\newcommand{\la}{\langle} -\newcommand{\ra}{\rangle} -\newcommand{\LL}{\L ojasciewicz~} -\newcommand{\gua}{\gamma_{\alpha}} -\newcommand{\bgua}{\bgamma_{\alpha}} -\newcommand{\gda}{\gua^{1/2}} -\newcommand{\tgua}{(t+\gua)^{\alpha}} -\newcommand{\guac}{c} -\newcommand{\et}{\quad\mbox{and}\quad} -%\newcommand{\sigb}{\ttM_{\Sigma}} -\newcommand{\sigb}{\eta} -\newcommand{\phe}{\varphi_{\varepsilon}} -\newcommand{\feps}{f_{\varepsilon}} -\newcommand{\nfeps}{\nabla f_{\varepsilon}} -\newcommand{\intd}{\int_{\bR^{\dim}}} -\newcommandx{\expec}[2]{{\mathbb E}\left[#1 \middle \vert #2 \right]} %%%% esperance conditionnelle -\newcommand{\expek}[1]{\expec{#1}{\cF_k}} -\newcommand{\expen}[1]{\expec{#1}{\cF_n}} -\newcommand{\nn}{_{n+1}} -\newcommand{\kk}{_{k+1}} -\newcommand{\pal}{^{\alpha}} -\newcommand{\pmal}{^{-\alpha}} -\newcommand{\cH}{\mathcal{H}} - -\def\En{\tilde{E}_n} -\def\varepsn{\tilde{\vareps}_n} -\def\pow{p} -\def\ntt{\mathtt{n}_0} -\def\tlambda{\tilde{\lambda}} -\def\dim{d} -\newcommand{\tb}{\tilde{b}} -\newcommand{\Time}{T} -\newcommand{\mttun}{\mathtt{k}_1} -\newcommand{\mttdeux}{\mathtt{k}_2} -\newcommand{\mtttrois}{\mtt_3^+} -\newcommand{\bvareps}{\bar{\vareps}} -\newcommand{\transference}{\mathbf{T}} -\newcommand{\esssup}{\mathrm{ess sup}} -\newcommand{\ring}{\mathcal{C}_{\varrho}} -\newcommand{\measx}{\mathcal{X}} -\newcommand{\bkappa}{\bar{\kappa}} -\newcommand{\probaspace}[1]{\mathbb{P}\left( #1 \right)} -\newcommand{\dTVdeux}{d_{\mathrm{TV}, 2}} -\newcommand{\dTVDeux}[1]{d_{\mathrm{TV}, 2}\left( #1 \right)} -\newcommand{\bgM}{b_{\gamma, n}} -\newcommand{\bbgM}{\bar{b}_{\gamma, M}} -\newcommand{\rme}{\mathrm{e}} -\newcommand{\rmF}{\mathrm{F}} 
-\newcommand{\rmE}{\mathrm{E}} -\newcommand{\Fdr}{\mathrm{f}} -\newcommand{\Gdr}{\mathrm{g}} -\newcommand{\alphastar}{\alpha_{\star}} -\newcommand{\LipVset}{\mathrm{Lip}_{V, \alpha}} -\newcommand{\Lip}{\mathtt{L}} -\newcommand{\Lipset}{\mathrm{Lip}} -\newcommand{\Mtt}{\mathtt{M}} -\newcommand{\Ktt}{\mathtt{K}} -\newcommand{\tLip}{\tilde{\mathtt{L}}} -\newcommand{\tell}{\tilde{\ell}} -\newcommand{\Lipb}{\mtt_b} -\newcommand{\step}{\ceil{1/\gamma}} -\newcommand{\bstep}{\ceil{1/\bgamma}} -\def\bdisc{b} -\def\bfDd{\mathbf{D}_{\mathrm{d}}} -\def\bfDc{\mathbf{D}_{\mathrm{c}}} -\newcommand{\SDE}{\mathrm{SDE}} - -\newcommand{\bbeta}{\bar{\beta}} -\newcommand{\measfun}{\mathbb{F}} -\newcommand{\btheta}{\boldsymbol{\theta}} -\newcommand{\bdeta}{\boldsymbol{\eta}} -\newcommand{\bvarphi}{\boldsymbol{\varphi}} - -%\newcommand{\tau}{\boldsymbol{\tau}} -%\newcommand{\x}{\boldsymbol{x}} -%\newcommand{\X}{\boldsymbol{X}} -%\newcommand{\y}{\boldsymbol{y}} -%%\newcommand{\u}{\boldsymbol{u}} -%\newcommand{\w}{\boldsymbol{w}} -%\newcommand{\z}{\boldsymbol{z}} -%\newcommand{\p}{\boldsymbol{p}} -%\newcommand{\s}{\mathcal{S}} -%\newcommand{\ind}{\boldsymbol{1}} -%\newcommand{\dx}{\boldsymbol{\delta}\boldsymbol{x}} -%\newcommand{\argmax}{\operatornamewithlimits{argmax}} -%\newcommand{\argmin}{\operatornamewithlimits{argmin}} -%\newcommand{\prox}{\operatorname{prox}} -\def\x{{ \boldsymbol x}} -\def\u{{ \boldsymbol u}} -\def\y{{\boldsymbol y}} -\def\z{{\boldsymbol z}} -\def\w{{\boldsymbol w}} - -\def\xt{ \boldsymbol x^t} -\newcommandx{\norm}[2][1=]{\ifthenelse{\equal{#1}{}}{\left\Vert #2 \right\Vert}{\left\Vert #2 \right\Vert^{#1}}} -\newcommandx{\normLigne}[2][1=]{\ifthenelse{\equal{#1}{}}{\Vert #2 \Vert}{\Vert #2\Vert^{#1}}} - - -\newcommand\mycomment[1]{\textcolor{red}{#1}} - -%\theoremstyle{definition} -%\newtheorem{defn}{Definition}[section] -%\newtheorem{assump}{A}[paragraph] -%\newtheorem{prop}{Proposition}[section] -%\newtheorem{theo}{Theorem}[section] 
-%\newtheorem{coro}{Corollary}[section] -%\newtheorem{lemma}{Lemma}[section] -%\newtheorem{exmp}{Example}[section] - -\def\xstart{x^{\star}_{\theta}} - -%%%%%%%%%%%%%%% -%% mathbf - -\def\bfn{\mathbf{n}} -\def\bfw{\mathbf{w}} -\def\bfc{\mathbf{c}} -\def\bfY{\mathbf{Y}} -\def\bfhY{\hat{\mathbf{Y}}} -\def\bbfY{\bar{\mathbf{Y}}} -\def\bfX{\mathbf{X}} -\def\bfhX{\hat{\mathbf{X}}} -\def\bfW{\mathbf{W}} -\def\bfU{\mathbf{U}} -\def\bfE{\mathbf{E}} -\def\bfs{\mathbf{s}} -\def\bfZ{\mathbf{Z}} -\def\bfXt{\tilde{\mathbf{X}}} -\def\bfXd{\overline{\mathbf{X}}} -\def\bfYd{\overline{\mathbf{Y}}} -\def\bfZ{\mathbf{Z}} -\def\bbfX{\tilde{\mathbf{X}}} -\def\bfM{\mathbf{M}} -\def\bfB{\mathbf{B}} -\def\bfP{\mathbf{P}} -%%% mathsf -\def\msi{\mathsf{I}} -\def\msa{\mathsf{A}} -\def\msd{\mathsf{D}} -\def\msk{\mathsf{K}} -\def\mss{\mathsf{S}} -\def\msn{\mathsf{N}} -\def\msat{\tilde{\mathsf{A}}} -\def\msb{\mathsf{B}} -\def\msc{\mathsf{C}} -\def\tmsc{\tilde{\msc}} -\def\mse{\mathsf{E}} -\def\msf{\mathsf{F}} -\def\tmsf{\tilde{\msf}} -\def\mso{\mathsf{o}} -\def\msg{\mathsf{G}} -\def\msh{\mathsf{H}} -\def\msm{\mathsf{M}} -\def\msu{\mathsf{U}} -\def\msv{\mathsf{V}} -\def\msr{\mathsf{R}} -\newcommand{\msff}[2]{\mathsf{F}_{#1}^{#2}} -\def\msp{\mathsf{P}} -\def\msq{\mathsf{Q}} -\def\msx{\mathsf{X}} -\def\msz{\mathsf{Z}} -\def\msy{\mathsf{Y}} -\def\ddx{d_\msx} -\def\ddy{d_\msy} - -%% mathcal -\def\mca{\mathcal{A}} -\def\mct{\mathcal{T}} -\def\mcat{\tilde{\mathcal{A}}} -\def\mcab{\bar{\mathcal{A}}} -\def\mcbb{\mathcal{B}} %%% \mcb est déjà pris -\newcommand{\mcb}[1]{\mathcal{B}(#1)} -\def\mcc{\mathcal{C}} -\def\mcz{\mathcal{Z}} -\def\mcy{\mathcal{Y}} -\def\mcx{\mathcal{X}} -\def\mce{\mathcal{E}} -\def\mcs{\mathcal{S}} -\def\mcf{\mathcal{F}} -\def\mcg{\mathcal{G}} -\def\mch{\mathcal{H}} -\def\mcm{\mathcal{M}} -\def\mcu{\mathcal{U}} -\def\mcv{\mathcal{V}} -\def\mcr{\mathcal{R}} -\newcommand{\mcff}[2]{\mathcal{F}_{#1}^{#2}} -\def\mcfb{\bar{\mathcal{F}}} -\def\bmcf{\bar{\mathcal{F}}} 
-\def\mcft{\tilde{\mathcal{F}}} -\def\tmcf{\tilde{\mathcal{F}}} -\def\mcp{\mathcal{P}} -\def\mcq{\mathcal{Q}} - -%% mathbb - -\def\Qbb{\mathbb{Q}} -\def\Rbb{\mathbb{R}} -\def\Mbb{\mathbb{M}} -\def\Pbb{\mathbb{P}} -\def\Hbb{\mathbb{H}} -\newcommand{\Qit}[1]{\Qbb^{(#1)}} -\newcommand{\Pit}[1]{\Pbb^{(#1)}} - -\def\rset{\mathbb{R}} -\def\rsets{\mathbb{R}^*} -\def\cset{\mathbb{C}} -\def\zset{\mathbb{Z}} -\def\tset{\mathbb{T}} -\def\nset{\mathbb{N}} -\def\nsets{\mathbb{N}^{\star}} -\def\qset{\mathbb{Q}} -\def\Rset{\mathbb{R}} -\def\Cset{\mathbb{C}} -\def\Zset{\mathbb{Z}} -\def\Nset{\mathbb{N}} -\def\Tset{\mathbb{T}} - -\def\bN{\mathbb{N}} -\def\bR{\mathbb{R}} -\def\bRd{\mathbb{R}^{\dim}} -\def\cF{\mathcal{F}} - - -%%%% mathrm - -\def\rmP{\mathrm{P}} -\def\rmQ{\mathrm{Q}} -\def\rmR{\mathrm{R}} -\def\rmb{\mathrm{b}} -\def\mrb{\mathrm{b}} -\def\wrm{\mathrm{w}} -\def\rmw{\mathrm{w}} -\def\rmd{\mathrm{d}} -\def\rmm{\mathrm{m}} -\def\rms{\mathrm{s}} -\def\rmZ{\mathrm{Z}} -\def\rmS{\mathrm{S}} -\def\mrd{\mathrm{d}} -\def\mre{\mathrm{e}} -\def\rme{\mathrm{e}} -\def\rmn{\mathrm{n}} -\def\mrn{\mathrm{n}} -\def\mrc{\mathrm{C}} -\def\mrcc{\mathrm{c}} -\def\rmc{\mathrm{C}} -\def\rmC{\mathrm{C}} -\def\GaStep{\Gamma} -\def\rmcc{\mathrm{c}} -\def\rma{\mathrm{a}} -\def\rmf{\mathrm{f}} -\def\rmg{\mathrm{g}} -\def\rmh{\mathrm{h}} -\def\rmv{\mathrm{v}} -\def\mra{\mathrm{a}} - -\def\cov{\mathrm{Cov}} - -\newcommand{\cco}{\llbracket} -\newcommand{\ccf}{\rrbracket} -\newcommand{\po}{\left(} -\newcommand{\pf}{\right)} -\newcommand{\co}{\left[} -\newcommand{\cf}{\right]} -\newcommand{\R}{\mathbb R} -\newcommand{\Z}{\mathbb Z} -\newcommand{\D}{\mathcal D} -\newcommand{\dd}{\mathrm{d}} -\newcommand{\A}{\mathcal A} -\newcommand{\M}{\mathcal M} -\newcommand{\na}{\nabla} -\newcommand{\loiy}{\mu_{\mathrm{v}}} - - -\def\MeasFspace{\mathbb{M}} -\def\xstar{x^\star} -\def\Tr{\operatorname{T}} -\def\trace{\operatorname{Tr}} -\newcommandx{\functionspace}[2][1=+]{\mathbb{F}_{#1}(#2)} -%% argmin, argmax 
-\newcommand{\argmax}{\operatorname*{arg\,max}} -\newcommand{\argmin}{\operatorname*{arg\,min}} -\newcommand{\estimateur}[1]{\hat{\pi}_n^N(#1)} -\def\RichR{\operatorname{R}} -\def\piR{\hat{\pi}^{\RichR}} -\def\estimatorRR{\piR} -\newcommandx{\VarDeux}[3][3=]{\operatorname{Var}^{#3}_{#1}\left\{#2 \right\}} -\newcommand{\VarDeuxLigne}[2]{\operatorname{Var}_{#1}\{#2 \}} -\newcommand{\gramm}{\operatorname{Gramm}} -\newcommand{\1}{\mathbbm{1}} -\newcommand{\2}[1]{\mathbbm{1}_{\{#1\}}} - - - - -\newcommand{\LeftEqNo}{\let\veqno\@@leqno} - -\newcommand{\lambdast}{\lambda^{s \rightarrow t}} -\newcommand{\etast}{\eta^{s \rightarrow t}} -\newcommand{\mst}{m^{s \rightarrow t}} -\newcommand{\mun}{m^{1 \rightarrow 2}} -\newcommand{\mdeux}{m^{2 \rightarrow 1}} -\newcommand{\lambdaun}{\lambda^{2 \rightarrow 1}} -\newcommand{\etaun}{\eta^{2 \rightarrow 1}} -\newcommand{\lambdadeux}{\lambda^{1 \rightarrow 2}} -\newcommand{\etadeux}{\eta^{1 \rightarrow 2}} -\newcommand{\mnun}{m^{n+1 \rightarrow \pi(n+1)}} -\newcommand{\etanun}{\eta^{n+1 \rightarrow \pi(n+1)}} -\newcommand{\lambdanun}{\lambda^{n+1 \rightarrow \pi(n+1)}} -\newcommand{\xpinun}{x_{\pi(n+1)}} -\newcommand{\xnun}{x_{n+1}} -\newcommand{\mpinun}{m^{\pi(n+1) \rightarrow n+1}} -\newcommand{\etapinun}{\eta^{\pi(n+1) \rightarrow n+1}} -\newcommand{\lambdapinun}{\lambda^{\pi(n+1) \rightarrow n+1}} -\newcommand{\pinun}{\pi(n+1)} -\newcommand{\vois}{\mathcal{N}} -\newcommand{\mpii}{m^{i \rightarrow \pi(n+1)}} -\newcommand{\etapii}{\eta^{i \rightarrow \pi(n+1)}} -\newcommand{\lambdapii}{\lambda^{i \rightarrow \pi(n+1)}} -\newcommand{\alphahat}{\widehat{\alpha}} -\newcommand{\betahat}{\widehat{\beta}} -\newcommand{\tildegamma}{\widetilde{\gamma}} -\newcommand{\tildeP}{\widetilde{P}} - -\newcommand{\myeqref}[1]{Eq.~\eqref{#1}} - - - -%%%% Floating Points Notation - -\newcommand{\fpround}[1]{\lfloor #1 \rceil} -\newcommand{\floor}[1]{\left\lfloor #1 \right\rfloor} -\newcommand{\ceil}[1]{\left\lceil #1 \right\rceil} - - - -%voc 
-\newcommand{\pth}{\ensuremath{p^{\text{th}}}} -\newcommand{\qth}{\ensuremath{q^{\text{th}}}} -\newcommand{\nth}{\ensuremath{n^{\text{th}}}} - -%order -\newcommand{\ord}{\ensuremath{\operatorname{ord}}} -\newcommand{\rad}{\ensuremath{\operatorname{rad}}} - - - -% Sets -\newcommand{\N}{\ensuremath{\mathbb{N}}} -\newcommand{\Q}{\ensuremath{\mathbb{Q}}} -\newcommand{\C}{\ensuremath{\mathbb{C}}} - -%\newcommand{\F}{\ensuremath{\mathbb{F}}} -\newcommand{\primes}{\ensuremath{\mathcal P}} - -\newcommand{\sfi}{\ensuremath{\mathcal{S}\!\mathcal{F}}} -\newcommand{\sfibt}{\ensuremath{\mathcal{S}\!\mathcal{F}'}} - -\newcommand{\reghat}{\widehat{R}} - -\newcommand{\reghatn}{\widehat{R}_n} - -\newcommand{\arm}{\mathcal{A}} - -%\newcommand{\mX}{\widehat{X}} -\newcommand{\PE}{\mathbb{E}} -\newcommand{\PP}{\mathbb{P}} -\newcommand{\Ft}{\mathcal{F}} - -\newcommand{\Sy}{\mathbf{S}} - -\newcommand{\Kfrac}{\mathscr{K}} - -% Operands -\newcommand{\absolute}[1]{\left\vert #1 \right\vert} -\newcommand{\abs}[1]{\left\vert #1 \right\vert} -\newcommand{\absLigne}[1]{\vert #1 \vert} -\newcommand{\tvnorm}[1]{\| #1 \|_{\mathrm{TV}}} -\newcommand{\tvnormLigne}[1]{\| #1 \|_{\mathrm{TV}}} -\newcommand{\tvnormEq}[1]{\left \| #1 \right \|_{\mathrm{TV}}} -\newcommandx{\Vnorm}[2][1=V]{\| #2 \|_{#1}} -\newcommandx{\VnormEq}[2][1=V]{\left\| #2 \right\|_{#1}} -% \newcommandx{\norm}[2][1=]{\ifthenelse{\equal{#1}{}}{\left\Vert #2 \right\Vert}{\left\Vert #2 \right\Vert^{#1}}} -% \newcommandx{\normLigne}[2][1=]{\ifthenelse{\equal{#1}{}}{\Vert #2 \Vert}{\Vert #2\Vert^{#1}}} -\newcommand{\crochet}[1]{\left\langle#1 \right\rangle} -\newcommand{\parenthese}[1]{\left(#1 \right)} -\newcommand{\parentheseLigne}[1]{(#1 )} -\newcommand{\parentheseDeux}[1]{\left[ #1 \right]} -\newcommand{\parentheseDeuxLigne}[1]{[ #1 ]} -\newcommand{\defEns}[1]{\left\lbrace #1 \right\rbrace } -\newcommand{\defEnsLigne}[1]{\lbrace #1 \rbrace } -\newcommand{\defEnsPoint}[1]{\left\lbrace #1 \right. 
} -\newcommand{\defEnsPointDeux}[1]{\left. #1 \right \rbrace } -\newcommand{\defEnsL}[1]{\left\lbrace #1 \right. } -\newcommand{\defEnsR}[1]{\left. #1 \right \rbrace } - -%\newcommand{\defSystem}[1]{\left\lbrace #1 \right. } - -\newcommand{\ps}[2]{\left\langle#1,#2 \right\rangle} -\newcommand{\eqdef}{=} -\newcommand{\defeq}{=} - -% Relations -\newcommand{\divid}{\mid} -\newcommand{\ndivide}{\nmid} - -% Proba -\newcommand{\proba}[1]{\mathbb{P}\left( #1 \right)} -\newcommand{\probaCond}[2]{\mathbb{P}\left( \left. #1 \middle\vert #2 \right.\right)} -\newcommand{\probaCondLigne}[2]{\mathbb{P}(#1 \vert #2 )} -\newcommand{\probaCondLignePi}[2]{\Pi(#1 \vert #2 )} -\newcommand{\probaLigne}[1]{\mathbb{P}( #1 )} -\newcommandx\probaMarkovTilde[2][2=] -{\ifthenelse{\equal{#2}{}}{{\widetilde{\mathbb{P}}_{#1}}}{\widetilde{\mathbb{P}}_{#1}\left[ #2\right]}} -\newcommand{\probaMarkov}[2]{\mathbb{P}_{#1}\left[ #2\right]} -\newcommand{\probaMarkovDD}[1]{\mathbb{P}_{#1}} -\newcommand{\expe}[1]{\PE \left[ #1 \right]} -\newcommand{\expesq}[1]{\PE^{1/2} \left[ #1 \right]} -\newcommand{\expeExpo}[2]{\PE^{#1} \left[ #2 \right]} -\newcommand{\expeLigne}[1]{\PE [ #1 ]} -\newcommand{\expeLine}[1]{\PE [ #1 ]} -\newcommand{\expeMarkov}[2]{\PE_{#1} \left[ #2 \right]} -\newcommand{\expeMarkovD}[3]{\PE_{#1}^{#3} \left[ #2 \right]} -\newcommand{\expeMarkovDD}[1]{\PE_{#1}} -\newcommand{\expeMarkovLigne}[2]{\PE_{#1} [ #2 ]} -\newcommand{\expeMarkovExpo}[3]{\PE_{#1}^{#2} \left[ #3 \right]} -\newcommand{\probaMarkovTildeDeux}[2]{\widetilde{\mathbb{P}}_{#1} \left[ #2 \right]} -\newcommand{\expeMarkovTilde}[2]{\widetilde{\PE}_{#1} \left[ #2 \right]} - -% Landau notation (big O) -\newcommand{\bigO}{\ensuremath{\mathcal O}} -\newcommand{\softO}{\Tilde{\ensuremath{\mathcal O}}} - -% Environments - -%\renewenvironment{proof}[1][{\textit{Proof:}}]{\begin{trivlist} \item[\em{\hskip \labelsep #1}]}{\ensuremath{\qed} \end{trivlist}} - -%\renewenvironment{proof}[1][{\textit{Proof:}}]{\begin{trivlist} 
\item[\em{\hskip \labelsep #1}]}{\ensuremath{\qed} \end{trivlist}} - - - -%fleche limite -\newcommand{\flecheLimite}{\underset{n\to+\infty}{\longrightarrow}} -\newcommand{\flecheLimiteOption}[2]{\underset{#1\to#2}{\longrightarrow}} -\newcommand{\flecheLimiteHaut}{\overset{n\to+\infty}{\longrightarrow}} - - -%notation infini -\newcommand{\plusinfty}{+\infty} - -%notation egale -\newcommand{\egale}[1]{\ensuremath{\underset{#1}{=}}} - -%plusieurs ligne indice -%\sum\limits_{\substack{i=0 \\ i \neq i_0}}^{n}{A_ - - - -\newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}} - - -\newcommand{\hilbert}{\mathcal{H}} - - -\def\ie{\textit{i.e.}} -\def\as{\textit{a.s}} -\def\cadlag{càdlàg} -\def\eqsp{\;} -\newcommand{\coint}[1]{\left[#1\right)} -\newcommand{\ocint}[1]{\left(#1\right]} -\newcommand{\ooint}[1]{\left(#1\right)} -\newcommand{\ccint}[1]{\left[#1\right]} -\newcommand{\cointLigne}[1]{[#1)} -\newcommand{\ocintLigne}[1]{(#1]} -\newcommand{\oointLigne}[1]{(#1)} -\newcommand{\ccintLigne}[1]{[#1]} - -\def\primr{f_r} -\def\primrO{f_{r_0}} - - - - -\newcommand{\indi}[1]{\1_{#1}} -\newcommandx{\weight}[2][2=n]{\omega_{#1,#2}^N} -\newcommand{\loi}{\mathcal{L}} -\newcommand{\boule}[2]{\operatorname{B}(#1,#2)} -\newcommand{\ball}[2]{\operatorname{B}(#1,#2)} -\newcommand{\boulefermee}[2]{\bar{B}(#1,#2)} -\newcommand{\cball}[2]{\bar{\operatorname{B}}(#1,#2)} -\newcommand{\diameter}{\operatorname{diam}} -\newcommand{\deta}{d_{\eta}} - -\def\TV{\mathrm{TV}} - -\newcommand{\george}[1]{\todo[color=orange!20]{{\bf GD:} #1}} -\newcommand{\james}[1]{\todo[color=blue!20]{{\bf JT:} #1}} -\newcommand{\arnaud}[1]{\todo[color=blue!20]{{\bf AD:} #1}} -\newcommand{\arnaudi}[1]{\todo[color=blue!20,inline]{{\bf AL:} #1}} -\newcommand{\valentin}[1]{\todo[color=blue!20]{{\bf VDB:} #1}} -\newcommand{\valentintxt}[1]{\textcolor{red}{\textbf{VDB}: #1}} - \newcommand{\valentini}[1]{\todo[color=blue!20,inline]{{\bf VDB:} #1}} - -\newcommand{\emile}[1]{\todo[color=red!20]{{\bf EM:} #1}} - 
-\newcommand{\michael}[1]{\todo[color=green!20]{{\bf MJH:} #1}} - -% \newcommand{\aymeric}[1]{\todo[color=blue!20]{{\bf AD:} #1}} -% \newcommand{\francis}[1]{\todo[color=black!20]{{\bf FB:} #1}} - \newcommand{\tcr}[1]{\textcolor{red}{#1}} -% \newcommand{\tcb}[1]{\textcolor{blue}{#1}} - - -\def\as{\ensuremath{\text{a.s.}}} -\def\dist{\operatorname{dist}} - -\newcommandx\sequence[3][2=,3=] -{\ifthenelse{\equal{#3}{}}{\ensuremath{\{ #1_{#2}\}}}{\ensuremath{\{ #1_{#2}, \eqsp #2 \in #3 \}}}} - -\newcommandx\sequenceD[3][2=,3=] -{\ifthenelse{\equal{#3}{}}{\ensuremath{\{ #1_{#2}\}}}{\ensuremath{( #1)_{ #2 \in #3} }}} - -\newcommandx{\sequencen}[2][2=n\in\N]{\ensuremath{\{ #1_n, \eqsp #2 \}}} -\newcommandx\sequenceDouble[4][3=,4=] -{\ifthenelse{\equal{#3}{}}{\ensuremath{\{ (#1_{#3},#2_{#3}) \}}}{\ensuremath{\{ (#1_{#3},#2_{#3}), \eqsp #3 \in #4 \}}}} -\newcommandx{\sequencenDouble}[3][3=n\in\N]{\ensuremath{\{ (#1_{n},#2_{n}), \eqsp #3 \}}} - - -\newcommand{\wrt}{w.r.t.} -\newcommand{\Withoutlog}{w.l.o.g.} -\def\iid{i.i.d.} -\def\ifof{if and only if} -\def\eg{\textit{e.g.}} - - -\newcommand{\notered}[1]{{\textbf{\color{red}#1}}} - - -\newcommand{\opnorm}[1]{{\left\vert\kern-0.25ex\left\vert\kern-0.25ex\left\vert #1 - \right\vert\kern-0.25ex\right\vert\kern-0.25ex\right\vert}} - - - -\def\Lip{\operatorname{Lip}} -\def\Ltt{\mathtt{L}} -\def\generator{\mathcal{A}} -\def\generatorb{\bar{\mathcal{A}}} -\def\generatort{\tilde{\mathcal{A}}} -\def\generatorsp{\generator^{\sphere^d}} -\def\generatorr{\generator^{\rset^d}} - -\def\momentNoise{\mathrm{m}} -\def\bfe{\mathbf{e}} - -\def\bfv{\mathbf{v}} -\def\ebf{\mathbf{e}} -\def\vbf{\mathbf{v}} - - -\def\Id{\operatorname{Id}} -\def\Idbf{\mathbf{I}} - -\def\tildetheta{\tilde{\theta}} - -\def\calC{\mathcal{C}} - - -\newcommandx{\CPE}[3][1=]{{\mathbb E}_{#1}\left[#2 \middle \vert #3 \right]} %%%% esperance conditionnelle -\newcommandx{\CPELigne}[3][1=]{{\mathbb E}_{#1}[#2 \vert #3 ]} %%%% esperance conditionnelle 
-\newcommandx{\CPEsq}[3][1=]{{\mathbb{E}^{1/2}}_{#1}\left[#2 \middle \vert #3 \right]} %%%% esperance conditionnelle -\newcommandx{\CPVar}[3][1=]{\mathrm{Var}^{#3}_{#1}\left\{ #2 \right\}} -\newcommand{\CPP}[3][] -{\ifthenelse{\equal{#1}{}}{{\mathbb P}\left(\left. #2 \, \right| #3 \right)}{{\mathbb P}_{#1}\left(\left. #2 \, \right | #3 \right)}} - -\def\Ascr{\mathscr{A}} -\def\scrA{\mathscr{A}} -\def\scrB{\mathscr{B}} -\def\scrC{\mathscr{C}} - -\def\barL{\bar{L}} - -\def\YL{\mathbf{Y}} -\def\XEM{X} -\def\steps{\gamma} -\def\measSet{\mathbb{M}} - -%\newcommand\Ent[2]{\mathrm{Ent}_{#1}\left(#2\right)} -\newcommandx{\osc}[2][1=]{\mathrm{osc}_{#1}(#2)} - -\def\Ybar{\bar{Y}} -\def\Id{\operatorname{Id}} -\def\IdM{\operatorname{I}_d} -\newcommand\EntDeux[2]{\Ent_{#1}\left[#2 \right]} -\def\Ltwo{\mathrm{L}^2} -\def\Lone{\mathrm{L}^1} -\newcommand\densityPi[1]{\frac{\rmd #1}{\rmd \pi}} -\newcommand\densityPiLigne[1]{\rmd #1 /\rmd \pi} -\newcommand\density[2]{\frac{\rmd #1}{\rmd #2}} -\newcommand\densityLigne[2]{\rmd #1/\rmd #2} - -%\def\V{V} -\def\VD{V} -\def\Vsp{V^{\sphere^d}_{\b,\beta}} -\def\Vr{V^{\rset^d}_{\b,\c,\beta}} - -\def\Prset{P^{\rset^d}} -\def\Psphere{P^{\sphere^d}} - -\def\n{\mathrm{n}} -\def\Vpsi{\psi} -\def\Vkappa{\kappa} -\def\Vkappat{\tilde{\kappa}} -\def\Vchi{\chi} -\def\Vchit{\tilde{\chi}} -\def\Vphi{\phi} -\def\Vrho{\rho} -\def\psiV{\Vpsi} -\def\rhoV{\Vrho} -\def\phiV{\Vphi} -\def\fV{f} -\def\Vf{\fV} -\def\kappaVt{\tilde{\Vkappa}} -\def\kappaV{\Vkappa} -\def\chiV{\Vchi} -\def\chiVt{\Vchit} - - -\def\a{a} -\def\b{b} -\def\c{c} -\def\e{e} -\def\rU{\mathrm{r}} - -\def\domain{\mathrm{D}} -\def\dom{\mathrm{dom}} - -\def\martfg{M^{f,g}} -\newcommand\Ddir[1]{D_{#1}} -\newcommand\maxplus[1]{\parenthese{#1}_+} -\def\Refl{\mathrm{R}} -\def\phibf{\pmb{\phi}} -\def\Gammabf{\mathbf{\Gamma}} - - -\def\transpose{\top} -%\def\v{v} -\def\w{w} -\def\y{y} -\def\z{z} -%%%% bar -\def\bD{\bar{D}} -\def\bC{\bar{C}} -\def\brho{\bar{\rho}} -\def\bt{\bar{t}} -\def\bA{\bar{A}} 
-\def\bb{\overline{b}} -\def\bc{\bar{c}} -\def\bgamma{\bar{\gamma}} -\def\bU{\bar{U}} -\def\Ub{\bU} -\def\lambdab{\bar{\lambda}} -\def\blambda{\bar{\lambda}} -\def\blambdab{\bar{\lambda}} -\def\bv{\bar{v}} -\def\vb{\bv} -\def\yb{\bar{y}} -\def\by{\yb} -\def\Xb{\bar{X}} -\def\Yb{\bar{Y}} -\def\Gb{\bar{G}} -\def\Eb{\bar{E}} -\def\Tb{\bar{T}} -\def\taub{\bar{\tau}} - -\def\bX{\bar{X}} -\def\bY{\bar{Y}} -\def\bG{\bar{G}} -\def\bE{\bar{E}} -\def\bT{\bar{T}} -\def\btau{\bar{\tau}} - -\def\pib{\bar{\pi}} -\def\bpi{\pib} - -\def\S{S} - -%%%% tilde -\def\tgamma{\tilde{\gamma}} -\def\tC{\tilde{C}} -\def\tB{\tilde{B}} -\def\tc{\tilde{c}} -\def\tvareps{\tilde{\vareps}} -\def\trho{\tilde{\rho}} -\def\tmsk{\tilde{\msk}} -\def\tW{\tilde{W}} -\def\tvarsigma{\tilde{\varsigma}} -\def\tv{\tilde{v}} -\def\vt{\tv} -\def\yt{\tilde{y}} -\def\ty{\yt} -\def\Mt{\tilde{M}} -\def\tM{\Mt} - -\def\tx{\tilde{x}} -\def\xt{\tx} -\def\Xt{\tilde{X}} -\def\Yt{\tilde{Y}} -\def\Gt{\tilde{G}} -\def\Et{\tilde{E}} -\def\Tt{\tilde{T}} -\def\St{\tilde{S}} -\def\taut{\tilde{\tau}} - -\def\tX{\tilde{X}} -\def\tY{\tilde{Y}} -\def\tG{\tilde{G}} -\def\tE{\tilde{E}} -\def\tT{\tilde{T}} -\def\tS{\tilde{S}} -\def\ttau{\tilde{\tau}} - - -\def\Xb{\bar{X}} -\def\Yb{\bar{Y}} -\def\Gb{\bar{G}} -\def\Eb{\bar{E}} -\def\Tb{\bar{T}} -\def\Sb{\bar{S}} -\def\taub{\bar{\tau}} -\def\Hb{\bar{H}} -\def\Nb{\bar{N}} - - -\def\bX{\bar{X}} -\def\bY{\bar{Y}} -\def\bG{\bar{G}} -\def\bE{\bar{E}} -\def\bT{\bar{T}} -\def\btau{\bar{\tau}} -\def\bS{\bar{S}} -\def\bH{\bar{H}} -%\def\bN{\bar{N}} - -%%%%%%%% - -\def\mgU{\mathrm{m}_{\nabla U}} -\def\MintDrift{I} -\def\CU{C_U} -\def\RU{R_1} -\def\RV{R} -\def\Reps{R_{\epsilon}} -\def\Resp{\Reps} -\def\veps{\varepsilon} - -\def\sphere{\mss} - -\def\nablaUt{\overline{\nabla U}} -\def\measureSphere{\nu^d} - -\def\etaU{\eta} -\def\epsilonU{\epsilon} - -\def\Jac{\operatorname{Jac}} -\def\jac{\operatorname{Jac}} -\def\sign{\operatorname{sign}} -\def\rate{\lambda_{\mathrm{r}}} - - - - - - - 
-\def\sigmaS{\sigma^2} - -\newcommand{\ensemble}[2]{\left\{#1\,:\eqsp #2\right\}} -\newcommand{\ensembleLigne}[2]{\{#1\,:\eqsp #2\}} -\newcommand{\set}[2]{\ensemble{#1}{#2}} - -\def\rmD{\mathrm{D}}%%rmd déjà pris -\def\mrd{\mathrm{D}} -\def\mrc{\mathrm{C}} - -\def\diag{\Delta_{\rset^d}} - -%\def\lyap{W} -\newcommand\coupling[2]{\Gamma(\mu,\nu)} -\def\supp{\mathrm{supp}} -\def\tpi{\tilde{\pi}} -\newcommand\adh[1]{\overline{#1}} - -\def\ACb{\mathrm{AC}_{\mathrm{b}}} - -\def\opK{\mathrm{K}} - -\newcommand{\fracm}[2]{\left. #1 \middle / #2 \right.} -\newcommand{\fraca}[2]{ #1 / #2 } -\newcommand{\fracaa}[2]{ #1 / (#2) } - -\newcommand{\complementary}{\mathrm{c}} - -% \renewcommand{\geq}{\geqslant} -% \renewcommand{\leq}{\leqslant} -\def\poty{H} -% \def\diam{\mathrm{diam}} -\def\diam{\mathfrak{d}} -\def\talpha{\tilde{\alpha}} -% \def\Leb{\mathrm{Leb}} -\def\Leb{\lambda} -\newcommand{\iintD}[2]{\{#1,\ldots,#2\}} -\def\interior{\mathrm{int}} -\def\iff{ if and only if } - -\def\vareps{\varepsilon} -\def\bvareps{\bar{\varepsilon}} -\def\varespilon{\varepsilon} -\def\si{\text{ if } } -\def\proj{\operatorname{proj}} -\def\projd{\operatorname{proj}^{\msd}} -\def\Phibf{\mathbf{\Phi}} -\def\Psibf{\mathbf{\Psi}} - -\def\rker{\mathrm{R}} -\def\kker{\mathrm{K}} - -\def\VEa{V} -\def\KUa{K} -\newcommandx{\KL}[2]{\operatorname{KL}\left( #1 | #2 \right)} -\newcommandx{\KLsqrt}[2]{\operatorname{KL}^{1/2}\left( #1 | #2 \right)} -\newcommandx{\Jef}[2]{\operatorname{J}\left( #1 , #2 \right)} -\newcommandx{\JefLigne}[2]{\operatorname{J}( #1 , #2 )} -\newcommandx{\KLLigne}[2]{\operatorname{KL}( #1 | #2 )} - -\def\gaStep -\def\QKer{Q} -\def\Tg{\mathcal{T}_{\gamma}} -\def\Tk{\mathcal{T}_{k}} -\def\Tn{\mathcal{T}_{k}} -\def\Tnplusun{\mathcal{T}_{k+1}} -\def\mcurb{m} -%\newcommand{\coupling}[1]{\Gamma\left( #1 \right)} -\newcommand{\couplingLine}[1]{\Gamma( #1 )} -\def\distance{\mathbf{d}} -\newcommandx{\wasserstein}[3][1=\distance,3=]{\mathbf{W}_{#1}^{#3}\left(#2\right)} 
-\newcommandx{\wassersteinLigne}[3][1=\distance,3=]{\mathbf{W}_{#1}^{#3}(#2)} -\newcommandx{\wassersteinD}[1][1=\distance]{\mathbf{W}_{#1}} -\newcommandx{\wassersteinDLigne}[1][1=\distance]{\mathbf{W}_{#1}} - - -\def\Rcoupling{\mathrm{R}} -\def\Qcoupling{\mathrm{Q}} -\def\Sker{\mathrm{S}} -\def\Kcoupling{\mathrm{K}} -\def\tKcoupling{\tilde{\mathrm{K}}} -\def\Lcoupling{\mathrm{L}} -\def\Kcouplingproj{\mathrm{K}^P} -\def\vepsilon{\varepsilon} - - -\newcommand{\defEnsE}[2]{\ensemble{#1}{#2}} -\newcommand{\expeMarkovTildeD}[3]{\widetilde{\PE}_{#1}^{#3} \left[ #2 \right]} -\newcommand{\probaMarkovTildeD}[3]{\widetilde{\PP}_{#1}^{#3} \left[ #2 \right]} -\def\coordtildex{\mathrm{w}} -\def\PPtilde{\widetilde{\PP}} -\def\PEtilde{\widetilde{\PE}} -\def\transfrr{\mathrm{F}} -\def\diagSet{\Delta_{\msx}} -\def\Deltar{\diagSet} -\def\complem{\operatorname{c}} -\def\alphar{\alpha} -\def\tildex{\tilde{x}} -\def\tildez{\tilde{z}} -\def\tildey{\tilde{y}} -\def\ar{\mathrm{a}} -\def\Kr{\mathsf{K}} -\def\Kar{K^{(\mathrm{a})}} -\def\Xr{\mathrm{X}} -\def\Yr{\mathrm{Y}} -\def\Xrd{\mathit{X}} -\def\Yrd{\mathit{Y}} -\def\Zr{\mathrm{Z}} -\def\Ur{\mathrm{U}} -\def\sigmaD{\sigma^2} -\def\sigmakD{\sigma^2_k} -\newcommandx{\phibfs}[1][1=]{\pmb{\varphi}_{\sigmaD_{#1}}} -\def\vphibf{\pmb{\varphi}} -\def\varphibf{\pmb{\varphi}} -\def\phibfvs{\pmb{\varphi}_{\varsigma^2}} -\def\funreg{\mct} -\def\kappar{\varpi} -\def\Pr{\mathsf{P}} -\def\Par{P^{(\mathrm{a})}} -\def\Qr{\mathsf{Q}} -\def\Qar{Q^{(\mathrm{a})}} -\def\eventA{\msa} - -\def\borelSet{\B} -\def\Er{\mathrm{E}} -\def\E{\mathbb{E}} -\def\er{\mathrm{e}} -\def\transp{\operatorname{T}} - -\newcommandx\sequenceg[3][2=,3=] -{\ifthenelse{\equal{#3}{}}{\ensuremath{( #1_{#2})}}{\ensuremath{( #1_{#2})_{ #2 \geq #3}}}} - - -\def\indiar{\iota} -\def\rated{\chi} -\def\transar{\tau} -\def\filtrationTilde{\tilde{\mcf}} - -\def\discrete{\mathrm{d}} -\def\continuous{\mathrm{c}} - - -\def\Xar{X^{(\mathrm{a})}} -\def\Yar{Y^{(\mathrm{a})}} 
-\def\War{W^{(\mathrm{a})}} -\def\Xiar{\Xi^{(\mathrm{a})}} -\def\mcfar{\mcf^{(\mathrm{a})}} - -\def\Xart{\tilde{X}^{(\mathrm{a})}} -\def\Yart{\tilde{Y}^{(\mathrm{a})}} - - -\def\Kker{\Kcoupling} -\def\KkerD{\tilde{\Kcoupling}} -\def\Rker{\Rcoupling} -\def\tRker{\tilde{\Rker}} -\def\Pker{\mathrm{P}} -\def\Pkerf{\overrightarrow{\mathrm{P}}} -\def\Pkerfou{\overrightarrow{\mathrm{P}}_{\mathrm{OU}}} -\def\Pkerb{\overleftarrow{\mathrm{P}}} -\def\Rkerb{\overleftarrow{\mathrm{R}}} -\def\Skerb{\overleftarrow{\mathrm{S}}} -\def\Qker{\mathrm{Q}} -\def\Lker{\mathrm{L}} -\def\rmL{\mathrm{L}} -\def\rmG{\mathrm{G}} -\def\bfmu{\bm{\mu}} - -\def\VlyapD{W} -\def\VlyapDun{W_1} -\def\VlyapDdeux{W_2} -\def\VlyapDtrois{W_3} -% \newcommandx{\distV}[1][1=W]{\mathbf{d}_{#1}} -\newcommandx{\distV}[1][1=\bfc]{\mathbf{W}_{#1}} -\newcommandx{\distVdeux}[1][1=W_2]{\mathbf{d}_{#1}} - -\def\inv{\leftarrow} -\newcommand{\couplage}[2]{\Pi(#1,#2)} -\def\mtt{\mathtt{m}} -\def\mttzero{\mathtt{m}_0} -\def\tmtt{\tilde{\mathtt{m}}} -\def\ttm{\mathtt{m}} -\def\mttplus{\mathtt{m}^{+}} -\def\mttplusun{\mathtt{m}_1^{+}} -\def\mttplusdeux{\mathtt{m}_2^{+}} -\def\ttmplus{\mathtt{m}^{+}} -\def\cconst{\mathtt{a}} -\def\Run{R_1} -\def\Rdeux{R_2} -\def\Rtrois{R_3} -\def\Rquatre{R_4} -\def\tR{\tilde{R}} -\def\tmttplus{\tilde{\mtt}^+} -\newcommand{\tup}[1]{\textup{#1}} -\def\Fix{\operatorname{Fix}} -\newcommand{\stopping}[1]{\T_{\msc,\mathtt{n}_0}^{(#1)}} -\def\wass{\mathcal{W}} -\def\distY{\mathbf{d}} -\def\Xibf{\boldsymbol{\Xi}} -\def\rhomax{\rho_{\rm{max}}} -\def\rhof{\overrightarrow{\rho}} -\def\familydrift{\mathscr{B}} - -\def\wasscun{\mathbf{W}_{\bfc_1}} -\def\wasscdeux{\mathbf{W}_{\bfc_2}} -\def\wassctrois{\mathbf{W}_{\bfc_3}} - -\def\loiz{\mu_{\msz}} -\def\muz{\loiz} -\def\funH{H} - -\renewcommand{\doteq}{=} -\newcommand{\Idd}{\operatorname{I}_d} - - -\def\driftb{b} -\def\Lttb{\mathtt{L}} - -%\def\upsigma - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main -%%% End: - diff --git 
a/doc/preamble/header.tex b/doc/preamble/header.tex deleted file mode 100644 index fea3905..0000000 --- a/doc/preamble/header.tex +++ /dev/null @@ -1,226 +0,0 @@ -\usepackage[utf8]{inputenc} % LaTeX, comprends les accents ! -\usepackage[T1]{fontenc} % Police contenant les caractères français -%\usepackage[french]{babel} % Placez ici une liste de langues -%\usepackage{multicol} - -%%%%%%%%%%%%%% -%% comment uncomment -%\usepackage[notref,notcite]{showkeys} -%%%% - - - % \usepackage[notref,notcite]{showkeys} % comment out for final version - % \renewcommand*\showkeyslabelformat[1]{\fbox{\normalfont\scriptsize\sffamily#1}} % for showkeys - -\usepackage{comment} -\usepackage{geometry} -\geometry{a4paper,margin=1in} -\usepackage{natbib} -% \usepackage[bibstyle=trad-abbrv, natbib=true, citestyle=numeric-comp, backref=true, useprefix, uniquename=false,maxcitenames=2]{biblatex} -% \newcommand{\citep}[]{} -%\setcitestyle{square} - -\usepackage[tbtags]{amsmath} -\usepackage{amsthm} -\allowdisplaybreaks -\usepackage{amssymb,mathrsfs} -\usepackage{nccmath} -\usepackage{amsfonts} -\usepackage{upgreek} -\usepackage{xspace} - -% \usepackage{nicefrac} - -%\usepackage[numbers]{natbib} -\usepackage{graphicx} -% \usepackage{subfig} -%\usepackage[caption = false]{subfig} %package pour faire sous-figures -\usepackage{color} -%\usepackage[ruled,vlined]{algorithm2e} -%\usepackage{algpseudocode,algorithm,algorithmicx} -\usepackage{algorithm, algpseudocode} -\begin{comment} - -\algnewcommand{\Inputs}[1]{% - \State \textbf{Inputs:} - \Statex \hspace*{\algorithmicindent}\parbox[t]{.8\linewidth}{\raggedright #1} -} -\algnewcommand{\Initialize}[1]{% - \State \textbf{Initialize:} - \Statex \hspace*{\algorithmicindent}\parbox[t]{.8\linewidth}{\raggedright #1} -} -\algnewcommand{\Outputs}[1]{% - \State \textbf{Outputs:} - \Statex \hspace*{\algorithmicindent}\parbox[t]{.8\linewidth}{\raggedright #1} -} -\end{comment} - -%########### -%\usepackage{manuColor} -\usepackage{stmaryrd} 
-\usepackage[inline]{enumitem} -%[wide, labelwidth=!, labelindent=0pt] -\usepackage{url} -\def\UrlBreaks{\do\/\do-} -\usepackage{tikz} -\usetikzlibrary{calc} -\newcommand\yBlock{1} -\newcommand\yNode{0.75} - -\newcommand\xNodemoinstiny{-1} -\newcommand\xNodemoins{-1.5} -\newcommand\xNodemoinsint{-2.} -\newcommand\xNodeMoins{-3} -\newcommand\xNodeMOINS{-4.5} - -\newcommand\xNodeplustiny{1} -\newcommand\xNodeplus{1.5} -\newcommand\xNodeplusint{2} -\newcommand\xNodePlus{3} -\newcommand\xNodePLUS{4.5} - -\usepackage{pgfplots} -\usepackage{xcolor} -\usepackage{bbm} -\usepackage{ifthen} -\usepackage{xargs} -\usepackage[textwidth=1.8cm]{todonotes} - -\usepackage{aliascnt} -% \usepackage{cleveref} -\usepackage[capitalise,noabbrev]{cleveref} -\usepackage{autonum} -\makeatletter -\newtheorem{theorem}{Theorem} -% \crefname{theorem}{theorem}{Theorems} -% \Crefname{Theorem}{Theorem}{Theorems} - - -\newtheorem*{lemma_nonumber*}{Lemma} - - -\newaliascnt{lemma}{theorem} -\newtheorem{lemma}[lemma]{Lemma} -\aliascntresetthe{lemma} -% \crefname{lemma}{lemma}{lemmas} -% \Crefname{Lemma}{Lemma}{Lemmas} - - - -\newaliascnt{corollary}{theorem} -\newtheorem{corollary}[corollary]{Corollary} -\aliascntresetthe{corollary} -% \crefname{corollary}{corollary}{corollaries} -% \Crefname{Corollary}{Corollary}{Corollaries} - -\newaliascnt{proposition}{theorem} -\newtheorem{proposition}[proposition]{Proposition} -\aliascntresetthe{proposition} -% \crefname{proposition}{proposition}{propositions} -% \Crefname{Proposition}{Proposition}{Propositions} - -\newaliascnt{definition}{theorem} -\newtheorem{definition}[definition]{Definition} -\aliascntresetthe{definition} -% \crefname{definition}{definition}{definitions} -% \Crefname{Definition}{Definition}{Definitions} - -\newaliascnt{remark}{theorem} -\newtheorem{remark}[remark]{Remark} -\aliascntresetthe{remark} -% \crefname{remark}{remark}{remarks} -% \Crefname{Remark}{Remark}{Remarks} - - -\newtheorem{example}[theorem]{Example} -% 
\crefname{example}{example}{examples} -% \Crefname{Example}{Example}{Examples} - -\newtheorem{technique}{Technique} -% \crefname{technique}{technique}{techniques} -% \Crefname{Technique}{Technique}{Techniques} - - -% \crefname{figure}{figure}{figures} -% \Crefname{Figure}{Figure}{Figures} - - -%\newtheorem{assumption}{\textbf{A}\hspace{-3pt}} -%\Crefname{assumption}{\textbf{A}\hspace{-3pt}}{\textbf{A}\hspace{-3pt}} -%\crefname{assumption}{\textbf{A}}{\textbf{A}} -\newtheorem{assumption}{\textbf{A}\hspace{-3pt}} -\crefformat{assumption}{{\textbf{A}}#2#1#3} - -\newtheorem{assumptionF}{\textbf{F}\hspace{-3pt}} -\crefformat{assumptionF}{{\textbf{F}}#2#1#3} - -\newenvironment{assumptionbis}[1] - {\renewcommand{\theassumptionF}{\ref*{#1}$\mathbf{b}$}% - \addtocounter{assumptionF}{-1}% - \begin{assumptionF}} - {\end{assumptionF}} - - - -\newtheorem{assumptionB}{\textbf{B}\hspace{-3pt}} -\Crefname{assumptionB}{\textbf{B}\hspace{-3pt}}{\textbf{B}\hspace{-3pt}} -\crefname{assumptionB}{\textbf{B}}{\textbf{B}} - -\newtheorem{assumptionC}{\textbf{C}\hspace{-3pt}} -\Crefname{assumptionC}{\textbf{C}\hspace{-3pt}}{\textbf{C}\hspace{-3pt}} -\crefname{assumptionC}{\textbf{C}}{\textbf{C}} - - -\newtheorem{assumptionH}{\textbf{H}\hspace{-3pt}} -\Crefname{assumptionH}{\textbf{H}\hspace{-3pt}}{\textbf{H}\hspace{-3pt}} -\crefname{assumptionH}{\textbf{H}}{\textbf{H}} - -\newtheorem{assumptionT}{\textbf{T}\hspace{-3pt}} -\Crefname{assumptionT}{\textbf{T}\hspace{-3pt}}{\textbf{T}\hspace{-3pt}} -\crefname{assumptionT}{\textbf{T}}{\textbf{T}} - -\newtheorem{assumptionD}{\textbf{D}\hspace{-3pt}} -\Crefname{assumptionT}{\textbf{T}\hspace{-3pt}}{\textbf{T}\hspace{-3pt}} -\crefname{assumptionT}{\textbf{T}}{\textbf{T}} - - -\newtheorem{assumptionL}{\textbf{L}\hspace{-3pt}} -\Crefname{assumptionL}{\textbf{L}\hspace{-3pt}}{\textbf{L}\hspace{-3pt}} -\crefname{assumptionL}{\textbf{L}}{\textbf{L}} - -\newtheorem{assumptionQ}{\textbf{Q}\hspace{-3pt}} 
-\Crefname{assumptionQ}{\textbf{Q}\hspace{-3pt}}{\textbf{Q}\hspace{-3pt}} -\crefname{assumptionQ}{\textbf{Q}}{\textbf{Q}} - -% \newtheorem{assumptionD*}{\textbf{D}\hspace{-3pt}} -% \Crefname{assumptionD}{\textbf{D}\hspace{-3pt}}{\textbf{D}\hspace{-3pt}} -% \crefname{assumptionD}{\textbf{D}}{\textbf{D}} - -\newtheorem{assumptionAR}{\textbf{AR}\hspace{-3pt}} -\Crefname{assumptionAR}{\textbf{AR}\hspace{-3pt}}{\textbf{AR}\hspace{-3pt}} -\crefname{assumptionAR}{\textbf{AR}}{\textbf{AR}} - - - -\newcommand\diaW{11} -\newcommand\diaH{5} -\newcommand\diaJump{2.75} -\newcommand\nextRow{1.25} -\newcommand\imW{0.08} -\newcommand\imWB{0.1} -\newcommand\imOp{0.6} -\newcommand\bend{5} - -\newcommand\offset{2} -\newcommand\offsety{2.3} -\newcommand\h{2.25} -\newcommand\hsmall{1.75} -\newcommand\ww{3.25} -\newcommand\www{1.8} -\newcommand\wwww{3.5} -\newcommand\wwwww{4.8} -\newcommand{\offsetsmall}{1.5} - - -\usepackage{bm} -\usepackage{wrapfig} diff --git a/doc/prel.tex b/doc/prel.tex deleted file mode 100644 index a51c07b..0000000 --- a/doc/prel.tex +++ /dev/null @@ -1,694 +0,0 @@ -\section{Preliminaries on stochastic Riemannian geometry} -\label{sec:prel-stoch-riem} - -In this section, we recall some basic facts on Riemannian geometry and -stochastic Riemannian geometry. We follow -\cite{hsu2002stochastic,lee2018introduction,lee2006riemannian} and refer to -\cite{lee2010introduction,lee2013smooth} for a general introduction to -topological and smooth manifolds. Throughout this section $\M$ is a -$d$-dimensional smooth manifold, $\TM$ its tangent bundle and $\TMstar$ it -cotangent bundle. We denote $\rmc^\infty(\M)$ the set of real-valued smooth -functions on $\M$ and $\XM$ the set of vector fields on $\M$. - -\subsection{Tensor field, metric, connection and transport} -\label{sec:metr-conn-tens} - -\paragraph{Tensor field and Riemannian metric} - -For a vector space $V$ let -$\mathrm{T}^{k, \ell}(V) = V^{\otimes k} \otimes (V^\star)^{\otimes \ell}$ with -$k, \ell \in \nset$. 
For any $k, \ell \in \nset$ we define the space of -$(k,\ell)$-tensors as -$\mathrm{T}^{k,\ell} \M = \sqcup_{p \in \M} -\mathrm{T}^{k,\ell}(\mathrm{T}_p\M)$. Note that -$\Gamma(\M, \mathrm{T}^{0,0}\M) = \mathrm{C}^\infty(\M)$, -$\XM = \Gamma(\M, \mathrm{T}^{1,0} \M)$ and that the space of $1$-form on $\M$ -is given by $\Gamma(\M, \mathrm{T}^{0,1} \M)$, where $\Gamma(\M, V(\M))$ is a -section of a vector bundle $V(\M)$ \citep[see][Chapter 10]{lee2013smooth}. For -any $k \in \nset$, we denote -$\mathrm{T}^{\abs{k}} \M = \sqcup_{j=0}^k \mathrm{T}^{j,k-j} \M$. -% \valentin{maybe talk about pushforward and pullback here ?} -$\M$ is said to be -a Riemannian manifold if there exists $g \in \Gamma(\M, \mathrm{T}^{0,2} \M)$ such that for -any $x \in \M$, $g(x)$ is positive definite. $g$ is called the Riemannian metric -of $\M$. Every smooth manifold can be equipped with a Riemannian metric -\cite[see][Proposition 2.4]{lee2018introduction}. In local coordinates we define -$G = \{g_{i,j}\}_{1 \leq i,j \leq d} = \{g(X_i, X_j)\}_{1 \leq i,j \leq d}$, -where $\{X_i\}_{i=1}^d$ is a basis of the tangent space. In what follows we -consider that $\M$ is equipped with a metric $g$ and for any $X, Y \in \XM$ we -denote $\langle X,Y \rangle_{\M} = g(X,Y)$. - -\paragraph{Connection} -A connection $\nabla$ is a mapping which allows one to differentiate vector -fields w.r.t other vector fields. $\nabla$ is a linear map -$\nabla: \ \XM \times \XM \to \XM$. In addition, we assume that -\begin{enumerate*}[label=\roman*)] -\item for any $f \in \rmc^\infty(\M)$, $X, Y \in \XM$, $\nabla_{f X}(Y) = f \nabla_X Y$, -\item for any $f \in \rmc^\infty(\M)$, $X, Y \in \XM$, $\nabla_{X}(fY) = f \nabla_X Y + X(f) Y$. -\end{enumerate*} -Given a system of local coordinates, the Christoffel symbols -$\{\Gamma_{i,j}^k\}_{1 \leq i,j,k\leq d}$ are given for any -$i,j \in \{1, \dots, d\}$ by -$\nabla_{X_i}X_j = \sum_{k=1}^d \Gamma_{i,j}^k X_k$. 
We -also define the Levi-Civita connection $\nabla$ by considering the additional -two conditions: -\begin{enumerate*}[label=\roman*)] -\item $\nabla$ is torsion-free, \ie \ for any $X, Y \in \XM$ we have - $\nabla_X Y - \nabla_Y X = [X,Y]$, where $[X,Y]$ is the Lie bracket between - $X$ and $Y$, -\item $\nabla$ is compatible with the metric $g$, \ie \ for any $X,Y,Z \in \XM$, - $X (\langle Y,Z \rangle_\M) = \langle\nabla_X Y, Z\rangle_\M + \langle Y, \nabla_X Z \rangle_\M$. -\end{enumerate*} -We recall that the Levi-Civita connection is uniquely defined since for any -$X,Y,Z \in \XM$ we have -\begin{align} - 2 \prodM{\nabla_X Y}{Z} &= X(\prodM{Y}{Z}) + Y(\prodM{Z}{X}) - Z(\prodM{X}{Y}) + \prodM{[X,Y]}{Z} - \prodM{[Z,X]}{Y} - \prodM{[Y,Z]}{X} . -\end{align} -In this case, we have that the Christoffel symbols are given for any -$i,j,k \in \{1, \dots, d\}$ by -\begin{equation} - \textstyle{\Gamma_{i,j}^k = (1/2) \sum_{m=1}^d g^{km} (\partial_j g_{m,i} + \partial_i g_{m,j} - \partial_m g_{i,j}) ,} -\end{equation} -where $\{g^{i,j}\}_{1 \leq i,j \leq d} = G^{-1}$. Note that if $\M$ is Euclidean -then for any $i,j,k \in \{1, \dots, d\}$, $\Gamma_{i,j}^k = 0$. We also extend -the connection so that for any $X \in \XM$ and $f \in \rmc^\infty(M)$ we have -$\nabla_X f = X(f)$. In particular, we have that -$\nabla_X f \in \rmc^\infty(\M)$. In addition, we extend the connection such -that for any $\alpha \in \Gamma(\M, \mathrm{T}^{0,1} \M)$, $X,Y \in \XM$ we have -$\nabla_X \alpha (Y) = \alpha(\nabla_X Y) - X(\alpha(Y))$. In particular, we -have that $\nabla_X \alpha \in \Gamma(\M, \mathrm{T}^{1,0} \M)$. Note that for any -$X \in \XM$ and $\alpha, \beta \in \mathrm{T}^{\abs{1}} \M$ we have -$\nabla_X (\alpha \otimes \beta) = \nabla_X \alpha \otimes \beta + \alpha -\otimes \nabla_X \beta$. Similarly, we can define recursively $\nabla_X \alpha$ -for any $\alpha \in \Gamma(\M, \mathrm{T}^{k,\ell}\M)$ with $k, \ell \in \nset$. Such an -extension is called a covariant derivative. 
- -\paragraph{Parallel transport, geodesics and exponential mapping} Given a -connection, we can define the notion of parallel transport, which transports -vector fields along a curve. Let $\gamma: \ \ccint{0,1} \to \M$ be a smooth -curve. We define the covariant derivative along the curve $\gamma$ by -$D_{\dot{\gamma}}: \ \Xgamma \to \Xgamma$ similarly to the connection, where -$\Xgamma = \Gamma(\gamma(\ccint{0,1}), \TM)$. In particular if $\dot{\gamma}$ -and $X \in \Xgamma$ can be extended to $\XM$ then we define -$D_{\dot{\gamma}}(X) = \nabla_{\dot{\gamma}}X \in \XM$. In what follows, we -denote $D = \nabla$ for simplicity. We say that $X \in \Xgamma$ is parallel to -$\gamma$ if for any $t \in \ccint{0,1}$, $\nabla_{\dot{\gamma}}X(t) = 0$. In -local coordinates, let $X \in \Xgamma$ be given for any $t \in \ccint{0,1}$ by -$X = \sum_{i=1}^d a_i(t) E_i(t)$ (assuming that $\gamma([0,1])$ is entirely -contained in a local chart), then we have that for any $t \in \ccint{0,1}$ and -$k \in \{1, \dots, d\}$ -\begin{equation} - \label{eq:parallel_transport} - \textstyle{\dot{a}_k(t) + \sum_{i,j=1}^d \Gamma_{i,j}^k(x(t)) \dot{x}_i(t) a_j(t) = 0 .} -\end{equation} -A curve $\gamma$ on $\M$ is said to be a geodesics if $\dot{\gamma}$ is parallel -to $\gamma$. Using \cref{eq:parallel_transport} we get that -\begin{equation} - \label{eq:geodesics} - \textstyle{\ddot{x}_k(t) + \sum_{i,j=1}^d \Gamma_{i,j}^k(x(t)) \dot{x}_i(t) \dot{x}_j(t) = 0 .} -\end{equation} -For more details on geodesics and parallel transport, we refer to \citet[Chapter -4]{lee2018introduction}. Parallel transport will be key to define the frame -bundle and the orthonormal frame bundle in \cref{sec:frame-bundle-orth}. In -addition, we have that parallel transport provides a linear isomorphism between -tangent spaces. Indeed, let $v \in \mathrm{T}_x \M$ and -$\gamma: \ \ccint{0,1} \to \M$ with $\gamma(0) = x$ a smooth curve. 
Then, there -exists a unique vector field $X^v \in \Xgamma$ such that $X^v(x) = v$ and $X^v$ is -parallel to $\gamma$. For any $t \in \ccint{0,1}$, we denote -$\Gamma_0^t: \mathrm{T}_{x} \M \to \mathrm{T}_{\gamma(t)} \M$ the linear -isomorphism such that $\Gamma_0^t(v) = X^v(\gamma(t))$. - -For any $x \in \M$ and $v \in \mathrm{T}_x \M$ we denote -$\gamma^{x,v}: \ \ccint{0,\vareps^{x,v}}$ the geodesics (defined on the maximal -interval $\ccint{0, \vareps^{x,v}}$) on $\M$ such that $\gamma(0) = x$ and -$\dot \gamma(0) = v$. We denote -$\msu^x = \ensembleLigne{v \in \mathrm{T}_x \M}{\vareps^{x,v} \geq 1}$. Note -that $0 \in \msu^x$. For any $x \in \M$, we define the exponential mapping -$\exp_x: \ \msu^x \to \M$ such that for any $v \in \msu^x$, -$\exp_x(v) = \gamma^{x,v}(1)$. If for any $x \in \M$, -$\msu^x = \mathrm{T}_x \M$, the manifold is called \emph{geodesically - complete}. Note that any connected compact manifold is geodesically -complete. As a consequence we have that there exists a geodesic between any two -points $x, y \in \M$ \cite[see][Lemma 6.18]{lee2018introduction}. For any -$x, y \in \M$, we denote $\mathrm{Geo}_{x,y}$ the sets of geodesics $\gamma$ -such that $\gamma(0) = x$ and $\gamma(y) = 1$. For any $x, y \in \M$ we denote -$\Gamma_x^y(\gamma) : \ \mathrm{T}_x \M \to \mathrm{T}_y \M$ the linear -isomorphism such that for any $v \in \mathrm{T}_x \M$, -$\Gamma_x^y(v) = X^v(\gamma(1))$, where $\gamma \in \mathrm{Geo}_{x,y}$. Note -that for any $x \in \M$ there exists $\msv^x \subset \M$ such that -$x \in \msv^x$ and for any $y \in \msv^x$ we have that -$\absLigne{\mathrm{Geo}_{x,y}}=1$. In this case, we denote -$\Gamma_x^y = \Gamma_x^y(\gamma)$ with $\gamma \in \mathrm{Geo}_{x,y}$. - -\paragraph{Orthogonal projection} We will make repeated use of orthonormal -projections on manifolds. Recall that since $\M$ is a closed Riemannian manifold -we can use the Nash embedding theorem \citep{gunther1991isometric}. 
In the rest -of this paragraph, we assume that $\M$ is a Riemannian submanifold of $\rset^p$ -for some $p \in \nset$ such that its metric is induced by the Euclidean -metric. In order to define the projection we introduce -\begin{equation} - \mathrm{unpp}(\M) = \ensembleLigne{x \in \rset^d}{\text{there exists a unique $\xi_x$ such that $\normLigne{x - \xi_x} = d(x, \M)$}} . -\end{equation} -Let $\mathcal{E}(\M) = \interior(\mathrm{unpp}(\M))$. By \citet[Theorem -1]{leobacher2021existence}, we have that $\M \subset \mathcal{E}(\M)$. We define -$\tilde{p}: \ \mathcal{E}(\M) \to \M$ such that for any $x \in \mathcal{E}(\M)$, -$\tilde{p}(x) = \xi_x$. Using \citet[Theorem 2]{leobacher2021existence}, we have -that $\tilde{p} \in \rmc^\infty(\rset^p, \M)$ and that for any $x \in \M$, -$\tilde{P}(x) = \rmd \tilde{p}(x)$ is the orthogonal projection on -$\mathrm{T}_x\M$. Since $\rset^p$ is normal and $\M$ and -$\mathcal{E}(\M)^\complementary$ are closed, there exists $\msf$ open such that -$\M \subset \msf \subset \mathcal{E}(\M)$. Let -$p \in \rmc^\infty(\rset^p, \rset^p)$ such that for any $x \in \msf$, -$p(x) = \tilde{p}(x)$ (given by Whitney extension theorem for -instance). Finally, we define $P: \ \rset^p \to \rset^p$ such that for any -$x \in \rset^p$, $P(x) = \rmd p(x)$. Note that for any $x \in \M$, $P(x)$ is the -orthogonal projection $\mathrm{T}_x \M$ and that -$P \in \rmc^\infty(\rset^p, \rset^p)$. - - -\subsection{Stochastic Differential Equations on manifolds} -\label{sec:stoch-diff-equat} - - -\paragraph{Stratanovitch integral} For reasons that will become clear in the -next paragraph it is easier to define Stochastic Differential Equations (SDEs) -on manifolds w.r.t the Stratanovitch integral \cite[Part II, Chapter -3]{kloeden:platen:2011}. We consider a filtered probability space -$(\Omega, (\mcf_t)_{t \geq 0}, \Pbb)$. Let $(\bfX_t)_{t \geq 0}$ and -$(\bfY_t)_{t \geq 0}$ be two real continuous semimartingales. 
We define the -quadratic covariation $([\bfX,\bfY]_t)_{t \geq 0}$ such that for any $t \geq 0$ -\begin{equation} - \textstyle{[\bfX,\bfY]_t = \bfX_t \bfY_t - \bfX_0\bfY_0 - \int_0^t \bfX_s \rmd \bfY_s - \int_0^t \bfY_s \rmd \bfX_s . } -\end{equation} -We refer to \citet[Chapter IV]{revuz1999continuous} for more details on -semimartingales and quadratic variations. We denote $[\bfX] = [\bfX, \bfX]$. In -particular, we have that $([\bfX, \bfY]_t)_{t \geq 0}$ is an adapted continuous -process with finite-variation and therefore $[[\bfX, \bfY]] = 0$. Let -$(\bfX_t)_{t \geq 0}$ and $(\bfY_t)_{t \geq 0}$ be two real continuous -semimartingales, then we define the Stratanovitch integral as follows for any -$t \geq 0$ -\begin{equation} - \textstyle{ \int_0^t \bfX_s \circ \rmd \bfY_s = \int_0^t \bfX_s \rmd \bfY_s + (1/2) [\bfX, \bfY]_t . } -\end{equation} -In particular, denoting $(\bfZ_t^1)_{t \geq 0}$ and $(\bfZ_t^2)_{t \geq 0}$ the -processes such that for any $t \geq 0$, -$\bfZ_t^1 = \int_0^t \bfX_s \circ \rmd \bfY_s$ and -$\bfZ_t^2 = \int_0^t \bfX_s \rmd \bfY_s$, we have that $[\bfZ^1] = [\bfZ^2]$. We -refer to \cite{kurtz1995stratonovich} for more details on Stratanovitch -integrals. Note that if for any $t \geq 0$, -$\bfX_t = \int_0^t f(\bfX_s) \circ \rmd \bfY_s$ with $\rmc^1(\rset, \rset)$, -then $[\bfX, \bfY]_t = \int_0^t f(\bfX_s) f'(\bfX_s) \rmd \bfY_s$. Assuming that -$f \in \rmc^3(\rset, \rset)$ we have that \cite[Chapter IV, Exercise -3.15]{revuz1999continuous} -\begin{equation} - \label{eq:stratanovitch_lemma} - \textstyle{ f(\bfX_t) = f(\bfX_0) + \int_0^t f'(\bfX_s) \circ \rmd \bfX_s .} -\end{equation} -The proof relies on the fact that for any $t \geq 0$, -$\rmd [\bfX, f'(\bfX)]_t = f''(\bfX_t) \rmd [\bfX]_t$. This result should be -compared with It\^o's lemma. In particular, Stratanovitch calculus satisfies the -ordinary chain rule making it a useful tool in differential geometry which -makes a heavy use of diffeomorphism. 
- -\paragraph{SDEs on manifolds} -We define semimartingales and SDEs on manifold through the lens of their actions -on functions. A continuous $\M$-valued stochastic process $(\bfX_t)_{t \geq 0}$ -is called a $\M$-valued semimartingale if for any $f \in \rmc^\infty(\M)$ we -have that $(f(\bfX_t))_{t \geq 0}$ is a real valued semimartingale. Let -$\ell \in \nset$, $V^{1:\ell} = \{ V_i\}_{i=1}^\ell \in \XM^\ell$ and -$Z^{1:\ell} = \{Z^i\}_{i=1}^\ell$ a collection of $\ell$ real-valued -semimartingales. A $\M$-valued semimartingale $(\bfX_t)_{t \geq 0}$ is said to -be the solution of $\SDE(V^{1:\ell}, Z^{1:\ell}, \bfX_0)$ up to a stopping -$\tau$ with $\bfX_0$ a $\M$-valued random variable if for all -$f \in \rmc^\infty(\M)$ and $t \in \ccint{0, \tau}$ we have -\begin{equation} - \textstyle{f(\bfX_t) = f(\bfX_0) + \sum_{i=1}^\ell \int_0^t V_i(f)(\bfX_s) \circ \rmd \bfZ^i_s . } -\end{equation} -Since the previous SDE is defined w.r.t the Stratanovitch integral we have that -if $(\bfX_t)_{t \geq 0}$ is a solution of $\SDE(V^{1:\ell}, Z^{1:\ell}, \bfX_0)$ -and $\Phibf: \M \to \mathcal{N}$ is a diffeomorphism then $(\Phibf(\bfX_t))_{t \geq 0}$ -is a solution of $\SDE(\Phibf_\star V^{1:\ell}, Z^{1:\ell}, \Phibf(\bfX_0))$, -where $\Phibf_\star$ is the pushforward operation \cite[see][Proposition -1.2.4]{hsu2002stochastic}. Because the vector fields $\{V_i\}_{i=1}^\ell$ are -smooth we have that for any $\ell \in \nset$, -$V^{1:\ell} = \{ V_i\}_{i=1}^\ell \in \XM^\ell$ and -$Z^{1:\ell} = \{Z^i\}_{i=1}^\ell$ a collection of $\ell$ real-valued -semimartingales, there exists a unique solution to -$\SDE(V^{1:\ell}, Z^{1:\ell}, \bfX_0)$ \cite[see][Theorem -1.2.9]{hsu2002stochastic}. - - -\subsection{Frame bundle and orthonormal frame bundle} -\label{sec:frame-bundle-orth} - -We now introduce the concepts of frame bundle and orthonormal bundle over the -manifold $\M$. These concepts are useful to define stochastic processes on $\M$ -using Euclidean stochastic processes. 
In particular, we will see that a Brownian -motion on the manifold can be linked to the Euclidean Brownian motion using the -orthonormal bundle. For any $x \in \M$, a frame at $x$ is an isomorphism -$f: \ \rset^d \to \mathrm{T}_x \M$. Note that $f$ is equivalent to the choice of -a basis in $\mathrm{T}_x \M$. We denote $\mathrm{F}_x \M$ the set of frames at -$p$. The frame bundle denoted $\FM$ is given by -$\FM = \sqcup_{x \in \M} \mathrm{F}_x \M$. The frame bundle can be given a -smooth structure and is therefore a $d + d^2$-dimensional manifold. Similarly, -for any $x \in \M$, an orthonormal frame at $x$ is a linear isometry -$f: \ \rset^d \to \mathrm{T}_x \M$. Note that $f$ is equivalent to the choice of -an orthonormal basis in $\mathrm{T}_x \M$. We denote $\mathrm{O}_p \M$ the set -of orthonormal frames at $p$. The orthonormal frame bundle denoted $\OM$ is -given by $\OM = \sqcup_{x \in \M} \mathrm{O}_x \M$. The orthonormal frame bundle -can be given a smooth structure and is therefore a $d + d(d-1)/2$-dimensional -manifold. We denote $\pi: \ \FM \to \M$ the smooth projection such that for any -$u = (x,f) \in \FM$, $\pi(u) = x$. Note that the restriction of $\pi$ to the -orthonormal bundle is also smooth. Frame bundles and orthonormal bundles are -primary examples of principal bundles and we refer to \cite{kolar2013natural} -for more details. - -One key element of frame bundles and orthonormal bundles is their link with the -connections on $\M$. Let $u = (x, f) \in \FM$ and -$U \in \mathrm{T}_u \mathrm{F}M$. $U$ is said to be vertical if there exists a -smooth curve $u: \ \ccint{0,1} \to \FM$ such that for any $t \in \ccint{0,1}$, -$\pi(u(t)) = x$ and $\dot u(0) = U$. We say that $U$ is tangent to the fibre -$\mathrm{F}_{\pi(u)}\M$. The space of vertical tangent vectors is called the -vertical space and is denoted $\mathrm{V}_u \FM$. We have that -$\mathrm{dim}(\mathrm{V}_u \mathrm{F}\M) = d^2$. We now define the horizontal -space as follows. 
Let $u: \ \ccint{0,1} \to \FM$ be a smooth curve. We say that -$u = (f,x)$ is horizontal if for any $t \in \ccint{0,1}$ and -$i \in \{1, \dots, d\}$, $\nabla_{\dot x} (f e_i)(t) = 0$, where -$\{e_i\}_{i=1}^d$ is the canonical basis of $\rset^d$. In other words, the -horizontal curve corresponds to the parallel transport of a frame along a smooth -curve in $\M$. Let $u = (x, f) \in \FM$ and $U \in \mathrm{T}_u -\mathrm{F}M$. $U$ is said to be horizontal if there exists a smooth horizontal -curve $u: \ \ccint{0,1} \to \FM$ such that $\dot u(0) = U$. The space of -horizontal tangent vectors is called the horizontal space and is denoted -$\mathrm{H}_u \FM$. Let $v \in \rset^d$, we define the vector field -$H_v \in \mathcal{X}(\FM)$ such that for any $u \in \FM$, $H_v(u) = \dot u(0)$ -with $\gamma=(x,f): \ \ccint{0,1} \to \FM$ a smooth curve on $\FM$ such that -$\dot x(0) = e(0)v$ and $\gamma(0) = u$. The existence of $H_v$ for any -$v \in \rset^d$ is discussed in \citet[p.69-70]{kobayashi1963foundations} and -$H_v$ is called the horizontal lift of $v$. For any $i \in \{1, \dots, d\}$ we -denote $H_i = H_{e_i}$ where $\{e_i\}_{i=1}^d$ is the canonical basis of -$\rset^d$. In particular, since any horizontal curve is entirely specified by -$\gamma(0) = (x(0), f(0))$ and $\dot{x}(0)$, we get that -$\mathrm{dim}(\mathrm{H}_u \FM) = d$ for any $u \in \FM$. - -Consider a connection $\nabla$ on $\M$. Note that for any $u = (x,f) \in \FM$, we have -$\mathrm{T}_u \FM = \mathrm{T}_u \M \oplus \mathrm{V}_u \FM$. In local -coordinates $\{x_i\}_{i=1}^d$, we denote $\{X_i\}_{i=1}^d$ a basis of -$\mathrm{T}_x \M$. For any $j \in \{1, \dots, j\}$, there exist -$\{f_{i,j}\}_{i=1}^d$ such that $f e_j = \sum_{i=1}^d f_{i,j} X_i$ (note that -$\{f_{i,j}\}_{1 \leq i,j \leq d}$ can be interpreted as the matrix transforming -a vector of $\rset^d$ into a vector of $\mathrm{T}_x\M$ expressed in the basis -$\{X_i\}_{i=1}^d$). 
In particular, we have that -$\{x_k, f_{i,j}\}_{1 \leq i,j, k \leq d}$ are local coordinates for $\FM$. We -denote by $\{X_k, X_{i,j}\}_{1 \leq i,j,k \leq d}$ the associated basis in -$\mathrm{T}_u \FM$ for any $u \in \msu$, where $\msu$ is an open subset of $\FM$ -on which the local coordinates are well-defined. Leveraging properties of -parallel transport, we have that for any $j \in \{1, \dots, d\}$ and $u \in \msu$ -\begin{equation} - \label{eq:horizontal_lift} - \textstyle{ H_j(u) = \sum_{i=1}^d f_{i,j} X_i - \sum_{\ell, m=1}^d \{ \sum_{i, k=1}^d f_{i,j} f_{k,m} \Gamma_{i,k}^\ell\} X_{\ell,m} ,} -\end{equation} -where we recall that $\{\Gamma_{i,j}^k\}_{1 \leq i,j,k \leq d}$ are the -Christoffel symbols of the connection in local coordinates. In particular, it -is clear that for any $u \in \FM$, $\{H_i(u)\}_{i=1}^d$ is a basis of -$\mathrm{H}_u \FM$ and that $\mathrm{H}_u \FM \cap \mathrm{V}_u \FM = \{0\}$, -hence $\mathrm{T}_u \FM = \mathrm{H}_u \FM \oplus \mathrm{V}_u \FM$. Using -\cref{eq:horizontal_lift} we have that the horizontal space is entirely defined -by the connection $\nabla$. Reciprocally, any smooth linear complement of the -vertical space gives rise to a connection \cite[see][Section -11.11]{kolar2013natural}. - -We now illustrate how we can go from a smooth curve on $\M$ (equipped with a -connection $\nabla$) to a smooth curve on $\rset^d$. First, let -$x: \ \ccint{0,1} \to \M$ be a smooth curve on manifold. Define -$f(0) \in \mathrm{F}_{x(0)} \M$ and consider $u: \ \ccint{0,1} \to \FM$ the -smooth horizontal curve associated with $x$ and starting frame $f(0)$. Now -consider the antidevelopment of $u$ given by the smooth curve -$z: \ \ccint{0,1} \to \rset^d$ such that for any $t \in \ccint{0,t}$ -\begin{equation} - \textstyle{ z(t) = \int_0^t f(s)^{-1} \dot x(s) \rmd s . } -\end{equation} -We now show how a smooth curve on $\rset^d$ gives rise to a smooth curve in -$\M$. 
First, note that for any $t \in \ccint{0,1}$, we have that -$\dot u (t) = \sum_{i=1}^d H_i(u(t)) \dot z_i(t)$. Hence, specifying $u(0)$ any -smooth curve $z$ on $\rset^d$ is associated to a smooth curve on $\FM$. We -obtain a smooth curve on $\M$ by considering $x = \pi(u)$. In the next section, -we present similar ideas when smooth curves are replaced by semimartingales. - -\subsection{Horizontal lift and stochastic development} -\label{sec:horiz-lift-stoch} - -We are now ready to present the notion of horizontal semimartingale, which is -key to draw the link between semimartingales on $\M$ and semimartingales on -$\rset^d$. We follow the presentation of \citet[Section -2.3]{hsu2002stochastic}. Again, we consider a filtered probability space -$(\Omega, (\mcf_t)_{t \geq 0}, \Pbb)$. All the semimartingales we consider are -defined w.r.t this filtered probability space. We assume that the manifold $\M$ -is equipped with a connection $\nabla$. - -\begin{definition}[Stochastic development] - Let $(\bfZ^{1:d}_t)_{t \geq 0} = \{(\bfZ_t^i)_{t \geq 0}\}_{i=1}^d$ be a - collection of real-valued semimartingales. Let $(\bfU_t)_{t \geq 0}$ be the - $\FM$ semimartingale solution of $\SDE(H^{1:d}, \bfZ^{1:d}, \bfU_0)$ with - $H^{1:d} = \{H_i\}_{i=1}^d$. $(\bfU_t)_{t \geq 0}$ is called the \emph{stochastic - development} of $\bfZ^{1:d}$ on $\FM$. Similarly, the $\M$-valued - semimartingale $(\bfX_t)_{t \geq 0} = (\pi(\bfU_t))_{t \geq 0}$ is called the - \emph{stochastic development} of $\bfZ^{1:d}$ on $\M$. -\end{definition} - -The previous definition allows to transfer a semimartingale on $\rset^d$ to a -semimartingale on $\M$ in an \emph{intrinsic} manner. Reciprocally, we also aim -at transferring a semimartingale on $\M$ to a semimartingale on $\rset^d$. - -\begin{definition}[Horizontal lift and antivelopment] - Let $(\bfX_t)_{t \geq 0}$ be a $\M$-valued semimartingale. 
If there exist a - $\FM$-valued semimartingale $(\bfU_t)_{t \geq 0}$ and - $(\bfZ^{1:d}_t)_{t \geq 0} = \{(\bfZ_t^i)_{t \geq 0}\}_{i=1}^d$ a collection - of real-valued semimartingales such that - $(\bfX_t)_{t \geq 0} = (\pi(\bfU_t))_{t \geq 0}$ and $(\bfU_t)_{t \geq 0}$ is - solution of $\SDE(H^{1:d}, \bfZ^{1:d}, \bfU_0)$ with - $H^{1:d} = \{H_i\}_{i=1}^d$ then $(\bfU_t)_{t \geq 0}$ is called the - \emph{horizontal lift} of $(\bfX_t)_{t \geq 0}$ and - $(\bfZ^{1:d}_t)_{t \geq 0}$ the \emph{antidevelopment} of - $(\bfX_t)_{t \geq 0}$. -\end{definition} - -The existence of an horizontal lift and an antidevelopment is not -trivial. Considering the Nash embedding theorem -\citep[see for example][]{gunther1991isometric}, it is possible to show the existence -and uniqueness of these processes (up to initialization). Without loss of -generality, we can then assume that $\M \subset \rset^p$ and for any $x \in \M$, -$\mathrm{T}_x \M \subset \rset^p$ with $p \geq d(d+1)/2$ (and -$p \leq \max(d(d+5)/2, d(d+3)/2+5)$). For any $x \in \M$, we denote -$P(x): \ \rset^p \to \mathrm{T}_x \M$ the projection operator. In addition for -any $x \in \M$, we denote $\{P_i(x)\}_{i=1}^p = \{P(x) e_i\}_{i=1}^p$, where -$\{e_i\}_{i=1}^p$ is the canonical basis of $\rset^p$. Note that -$\{P_i\}_{i=1}^p \in \XM^p$. In addition for any $x \in \M$ we denote -$\{x^i\}_{i=1}^p$ its coordinates in $\rset^p$, \ie \ for any -$i \in \{1, \dots, p\}$, $x^i = \langle x, e_i \rangle$. In particular, if -$(\bfX_t)_{t \geq 0}$ is a $\M$-valued process then for any -$i \in \{1, \dots, p\}$, -$(\bfX_t^i)_{t \geq 0} = (\langle \bfX_t, e_i \rangle)_{t \geq 0}$ is a -real-valued process. If $(\bfX_t)_{t \geq 0}$ is a $\M$-valued semimartingale -then it is the solution of $\SDE(\{P_i\}_{i=1}^p, \{\bfX^i\}_{i=1}^p, \bfX_0)$ -\cite[see][Lemma 2.3.3]{hsu2002stochastic}. 
Then, a candidate for the horizontal -lift of $(\bfX_t)_{t \geq 0}$ is given by -$(\bfU_t)_{t \geq 0}=(\bfX_t, \bfE_t)_{t \geq 0}$ solution of -$\SDE(\{P_i^\star\}_{i=1}^p, \{\bfX^i\}_{i=1}^p, \bfU_0)$, where for any -$i \in \{1,\dots,p\}$, $P_i^\star(u) = H_{f^{-1}P_i(\pi(u))}(u)$ and -$\bfX_0 = \pi(\bfU_0)$. We have that $(\bfU_t)_{t \geq 0}$ is the stochastic -development of $\{(\bfZ_t^i)_{t \geq 0}\}_{i=1}^d$ where for any $t \geq 0$, -$\bfZ_t = \sum_{i=1}^p \int_0^t \bfE_s^{-1} P_i(\bfX_s) \circ \rmd \bfX_s^i$ - \cite[see][Theorem 2.3.4]{hsu2002stochastic}. Finally, we have that given -$\bfU_0$, $(\bfU_t)_{t \geq 0}$ is the unique horizontal lift of -$(\bfX_t)_{t \geq 0}$ and $(\bfZ_t)_{t \geq 0}$ is the unique antidevelopment of -$(\bfX_t)_{t \geq 0}$ \cite[see][Theorem 2.3.5]{hsu2002stochastic}. - -\subsection{Brownian motion on manifolds} -\label{sec:brown-moti-manif} - -In this section, we introduce the notion of Brownian motion on manifolds. We -derive some of its basic convergence properties and provide alternative -definitions (stochastic development, isometric embedding, random walk -limit). These alternative definitions are the basis for our alternative -methodologies to sample from the time-reversal. To simplify our discussion, we -assume that $\M$ is a connected compact Riemannian manifold equipped with the -Levi-Civita connection $\nabla$. We denote $p_{\textup{ref}}b$ the Haussdorff measure of -the manifold (which coincides with the measure associated with the Riemannian -volume form \citep[see][Theorem 2.10.10]{federer2014geometric} and -$p_{\textup{ref}} = p_{\textup{ref}}b / p_{\textup{ref}}(\M)$ the associated probability measure. - -\paragraph{Gradient, divergence and Laplace operators} -Let $f \in \rmc^{\infty}(\M)$. We define $\nabla f \in \XM$ such that for any -$X \in \XM$ we have $\langle X, \nabla f \rangle_{\M} = X(f)$. 
Let -$\{X_i\}_{i=1}^d \in \XM^d$ such that for any $x \in \M$, $\{X_i(x)\}_{i=1}^d$ -is an orthonormal basis of $\mathrm{T}_x \M$. Then, we define -$\dive: \ \XM \to \rmc^\infty(\M)$ (linear) -such that for any $X \in \XM$, -$\dive(X) = \sum_{i=1}^d \prodM{\nabla_{X_i}X}{X_i}$. The following Stokes -formula (also called divergence theorem, see \citet[p.51]{lee2018introduction}) -holds for any $f \in \rmc^\infty(\M)$ and $X \in \XM$, -$\int_{M} \dive(X)(x) f(x) \rmd p_{\textup{ref}}(x) = - \int_M X(f)(x) \rmd -p_{\textup{ref}}(x)$. Let $X = \sum_{i=1}^d a_i X_i$ in local coordinates. Using the -Stokes formula and the definition of the gradient we get that in local -coordinates -\begin{equation} -\textstyle{ \nabla f = \sum_{i,j=1}^d g^{i,j} \partial_i f X_j , \qquad \dive(X) = \det(G)^{-1/2} \sum_{i=1}^d \partial_i(\det(G)^{1/2} a_i) . } -\end{equation} -The Laplace-Beltrami operator is given by -$\Delta_{\M} : \ \rmc^\infty(M) \to \rmc^\infty(M)$ and for any -$f \in \rmc^\infty(M)$ by $\Delta_{\M}(f) = \dive(\grad(f))$. In local -coordinates we obtain -$\Delta_{\M}(f) = \det(G)^{-1/2} \sum_{i=1}^d \partial_i (\det(G)^{1/2} -\sum_{j=1}^d g^{i,j} \partial_j f)$. Using the Nash isometric embedding theorem -\citep{gunther1991isometric} we will see that $\Delta_{\M}$ can always be -written as a sum of squared operators. However, this result requires an -\emph{extrinsic} point of view as it relies on the existence of projection -operators. In contrast, if we consider the orthonormal bundle $\OM$ we can -define the Laplace-Bochner operator -$\Delta_{\OM}: \ \rmc^\infty(\OM) \to \rmc^\infty(\OM)$ as -$\Delta_{\OM} = \sum_{i=1}^d H_i^2$, where we recall that for any -$i \in \{1, \dots, d\}$, $H_i$ is the horizontal lift of $e_i$. In this case, -$\Delta_{\OM}$ is a sum of squared operators and we have that for any -$f \in \rmc^\infty(\M)$, $\Delta_{\OM}(f \circ \pi) = \Delta_{\M}(f)$ -\cite[see][Proposition 3.1.2]{hsu2002stochastic}. 
Being able to express the various -Laplace operators as a sum of squared operators is key to express the associated -diffusion process as the solution of an SDE. - -\paragraph{Alternatives definitions of Brownian motion} - -We are now ready to define a Brownian motion on the manifold $\M$. Using the -Laplace-Beltrami operator, we can introduce the Brownian motion through the lens -of diffusion processes. - -\begin{definition}[Brownian motion] - Let $(\bfB_t^\M)_{t \geq 0}$ be a $\M$-valued semimartingale. - $(\bfB_t^\M)_{t \geq 0}$ is a Brownian motion on $\M$ if for any - $f \in \rmc^\infty(\M)$, $(\bfM_t^f)_{t \geq 0}$ is a local martingale where - for any $t \geq 0$ - \begin{equation} - \textstyle{\bfM_t^f = f(\bfB_t^\M) - f(\bfB_0^\M) - (1/2)\int_0^t \Delta_{\M}f(\bfB_s^\M) \rmd s .} - \end{equation} -\end{definition} - -Note that this definition is in accordance with the definition of the Brownian -motion as a diffusion process in the Euclidean space $\rset^d$, since in this -case $\Delta_{\M} = \Delta$. As emphasized in the previous section any -semimartingale on $\M$ can be associated to a process on $\FM$ (or $\OM$) and a -process on $\rset^d$. The proof of the following result can be found in -\citet[Propositions 3.2.1 and 3.2.2]{hsu2002stochastic}. - -\begin{proposition}[Intrinsic view of Brownian motion] - \label{prop:intrinsic_brownian} - Let $(\bfB_t^\M)_{t \geq 0}$ be a $\M$-valued semimartingales. 
Then - $(\bfB_t^\M)_{t \geq 0}$ is a Brownian motion on $\M$ if and only on the - following conditions hold: - \begin{enumerate}[label=\alph*)] - \item The horizontal lift $(\bfU_t)_{t \geq 0}$ is a $\Delta_{\OM}/2$ - diffusion process, \ie \ for any $f \in \rmc^\infty(\OM)$, we have that - $(\bfM_t^f)_{t \geq 0}$ is a local martingale where for any $t \geq 0$ - \begin{equation} - \textstyle{\bfM_t^f = f(\bfU_t) - f(\bfU_0) - (1/2)\int_0^t \Delta_{\OM}f(\bfU_s) \rmd s .} - \end{equation} -\item The stochastic antidevelopment of $(\bfB_t^\M)_{t \geq 0}$ is a - $\rset^d$-valued Brownian motion $(\bfB_t)_{t \geq 0}$. - \end{enumerate} -\end{proposition} - -In particular the previous proposition provides us with an \emph{intrisic} way -to sample the Brownian motion on $\M$ with initial condition $\bfB_0^\M$. First -sample $(\bfU_t)_{t \geq 0}$ solution of $\SDE(H^{1:d}, \bfB^{1:d}, \bfU_0)$ -with $H^{1:d} = \{H_i\}_{i=1}^d$ and $\pi(\bfU_0) = \bfB_0^\M$ and $\bfB^{1:d}$ the -Euclidean $d$-dimensional Brownian motion. Then, we recover the $\M$-valued -Brownian motion $(\bfB_t^\M)_{t \geq 0}$ upon letting -$(\bfB_t^\M)_{t \geq 0} = (\pi(\bfU_t))_{t \geq 0}$. -% habermann - -We now consider an \emph{extrinsic} approach to the sampling of Brownian motions -on $\M$. Using the Nash embedding theorem \citep{gunther1991isometric}, there -exists $p \in \nset$ such that without loss of generality we can assume that -$\M \subset \rset^p$. For any $x \in \M$, we denote -$P(x): \ \rset^p \to \mathrm{T}_x \M$ the projection operator. In addition for -any $x \in \M$, we denote $\{P_i(x)\}_{i=1}^p = \{P(x) e_i\}_{i=1}^p$, where -$\{e_i\}_{i=1}^p$ is the canonical basis of $\rset^p$. For any -$i \in \{1, \dots, p\}$, we smoothly extend $P_i$ to $\rset^p$. 
In this case, we -have the following proposition \cite[Theorem 3.1.4]{hsu2002stochastic}: - -\begin{proposition}[Extrinsic view of Brownian motion] - \label{prop:extrinsic_brownian} - For any $f \in \rmc^{\infty}(\M)$ we have that - $\Delta_M(f) = \sum_{i=1}^p P_i(P_i(f))$. Hence, we have that - $(\bfB_t^\M)_{t \geq 0}$ solution of - $\SDE(\{P_i\}_{i=1}^{p}, \bfB^{1:p}, \bfB_0^\M)$ with $\bfB_0^\M$ a $\M$-valued - random variable and $\bfB^{1:p}$ a $\rset^p$-valued Brownian motion. -\end{proposition} - -The second part of this proposition, stems from the fact that any solution of -$\SDE(\{V_i\}_{i=1}^{\ell}, \bfB^{1:\ell}, \bfX_0)$, where $\bfX_0$ is a -$\M$-valued random variable and $\bfB^{1:\ell}$ a $\rset^\ell$-valued Brownian -motion is a diffusion process with generator $\generator$ such that for any -$f \in \rmc^\infty(\M)$, $\generator(f) = \sum_{i=1}^\ell V_i(V_i(f))$. The -\emph{extrinsic} approach is particularly convenient since the SDE appearing in -\cref{prop:extrinsic_brownian} can be seen as an SDE on the Euclidean space -$\rset^p$. - -We finish this paragraph, by investigating the behavior of the Brownian motion -in local coordinates. For simplicity, we assume here that we have access to a -system of global coordinates. In the case where the coordinates are strictly -local then we refer to \citet[Chapter 5, Theorem 1]{ikeda1989sto} for a -construction of a global solution by patching local solutions. We denote -$\{X_k, X_{i,j}\}_{1 \leq i,j,k \leq d}$ such that for any $u \in \FM$, -$\{X_k(u), X_{i,j}(u)\}_{1 \leq i,j,k \leq d}$ is a basis of $\mathrm{T}_u \FM$, -similarly as in the previous section. 
Using \cref{eq:horizontal_lift} we get -that $(\bfU_t)_{t \geq 0} = (\{\bfX^k_t, \bfE_t^{i,j}\}_{1 \leq i,j,k \leq d})$ -obtained in \cref{prop:intrinsic_brownian} is given in the global coordinates for -any $i,j,k \in \{1, \dots, d\}$ by -\begin{equation} - \textstyle{ - \rmd \bfX_t^k = \sum_{j=1}^d \bfE_t^{k,j} \circ \rmd \bfB_t^k , \qquad \rmd \bfE_t^{i,j} = - \sum_{n=1}^d \{\sum_{\ell, m=1}^d \bfE_t^{\ell,n}\bfE_t^{m,j} \Gamma_{\ell,m}^{i}(\bfX_t)\} \circ \rmd \bfB_t^n . - } - \end{equation} - By definition of the Stratanovitch integral we have that for any $k \in \{1, \dots, d\}$ - \begin{equation} - \textstyle{ - \rmd \bfX_t^k = \sum_{j=1}^d \{ \bfE_t^{k,j} \rmd \bfB_t^k +(1/2) \rmd [\bfE_t^{k,j}, \bfB_t^j]_t \} . - } - \end{equation} - Let $(\bfM_t)_{t \geq 0} = (\{\bfM_t^k\}_{k=1}^d)_{t \geq 0}$ such that for - any $t \geq 0$ and $k \in \{1, \dots, d\}$ - $\bfM_t^k = \sum_{j=1}^d \int_0^t \bfE_t^{k,j} \rmd \bfB_t^k$. We obtain - that $\rmd \bfM_t = G(\bfX_t)^{-1/2} \rmd \bfB_t$ for some $d$-dimensional - Brownian motion $(\bfB_t)_{t \geq 0}$, using L\'evy's characterization of - Brownian motion. In addition, we have that for any - $k, j \in \{1, \dots, d\}$ - \begin{equation} - \textstyle{[\bfE^{k,j}, \bfB^j]_t = -\sum_{\ell, m=1}^d \int_0^t \bfE_t^{\ell, j} \bfE_t^{m,j} \Gamma_{\ell, m}^k(\bfX_t) \rmd t } - \end{equation} - Hence, using this result and the fact that - $\sum_{j=1}^d \bfE_t^{\ell, j} \bfE_t^{m,j} = g^{\ell,m}(\bfX_t)$, we get - that for any $k \in \{1, \dots, d\}$ - \begin{equation} - \textstyle{\rmd \bfX_t^k =- (1/2) \sum_{\ell, m=1}^d g^{\ell,m}(\bfX_t) \Gamma_{\ell, m}^k(\bfX_t) \rmd t + (G(\bfX_t)^{-1/2} \rmd \bfB_t)^k . } - \end{equation} - Note that this result could also have been obtained using the expression of - the Laplace-Beltrami in local coordinates. 
- - - \paragraph{Brownian motion and random walks} - - In the previous paragraph we consider three SDEs to obtain a Brownian motion - on $\M$ (stochastic development, isometric embedding and local - coordinates). In this section, we summarize results from - \cite{jorgensen1975central} establishing the limiting behavior of Geodesic - Random Walks (GRWs) when the stepsize of the random walk goes to $0$. This will be - of particular interest when considering the time-reversal process. We start - by defining the geodesic random walk on $\M$, following \citet[Section - 2]{jorgensen1975central}. - - Let $\{ \nu_x \}_{x \in \M}$ such that for any $x \in \M$, - $\nu_x: \mcb{\mathrm{T}_x \M} \to \ccint{0,1}$ with - $\nu_x(\mathrm{T}_x \M) =1$, \ie \ for any $x \in \M$, $\nu_x$ is a - probability measure on $\mathrm{T}_x \M$. Assume that for any $x \in \M$, - $\int_{\M} \normLigne{v}^3 \rmd \nu_x(v)< +\infty$. In addition assume that - there exists $\mu^{(1)} \in \XM$ and $\mu^{(2)} \in \XMdeux$, where - $\XMdeux$ is the section - $\Gamma(\M, \sqcup_{x \in \M} \mathcal{L}(\mathrm{T}_x \M))$, such that for - any $x \in \M$, $\int_{\M} v \rmd \nu_x(v) = \mu^{(1)}(x)$ and - $\int_{\M} v \otimes v \rmd \nu_x(v) = \mu^{(2)}(x)$. In addition, we assume - that for any $x \in \M$, - $\Sigma(x) = \mu^{(2)}(x) - \mu^{(1)}(x) \otimes \mu^{(1)}(x)$ is strictly - positive definite and that there exists $\Ltt \geq$ such that for any - $x, y \in \M$, $\tvnorm{\nu_x - \nu_y} \leq \Ltt d(x,y)$. Where we have that - for any $\nu_1 \in \Pens(\mathrm{T}_x \M)$ and $\nu_2 \in \Pens(\mathrm{T}_y \M)$, - \begin{equation} - \tvnorm{\nu_x - \nu_y} = \sup \ensembleLigne{\nu_1[f] - \Gamma_{x}^y(\gamma)_\# \nu_2[f]}{\gamma \in \mathrm{Geo}_{x,y}, \ f \in \rmc(\mathrm{T}_x \M)} . - \end{equation} - Note that if $d(x,y) \leq \vareps$ then for some $\vareps > 0$ we have that $\abs{\mathrm{Geo}_{x,y}}=1$. - - - \begin{definition}[Geodesic random walk] - Let $X_0$ be a $\M$-valued random variable. 
For any $\gamma > 0$, we - define $(\bfX_t^{\gamma})_{t \geq 0}$ such that $\bfX_0^\gamma = X_0$ and - for any $n \in \nset$ and $t \in \ccint{0, \gamma}$, - $\bfX^\gamma_{n\gamma + t} = \exp_{\bfX^\gamma_{n \gamma}}[t \{ \mu_n + - (1/\sqrt{\gamma}) (V_n - \mu_n)\}]$, where $(V_n)_{n \in \nset}$ is a sequence - of random variables such that for any $n \in \nset$, $V_n$ - has distribution $\nu_{\bfX^\gamma_{n \gamma}}$ conditionally on $\bfX^\gamma_{n \gamma}$. - \end{definition} - - For any $\gamma > 0$, the process - $(X_n^\gamma)_{n \in \nset} = (\bfX_{n \gamma}^\gamma)_{n \in \nset}$ is - called a geodesic random walk. In particular, for any $\gamma>0$ we denote - $(\Rker_n^{\gamma})_{n \in \nset}$ the sequence of Markov kernels such that - for any $n \in \nset$, $x \in \M$ and $\msa \in \mcb{\M}$ we have that - $\updelta_x \Rker_n^{\gamma}(\msa) = \Pbb(X_n^\gamma \in \msa)$, with $X_0^\gamma = - x$. The following theorem establishes that the limiting dynamics of a - geodesic random walk is associated with a diffusion process on $\M$ whose - coefficients only depend on the properties of $\nu$ \cite[see][Theorem - 2.1]{jorgensen1975central}. - - \begin{theorem}[Convergence of geodesic random walks] - \label{thm:jorgensen_appendix} - For any $t \geq 0$, $f \in \rmc(\M)$ and $x \in \M$ we have that - $\lim_{\gamma \to 0} \normLigne{ \Rker_{\ceil{t/\gamma}}^{\gamma}[f] - - \Pker_t[f]}_{\infty} = 0$, where $(\Pker_t)_{t \geq 0}$ is the - semi-group associated with the infinitesimal generator - $\generator: \ \rmc^\infty(\M) \to \rmc^\infty(\M)$ given for any - $f \in \rmc^\infty(\M)$ by - $\generator(f) = \langle \mu^{(1)}, \nabla f \rangle_{\M} + (1/2) \langle - \Sigma, \nabla^2f \rangle_{\M}$. - \end{theorem} - - In particular if $\mu^{(1)} = 0$ and $\mu^{(2)} = \Id$ then the random walk - converges towards a Brownian motion on $\M$ in the sense of the convergence - of semi-groups. 
For any $x \in \M$ in local coordinates we have that - $\Phi_\# \nu_x$ has zero mean and covariance matrix $G(x)^{-1}$, where $\Phi$ is - a local chart around $x$ and $G(x) = (g_{i,j}(x))_{1 \leq i,j \leq d}$ the - coordinates of the metric in that chart. - - -\paragraph{Convergence of Brownian motion} - -We finish this section with a few considerations regarding the convergence of -the Brownian motion on $\M$. Since we have assumed that $\M$ is compact we have -that there exist $(\Phi_k)_{k \in \nset}$ an orthonormal basis of eigenfunctions of $\Delta_\M$ in -$\mathrm{L}^2(p_{\textup{ref}})$, and $(\lambda_k)_{k \in \nset}$ such that for any -$i, j \in \nset$, $i \leq j$, $\lambda_i \leq \lambda_j$ and $\lambda_0 = 0$, $\Phi_0=1$ and -for any $k \in \nset$, $\Delta_\M \Phi_k = -\lambda_k \Phi_k$. For any $t \geq 0$ -and $x,y \in \M$, -$p_t(x,y) = \sum_{k \in \nset} \exp[-\lambda_k t] \Phi_k(x) \Phi_k(y)$ where for -any $f \in \rmc^\infty(\M)$ we have -\begin{equation} - \textstyle{\expeLigne{f(\bfB_t^{\M,x})} = \int_\M p_t(x,y) f(y) \rmd p_{\textup{ref}}(y) , } -\end{equation} -where $(\bfB_t^{\M,x})_{t \geq 0}$ is the Brownian motion on $\M$ with $\bfB_0^{\M,x} = x$ -and $p_{\textup{ref}}$ is the probability measure associated with the Hausdorff measure on -$\M$. We also have the following result \cite[see][Proposition -2.6]{urakawa2006convergence}. - -\begin{proposition}[Convergence of Brownian motion] -\label{prop:brownian_conv_repeat} - For any $t > 0$, $\Pker_t$ admits a density $p_t$ w.r.t.\ $p_{\textup{ref}}$ and - $p_{\textup{ref}} \Pker_t = p_{\textup{ref}}$, \ie \ $p_{\textup{ref}}$ is an invariant measure for - $(\Pker_t)_{t \geq 0}$. 
In addition, if there exist $C, \alpha \geq 0$ such - that for any $t \in \ocint{0,1}$, $p_t(x,x) \leq C t^{-\alpha /2}$ then - for any $\pizero \in \Pens(\M)$ and for any $t \geq 1/2$ we have - \begin{equation} - \textstyle{\tvnorm{\pizero \Pker_t - p_{\textup{ref}}} \leq C^{1/2} \rme^{\lambda_1 /2} \rme^{-\lambda_1 t} ,} - \end{equation} - where $\lambda_1$ is the first positive eigenvalue of $-\Delta_\M$ in - $\mathrm{L}^2(p_{\textup{ref}})$ and we recall that $(\Pker_t)_{t \geq 0}$ is the - semi-group of the Brownian motion. -\end{proposition} -A review on lower bounds on the first positive eigenvalue -of the Laplace-Beltrami operator can be found in \citep{he2013lower}. These lower -bounds usually depend on the Ricci curvature of the manifold or its diameter. We -conclude this section by noting that in the non-compact case \citep{li1986large} -establishes similar estimates in the case of a manifold with non-negative Ricci -curvature and maximal volume growth. - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/prel_main.tex b/doc/prel_main.tex deleted file mode 100644 index 5c587ba..0000000 --- a/doc/prel_main.tex +++ /dev/null @@ -1,218 +0,0 @@ -\section{Euclidean Score-based Generative Modeling} -\label{sec:eucl-sgm-riem} - -We recall here briefly the key concepts behind SGMs on the Euclidean space $\rset^d$ for some $d \in \nset$. We refer to \cite{song2020score,song2019generative,debortoli2021neurips} for a more detailed introduction to SGMs. In what follows, let $p_0$ denote the -data distribution. In practice, we only have access to an empirical approximation of this distribution given by the available data. 
 - - -We consider a forward -\emph{noising} process $(\bfX_t)_{t \geq 0}$ defined by the following Stochastic -Differential Equation (SDE) -\begin{equation}\label{eq:forward_SDE} - \rmd \bfX_t = -\bfX_t \rmd t + \sqrt{2} \rmd \bfB_t,\quad \bfX_0 \sim p_0 -\end{equation} -where $(\bfB_t)_{t \geq 0}$ is a $d$-dimensional Brownian motion. As a result -$(\bfX_t)_{t \geq 0}$ is an Ornstein--Uhlenbeck process targeting a multivariate standard Gaussian -distribution. Let $T \geq 0$. Under -mild conditions on the data distribution $p_0$, the time-reversed process -$(\bfhX_t)_{t \in \ccint{0,T}} = (\bfX_{T-t})_{t \in \ccint{0,T}}$ also satisfies an SDE -\citep{cattiaux2021time,haussmann1986time} given by -\begin{equation} \label{eq:backward_SDE} - \rmd \bfhX_t = \{ \bfhX_t + 2 \nabla \log p_{T-t}(\bfhX_t)\} \rmd t + \sqrt{2} \rmd \bfB_t,\quad \bfhX_0 \sim p_T -\end{equation} -where $p_t$ denotes the density of $\bfX_t$. By construction, the law of $\bfhX_{T-t}$ is equal to the law of $\bfX_t$ for $t \in \ccint{0,T}$ and in particular $\bfhX_{T}\sim p_0$. Hence, if one could sample from -$(\bfhX_t)_{t \in \ccint{0,T}}$ then its final distribution would be the target -data distribution $p_0$. - -Unfortunately there are three sources of intractability that prevent us from sampling the process $(\bfhX_t)_{t \in \ccint{0,T}}$. - -\textbf{Problem 1:} Its initial distribution is given by $p_T$ which is intractable. - -\textbf{Solution:} The Ornstein--Uhlenbeck process \eqref{eq:forward_SDE} converges exponentially fast towards a standard multivariate Gaussian so one can approximate $p_T$ by this Gaussian for $T$ large enough. - -\textbf{Problem 2:} The scores are intractable so the dynamics \eqref{eq:backward_SDE} cannot be implemented. 
- -\textbf{Solution:} To approximate the scores, we exploit the following identity -\begin{equation}\label{eq:scoreidentity} - \textstyle{\nabla \log p_t(x) = \int_{\rset^d} \nabla \log p_{t|0}(x|x_0) p_{0|t}(x_0|x) \rmd x_0,} -\end{equation} -where $p_{t|0}(x'|x)$ is the transition density of the Ornstein--Ulhenbeck process which is available in closed-form. It follows directly that $\nabla \log p_t$ is the minimizer of the loss function -$\ell_t(s) = \expeLigne{\normLigne{s(\bfX_t) - \nabla \log - p_{t|0}(\bfX_t|\bfX_0)}^2}$ over function $s$ where the expectation is over the joint distribution of $\bfX_0,\bfX_t$. This result can be exploited as follows. We consider a neural network approximation $\bm{s}_\theta: \ccint{0,T} \times \rset^d \to \rset^d$ which we train by minimizing the loss function $\ell(\theta)=\int_0^T \lambda_t \ell_{t}(\bm{s}_\theta(t,\cdot))\rmd t$ for some weighting function $\lambda_t>0$ . - - -\textbf{Problem 3:} The loss function $\ell(\theta)$ and the SDE approximating \eqref{eq:backward_SDE} by replacing the scores $(\nabla \log p_t)_{t \in \ccint{0,T}}$ by $(s_\theta(t,\cdot))_{t \in \ccint{0,T}}$ and $p_T$ by the standard multivariate normal cannot not be simulated exactly on a computer. - -\textbf{Solution:} For a discretization step $\gamma$ such that $T=\gamma N$ for integer $N$, the loss function is approximated by $\sum_{n=0}^N \lambda_{n\gamma} \ell_{n \gamma}(\bm{s}_\theta(n \gamma,\cdot))$ and we perform a Euler--Maruyama discretization of the resulting SDEe; i.e. 
we define $(Y_n)_{n \in \{0, \dots, N\}}$ such that for $Z_n\overset{\textup{i.i.d.}}{\sim} \mathcal{N}(0,I_d)$ -% \begin{equation} -% \label{eq:backward_discreteexactscores} -% Y_{n+1} = Y_n + \gamma \{Y_n + 2 \nabla \log p_{T -n \gamma}(Y_n) \} + \sqrt{2} Z_{n+1},\quad Y_0\sim \mathcal{N}(0,I_d) -% \end{equation} -% for any $n \in \{0, \dots, N-1\}$ where $\mathcal{N}(0,I_d)$ denotes the multivariate standard normal on $\mathbb{R}^d$, $\gamma > 0$ such that $T = N \gamma$, $(Z_n)\overset{\textup{i.i.d.}}{\sim} \mathcal{N}(0,I_d)$. - - - -% Finally, at sampling times we consider the following dynamics -\begin{equation}\label{eq:backward_discrete_final} - Y_{n+1} = Y_n + \gamma \{Y_n + 2 \bm{s}_\theta(T -n \gamma, Y_n) \} + \sqrt{2} Z_{n+1},\quad Y_0\sim \mathcal{N}(0,I_d). -\end{equation} - - - - -% \textbf{Problem 2:} The continuous-time process \eqref{eq:backward_SDE} cannot not be simulated exactly on a computer even if the scores $(\nabla \log p_t)_{t \in \ccint{0,T}}$ were known. - -% \textbf{Solution:} We perform a time-discretization of the resulting SDE using an Euler--Maruyama scheme; i.e. we define $(Y_n)_{n \in \{0, \dots, N\}}$ such that -% \begin{equation} -% \label{eq:backward_discreteexactscores} -% Y_{n+1} = Y_n + \gamma \{Y_n + 2 \nabla \log p_{T -n \gamma}(Y_n) \} + \sqrt{2} Z_{n+1},\quad Y_0\sim \mathcal{N}(0,I_d) -% \end{equation} -% for any $n \in \{0, \dots, N-1\}$ where $\mathcal{N}(0,I_d)$ denotes the multivariate standard normal on $\mathbb{R}^d$, $\gamma > 0$ such that $T = N \gamma$, $(Z_n)\overset{\textup{i.i.d.}}{\sim} \mathcal{N}(0,I_d)$. - - - - - - -% First, its initial distribution is given by $p_T$ which is intractable. Second, the continuous-time process \eqref{eq:backward_SDE} cannot not be simulated exactly on a computer even if the scores $(\nabla \log p_t)_{t \in \ccint{0,T}}$ were known. Third, the scores are also intractable. 
\emile{repetition with following paragraph} - -% Unfortunately there are three sources of intractability that prevents us from sampling the process $(\bfhX_t)_{t \in \ccint{0,T}}$. First, its initial distribution is given by $p_T$ which is intractable. However, the -% Ornstein--Ulhenbeck process \eqref{eq:forward_SDE} converges exponentially fast towards a standard multivariate Gaussian so one can approximate $p_t$ by this Gaussian for $T$ large enough. Second, the continuous-time process \eqref{eq:backward_SDE} cannot not be simulated exactly on a computer even if the scores $(\nabla \log p_t)_{t \in \ccint{0,T}}$ were known. -% We then perform a time-discretization of the resulting SDE using an Euler--Maruyama scheme; i.e. we define $(Y_n)_{n \in \{0, \dots, N\}}$ such that -% \begin{equation} -% \label{eq:backward_discreteexactscores} -% Y_{n+1} = Y_n + \gamma \{Y_n + 2 \nabla \log p_{T -n \gamma}(Y_n) \} + \sqrt{2} Z_{n+1},\quad Y_0\sim \mathcal{N}(0,I_d) -% \end{equation} -% for any $n \in \{0, \dots, N-1\}$ where $\mathcal{N}(0,I_d)$ denotes the multivariate standard normal on $\mathbb{R}^d$, $\gamma > 0$ such that $T = N \gamma$, $(Z_n)\overset{\textup{i.i.d.}}{\sim} \mathcal{N}(0,I_d)$. -% Third, the scores are also intractable so the discrete-time process \eqref{eq:backward_discreteexactscores} cannot be implemented either. To approximate the scores, we exploit the following identity -% \begin{equation}\label{eq:scoreidentity} -% \textstyle{\nabla \log p_t(x) = \int_{\rset^d} \nabla \log p_{t|0}(x|x_0) p_{0|t}(x_0|x) \rmd x_0,} -% \end{equation} -% where $p_{t|0}(x'|x)$ is the transition density of the Ornstein--Ulhenbeck process which is available in closed-form. It follows directly that $\nabla \log p_t$ is the minimizer of the loss function -% $\ell_t(s) = \expeLigne{\normLigne{s(\bfX_t) - \nabla \log -% p_{t|0}(\bfX_t|\bfX_0)}^2}$ over function $s$ where the expectation is over the joint distribution of $\bfX_0,\bfX_t$. 
In practice, we thus consider consider a neural network approximation -% % $\bm{s}_\theta: \big\{0,...,N-1\big\} \times \rset^d \to \rset^d$ -% $\bm{s}_\theta: [0, T] \times \rset^d \to \rset^d$ -% which we train by minimizing over $\theta$ the loss -% % $\sum_{n=0}^{N-1} \lambda_n \ell_{n \gamma}(\bm{s}^\theta(n \gamma, \cdot))$ for some positive weights $\lambda_n>0$ in a preliminary training phase. -% $ \mathbb{E}_{t}\left[\lambda(t) \ell_{t}(\bm{s}_\theta(t, \cdot))\right]$ where $t \sim \mathcal{U}([0,T])$ and $\lambda: [0, T] \rightarrow \R^{+}$ is a positive weighting function. -% Finally, at sampling times we consider the following dynamics -% \begin{equation}\label{eq:backward_discrete_final} -% Y_{n+1} = Y_n + \gamma \{Y_n + 2 s_\theta(T -n \gamma, Y_n) \} + \sqrt{2} Z_{n+1},\quad Y_0\sim \mathcal{N}(0,I_d). -% \end{equation} - -% \michael{Just playing with layout options} -% %% Testing enumerate -% Unfortunately there are three sources of intractability that prevents us from sampling the process $(\bfhX_t)_{t \in \ccint{0,T}}$. - -% \begin{enumerate} -% \item Its initial distribution is given by $p_T$ which is intractable. -% However, the Ornstein--Ulhenbeck process \eqref{eq:forward_SDE} converges exponentially fast towards a standard multivariate Gaussian so one can approximate $p_t$ by this Gaussian for $T$ large enough. -% \item The continuous-time process \eqref{eq:backward_SDE} cannot not be simulated exactly on a computer even if the scores $(\nabla \log p_t)_{t \in \ccint{0,T}}$ were known. -% We then perform a time-discretization of the resulting SDE using an Euler--Maruyama scheme; i.e. 
we define $(Y_n)_{n \in \{0, \dots, N\}}$ such that -% \begin{equation} -% \label{eq:backward_discreteexactscores} -% Y_{n+1} = Y_n + \gamma \{Y_n + 2 \nabla \log p_{T -n \gamma}(Y_n) \} + \sqrt{2} Z_{n+1},\quad Y_0\sim \mathcal{N}(0,I_d) -% \end{equation} -% for any $n \in \{0, \dots, N-1\}$ where $\mathcal{N}(0,I_d)$ denotes the multivariate standard normal on $\mathbb{R}^d$, $\gamma > 0$ such that $T = N \gamma$, $(Z_n)\overset{\textup{i.i.d.}}{\sim} \mathcal{N}(0,I_d)$. -% \item The scores are also intractable so the discrete-time process \eqref{eq:backward_discreteexactscores} cannot be implemented either. -% To approximate the scores, we exploit the following identity -% \begin{equation}\label{eq:scoreidentity} -% \textstyle{\nabla \log p_t(x) = \int_{\rset^d} \nabla \log p_{t|0}(x|x_0) p_{0|t}(x_0|x) \rmd x_0,} -% \end{equation} -% where $p_{t|0}(x'|x)$ is the transition density of the Ornstein--Ulhenbeck process which is available in closed-form. It follows directly that $\nabla \log p_t$ is the minimizer of the loss function -% $\ell_t(s) = \expeLigne{\normLigne{s(\bfX_t) - \nabla \log -% p_{t|0}(\bfX_t|\bfX_0)}^2}$ over function $s$ where the expectation is over the joint distribution of $\bfX_0,\bfX_t$. In practice, we thus consider consider a neural network approximation -% $\bm{s}_\theta: \big\{0,...,N-1\big\} \times \rset^d \to \rset^d$. -% We train $\bm{s}_\theta$ by minimizing over $\theta$ -% $\sum_{n=0}^{N-1} \lambda_n \ell_{n \gamma}(\bm{s}^\theta(n \gamma, \cdot))$ for some positive weights $\lambda_n>0$ in a preliminary training phase. Finally, at sampling times we consider the following dynamics -% \begin{equation}\label{eq:backward_discrete_final} -% Y_{n+1} = Y_n + \gamma \{Y_n + 2 s^\theta_{T -n \gamma}(Y_n) \} + \sqrt{2} Z_{n+1},\quad Y_0\sim \mathcal{N}(0,I_d). 
-% \end{equation} -% \end{enumerate} -% %% Testing enumerate - -% %% Testing problem-solution -% Unfortunately there are three sources of intractability that prevents us from sampling the process $(\bfhX_t)_{t \in \ccint{0,T}}$. - -% \textbf{Problem 1:} Its initial distribution is given by $p_T$ which is intractable. - -% \textbf{Solution:} The Ornstein--Ulhenbeck process \eqref{eq:forward_SDE} converges exponentially fast towards a standard multivariate Gaussian so one can approximate $p_t$ by this Gaussian for $T$ large enough. - - -% \textbf{Problem 2:} The continuous-time process \eqref{eq:backward_SDE} cannot not be simulated exactly on a computer even if the scores $(\nabla \log p_t)_{t \in \ccint{0,T}}$ were known. - -% \textbf{Solution:} We perform a time-discretization of the resulting SDE using an Euler--Maruyama scheme; i.e. we define $(Y_n)_{n \in \{0, \dots, N\}}$ such that -% \begin{equation} -% \label{eq:backward_discreteexactscores} -% Y_{n+1} = Y_n + \gamma \{Y_n + 2 \nabla \log p_{T -n \gamma}(Y_n) \} + \sqrt{2} Z_{n+1},\quad Y_0\sim \mathcal{N}(0,I_d) -% \end{equation} -% for any $n \in \{0, \dots, N-1\}$ where $\mathcal{N}(0,I_d)$ denotes the multivariate standard normal on $\mathbb{R}^d$, $\gamma > 0$ such that $T = N \gamma$, $(Z_n)\overset{\textup{i.i.d.}}{\sim} \mathcal{N}(0,I_d)$. - -% \textbf{Problem 3:} The scores are also intractable so the discrete-time process \eqref{eq:backward_discreteexactscores} cannot be implemented either. - -% \textbf{Solution:} To approximate the scores, we exploit the following identity -% \begin{equation}\label{eq:scoreidentity} -% \textstyle{\nabla \log p_t(x) = \int_{\rset^d} \nabla \log p_{t|0}(x|x_0) p_{0|t}(x_0|x) \rmd x_0,} -% \end{equation} -% where $p_{t|0}(x'|x)$ is the transition density of the Ornstein--Ulhenbeck process which is available in closed-form. 
It follows directly that $\nabla \log p_t$ is the minimizer of the loss function -% $\ell_t(s) = \expeLigne{\normLigne{s(\bfX_t) - \nabla \log -% p_{t|0}(\bfX_t|\bfX_0)}^2}$ over function $s$ where the expectation is over the joint distribution of $\bfX_0,\bfX_t$. In practice, we thus consider consider a neural network approximation -% $\bm{s}_\theta: \big\{0,...,N-1\big\} \times \rset^d \to \rset^d$. -% We train $\bm{s}_\theta$ by minimizing over $\theta$ -% $\sum_{n=0}^{N-1} \lambda_n \ell_{n \gamma}(\bm{s}^\theta(n \gamma, \cdot))$ for some positive weights $\lambda_n>0$ in a preliminary training phase. Finally, at sampling times we consider the following dynamics -% \begin{equation}\label{eq:backward_discrete_final} -% Y_{n+1} = Y_n + \gamma \{Y_n + 2 s^\theta_{T -n \gamma}(Y_n) \} + \sqrt{2} Z_{n+1},\quad Y_0\sim \mathcal{N}(0,I_d). -% \end{equation} -%% testing problem-solution - -% Hence, we consider -% the process $(\bfY_t)_{t \in \ccint{0,T}}$ such that $\bfY_0$ is a Gaussian -% random variable with zero mean and identity covariance matrix and -% $(\bfY_t)_{t \in \ccint{0,T}}$ satisfies \eqref{eq:backward_SDE}. In order to -% obtain an algorithm which can be implemented in practice we first discretize the -% process \eqref{eq:backward_SDE}, \ie \ we define $(Y_n)_{n \in \{0, \dots, N\}}$ -% such that $Y_0$ is a Gaussian random variable with zero mean and identity -% covariance matrix and for any $n \in \{0, \dots, N-1\}$ we have -% \begin{equation} -% \label{eq:backward_discrete} -% Y_{n+1} = Y_n + \gamma \{Y_n + 2 \nabla \log p_{T -n \gamma}(Y_n) \} + \sqrt{2} Z_{n+1} \eqsp , -% \end{equation} -% where $\gamma > 0$ such that $T = N \gamma$, $(Z_n)_{n \in \nset}$ is a sequence -% of i.i.d. Gaussian random variables with zero mean and identity covariance -% matrix. Note that \eqref{eq:backward_discrete} is simply the Euler-Maruyama -% discretization of \eqref{eq:backward_SDE}. 
One last key step relies in the -% approximation of the dynamics of \eqref{eq:backward_discrete} since the -% logarithmic gradient (or Stein score) $(\nabla \log p_t)_{t \in \ccint{0,T}}$ is -% not tractable. To do so, we consider a neural network approximation -% $\bm{s}_\theta: \ \ccint{0,T} \times \rset^d \to \rset^d$. Since for any -% $t \in \ccint{0,T}$ and $x \in \rset^d$ we have that -% \begin{equation} -% \textstyle{\nabla \log p_t(x) = \int_{\rset^d} \nabla \log p_{t|0}(x|x_0) p_{0|t}(x_0|x) \rmd x_0 \eqsp , } -% \end{equation} -% we obtain that for any $t \in \ccint{0,T}$, $\nabla \log p_t$ is the minimizer -% of the loss function -% $\ell_t(s) = \expeLigne{\normLigne{s(\bfX_t) - \nabla \log -% p_{t|0}(\bfX_t|\bfX_0)}^2}$. We train $\bm{s}_\theta$ to minimize -% $\int_0^T \lambda(t) \ell_t(\bm{s}_\theta(t, \cdot)) \rmd t$, where $t \mapsto \lambda(t)$ is some positive weighting function. -% \emile{Would remove from main paper} -% In \citet[Theorem 1]{debortoli2021neurips}, error bounds on -% the total variational norm between the law of $Y_N$ and $p_0$ are established depending on $N$, $\gamma$ -% and the approximation error of the neural network $\bm{s}_\theta$. In -% particular, the approximation error can be decomposed into two terms: one corresponding to the mixing of the process \eqref{eq:backward_discreteexactscores} which is controlled by the mixing of the forward process \eqref{eq:forward_SDE}, another one which corresponds to the approximation of the scores. %Hence the bottleneck of SGMs are not mixing issues does not reside in the mixing time of the backward chain but in the approximation of the score. - -We have presented the basics of SGM but we highlight that many recent works -improve on these models; -\citep[see e.g.][]{song2020score,song2020improved,song2020denoising,jolicoeur2020adversarial,jolicoeur2021gotta,nichol2021beatgans}. 
In -particular, it is worth noting that choosing an adaptive stepsize -$(\gamma_n)_{n \in \nset}$ \citep{bao2022analyticdpm,watson2021learning} drastically improve the -synthesis results as well as using a predictor-corrector scheme -\citep{song2020score} instead of a simple Euler--Maruyama discretization. Finally, -we note that there exist other approaches to introduce SGMs using variational and maximum likelihood -techniques \citep{ho2020denoising,huang2021variational,durkan2021maximum}. - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/random_walk.tex b/doc/random_walk.tex deleted file mode 100644 index 64a682b..0000000 --- a/doc/random_walk.tex +++ /dev/null @@ -1,9 +0,0 @@ -\section{Random walks and time-reversal} -\label{sec:random-walks-time} - -This section is based on the results of \cite{jorgensen1975central}. - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main_arxiv" -%%% End: diff --git a/doc/rel_work.tex b/doc/rel_work.tex deleted file mode 100644 index 80c64b1..0000000 --- a/doc/rel_work.tex +++ /dev/null @@ -1,148 +0,0 @@ -\section{Related work} -\label{sec:related-works} - -% The study of distribution on manifolds and their approximation is a central part -% of \emph{directional statistics}, see \citep{mardia2009directional}. -% Traditional methods rely on fitting mixtures of distributions to the target. However, in -% recent years several other methods have been introduced in the context of generative modeling. -In what follows, we discuss previous work on parametrizing family of distributions for manifold-valued data. %supported on manifolds. -% We first briefly discuss -Note that in this work, the manifold structure is considered to be prescribed. 
-In contrast, another line of work has been focusing on jointly learning the manifold structure and a generative model -% combining manifold learning and generative modeling have also been proposed -\citep{brehmer2020flows,kalatzis2021multi,caterini2021Rectangular}. -% At a high level, the related work can be divided - - -\paragraph{Parametric family of distributions.} Defining flexible easy-to-sample -distributions on manifolds is not a trivial task. -The various parametric families of distributions that have been proposed can broadly be categorised into three main approaches \citep{navarro2017multivariate}: wrapping, projecting and conditioning. -Wrapped distributions consider a parametric distribution on $\mathbb{R}^n$ that is pushed-forward along an invertible map $\psi: \mathbb{R}^n \rightarrow \M$. -% Parametric wrapped distributions are usually defined on $\mathbb{S}^1$, -A canonical example is the wrapped normal distribution on $\mathbb{S}^1$ -\citep{collett1981Discriminating}. Another example has been proposed by -\cite{mathieu2019continuous,nagano2019wrapped} on the hyperbolic space with the -exponential map \valentin{this is the same thing as push forward of Euclidean - NF?}. Given a Euclidean submanifold $\M \subset \mathbb{R}^n$ and a -distribution $p_{\text{amb}} \in \Pens(\mathbb{R}^n)$, -% a distribution can be defined on $\M$ by marginalizing out a -marginalizing out $p_{\text{amb}}$ along the normal bundle induces a distribution on $\M$. -Samples are obtained by first sampling $p_{\text{amb}}$ and then applying an orthogonal projection on these samples. -Finally, the conditioning method consists into considering the unormalized density defined by the restriction of an ambient density $p_{\text{amb}}$ with $\M$. -% According to \citep{navarro2017multivariate}, there exists three main methods to sample from -% these distributions on manifolds such as spheres and tori: wrapping, projecting and conditioning. 
-% Wrapped distributions are usually defined on $\mathbb{S}^1$, -% by considering a distribution on $\rset$. Then using that -% $\phi: \mathbb{S}^1 \times \zset \to \rset$ with -% $\phi(\theta, k) = \theta + 2k\uppi$ is a bijection, we define a probability -% distribution on $\mathbb{S}^1$ by marginalizing along the second component. -% Let $\mathcal{M}$ be a submanifold of $\rset^d$ such that -% $\mathcal{M} = \ensembleLigne{x \in \rset^d}{\phi^{-1}(x)_2 = a}$ with -% $\phi: \ \mse_1 \times \mse_2 \to \rset^d$ a diffeomorphism and $a \in -% \mse_2$ -% Then, the projecting method consists into considering a probability -% distribution in $\mse_1 \times \mse_2$ and marginalizing w.r.t. $\mse_2$. -% On the -% $(d-1)$-dimensional sphere this amounts to sampling from the probability -% distribution and normalizing the samples. -% Finally, the conditioning method -% consists into considering the disintegration of a probability distribution -% w.r.t. $\phi^{-1}_2$. -Such distributions encompass the von Mises-Fisher -distribution \citep{fisher1953dispersion} and the Kent distribution -\citep{kent1982fisher}. -These distributions are usually unimodal and -% in order to fit more complex distributions it is necessary to consider mixtures -considering mixtures of thereof is key to increase flexibility -\citep{peel2001fitting,mardia2008multivariate}. -% quid Riemannian normal distributions? (max entropy generalisation) -% also quid power spherical distribution? - -% \paragraph{Normalizing flows in latent spaces.} -\paragraph{Push-forward of Euclidean normalizing flows.} -More recently, approaches leveraging the flexibility of normalizing flows -\citep{papamakarios2019normalizing} have been proposed. -Following the wrapping method described above, these methods -% The simplest approach is to -parametrize a normalizing flow in the Euclidean space $\mathbb{R}^n$ that is pushed-forward along an invertible map $\psi: \mathbb{R}^n \rightarrow \M$. 
-However, to globally represent the manifold, the map $\psi$ needs to be a homeomorphism, which can only happen if $\M$ is topologically equivalent to $\mathbb{R}^n$, hence limiting the scope of that approach. -One natural choice for this map if the exponential map $\exp_x: \mathrm{T}_x \M \cong \mathbb{R}^d$. %, leading so called wrapped distributions. -This approach has been taken, for instance, by \cite{falorsi2019reparameterizing} and \cite{bose2020latent}, respectively parametrizing distributions on Lie groups and hyperbolic space. -% \cite{gemici2016normalizing} introduced normalizing flows on the sphere using the stereographic projection. -% One limitation of this approach is -% that the probability distribution is hard to model near the pole which is sent -% to $\infty$ using the stereographic mapping. Indeed, if one tries to model a -% probability distribution with one mode near the pole then most of the mass of -% the distribution pushed by the stereographic mapping is concentrated away from -% the origin. As a result, it is hard to approximate this distribution by learning -% a deformation of an easy-to-sample distribution, like a Gaussian -% distribution. -% For Lie groups, \cite{falorsi2019reparameterizing} proposed to -% perform the inference in the Lie algebra and then push the distribution to the -% whole manifold using that for compact Lie groups the exponential mapping is -% surjective. -% Similarly, \cite{bose2020latent} proposed in hyperbolic spaces two -% approaches to push a normalizing flow defined in tangent spaces of the manifold -% using different wrappings. -% -% a normalizing flow approach based on a recursive construction which is more numerically stable than the one proposed in \citep{gemici2016normalizing}. 
- -\paragraph{Neural ODE on manifolds.} -To avoid artifacts or numerical instabilities due to the manifold embedding, another line -of work uses tools from Riemannian geometry to define flows directly on the -manifold of interest -\citep{falorsi2020neural,mathieu2020riemannian,falorsi2021Continuous}. -Since these methods do not require a specific embedding mapping, they -% can be considered as \emph{intrinsic}. -are referred as \emph{Riemannian}. -% These methods leverage tools from continuous normalizing flows (CNFs) \citep{grathwohl2019Scalable}, -They extend continuous normalizing flows (CNFs) \citep{grathwohl2019Scalable} to the manifold setting, by implicity parametrizing flows as solutions of Ordinary Differential Equations (ODEs). -As such, the parametric flow is a \emph{continuous} function of time. -% extending the evolution equation of CNFs to Riemannian manifolds. -This approach has recently been extended by \cite{rozen2021moser} -introducing Moser flows, whose main appeal being that it circumvents the need to solve an ODE in the training process. % by reparametrizing the vector field with an interpolant function. -% is that they do not require backpropagating -% through and Ordinary Differential Equation (ODE). -% Similarly to Moser flow, RGSM learns an interpolation between the target distribution and an easy-to-sample distribution. - - -% \begin{itemize} -% \item limitation 1: divergence to compute high dimensional -% \item limitation 2: the importance sampling (high dimensional as well) -% \item comparison on high dimensional sphere -% \item comparison with bunny as well -% \end{itemize} - -\paragraph{Optimal transport on manifolds.} -Another line of work has focused on developing flows on manifolds -% Finally, another recent method introduces flows on manifolds -using tools from optimal transport. % \citep{ambrosio2003Optimal}. 
-\cite{sei2013jacobian} introduced a flow that is given by $f_\theta: x \mapsto \exp_x(\nabla \psi^c_\theta)$ -% the exponential map applied to the gradient of a $c$-convex function, where $c$ is the squared distance on the Riemannian manifold. -with $\psi^c_\theta$ a $c$-convex function and $c=d^2_\M$, where $d_\M$ is the -geodesic distance. This approach is motivated by the fact that the -optimal transport map takes such an expression -\citep{ambrosio2003Optimal}. These methods operate directly on the manifold, -similarly to CNFs, yet in contrast they are \emph{discrete} in time. The -benefits of this approach depend on the specific choice of parametric family of -$c$-convex functions \citep{rezende2021Implicit,cohen2021riemannian}, -trading-off expressively with scalability. -% The optimization of these flows is then -% conducted on the parameters of the chosen family of $c$-convex functions -In the case of tori and spheres, \cite{rezende2020Normalizing} introduced \emph{discrete} Riemannian flows based on Möbius transformations and spherical splines. - - -% Methods to check: -% \begin{itemize} -% \item \cite{mathieu2019continuous} -- pas vraiment relie. C'est le latent space d'un VAE qui est un espace hyperbolique. -% \item \cite{nagano2019wrapped} -- pareil ? -% \item \cite{rey2019diffusion} -- vae aussi -% \item \cite{falorsi2018explorations} -- aussi -% \item \cite{davidson2018hyperspherical} -- aussi -% \end{itemize} - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/schrodinger.tex b/doc/schrodinger.tex deleted file mode 100644 index 71872de..0000000 --- a/doc/schrodinger.tex +++ /dev/null @@ -1,145 +0,0 @@ -\section{Schr\"odinger Bridges on Manifolds} -\label{sec:extension} - -%\valentin{no compelling examples for conditional sampling... 
Maybe in physics?} - -For Euclidean SGMs, the generative model is given by an approximation -of the time-reversal of the noising dynamics $(\bfX_t)_{t \in \ccint{0,T}}$ while the backward dynamics -$(\bfY_t)_{t \in \ccint{0,T}}$ is initialized with the invariant distribution of -the noising dynamics (the uniform distribution $\piinv$ in case of -RSGM). However, in order for the method to yield good results we need -$\mathcal{L}(\bfY_0) \approx \mathcal{L}(\bfX_T)$ \cite[see][Theorem -1]{debortoli2021neurips}. Usually, this requires the number of steps in the -backward process to be large in order to keep $T$ large and $\gamma$ small -(where $\gamma > 0$ is the stepsize in the Geodesic Random Walk). Another -limitation of SGMs is that existing methods target -an easy-to-sample reference distribution. Hence, classical SGMs -cannot interpolate between two distributions defined by datasets. To -circumvent this problem, one can consider a process whose initial and terminal -distribution are pinned down using Schr\"odinger bridges -\citep{schrodinger1932theorie,leonard2012schrodinger,chen2016entropic,debortoli2021neurips}. - -\paragraph{Dynamical Schr\"odinger bridges} -We briefly recall the notion of dynamical Schr\"odinger bridge -\citep{leonard2012schrodinger,chen2016entropic,vargas2021solving,debortoli2021neurips,chen2021likelihood}. We -consider a reference path probability measure -$\Pbb \in \Pens(\rmc(\ccint{0,T}, \M))$. In practice, we set $\Pbb$ to be the -distribution of the Brownian motion $(\bfB_t^\M)_{t \in \ccint{0,T}}$ such that -$\bfB_0^\M$ has distribution $\pizero$, the target data distribution. Then, we consider -the \emph{dynamical Schr\"odinger bridge problem} -\begin{equation} - \Qbb^\star = \argmin \ensembleLigne{\KL{\Qbb}{\Pbb}}{\Qbb \in \Pens(\ccint{0,T}, \M), \ \Qbb_0 = \pizero, \ \Qbb_T = \piinv} . -\end{equation} -The solution $\Qbb^\star$ is called the Schr\"odinger Bridge (SB). 
Note that if -$\Qbb^\star$ is associated with a backward process -$(\bfY_t^\star)_{t \in \ccint{0,T}}$, then we can obtain a generative model as -follows. First sample from $\piinv = \mathcal{L}(\bfY^\star_T)$ and then follow -the (backward) dynamics of $(\bfY^\star_t)_{t \in \ccint{0,T}}$. By definition, we obtain -that $\mathcal{L}(\bfY^\star_0) = \pizero$, the target distribution. - -In practice however, the solution of the SB problem is approximated using the -Iterative Proportional Fitting (IPF) algorithm. Note that in discrete space the -IPF is also known as the Sinkhorn algorithm \citep{sinkhorn1967diagonal,peyre2019computational}. The -IPF defines a sequence of path probability measures -$(\Qbb^n)_{n \in \nset} \in (\Pens(\rmc(\ccint{0,T}, \M)))^\nset$, such that -$\Qbb^0 = \Pbb$ and for any $n \in \nset$ -\begin{align} - &\Qbb^{2n+1} = \argmin \ensembleLigne{\KL{\Qbb}{\Qbb^{2n}}}{\Qbb \in \Pens(\rmc(\ccint{0,T}, \M)), \Qbb_T = \piinv} , \\ - &\Qbb^{2n+2} = \argmin \ensembleLigne{\KL{\Qbb}{\Qbb^{2n+1}}}{\Qbb \in \Pens(\rmc(\ccint{0,T}, \M)), \Qbb_0 = \pizero} . -\end{align} -Under mild assumptions on $\Pbb$, $\pizero$ and $\piinv$, we have that -$(\Qbb^n)_{n \in \nset}$ converges towards $\Qbb^\star$ \cite[see][]{nutz2022stability}. -In what follows, we propose an algorithm to -approximately sample from $(\Qbb^n)_{n \in \nset}$. In Euclidean state spaces, -\cite{debortoli2021neurips,vargas2021solving,chen2021likelihood} have proposed -an algorithm based on time-reversal to compute the IPF. We now extend these -techniques to the case of Riemannian manifolds. - -\paragraph{Riemannian Diffusion Schr\"odinger Bridge} - -We propose Riemannian Diffusion Schr\"odinger Bridge (RDSB) an extension of -Diffusion Schr\"odinger Bridge \cite{debortoli2021neurips} to approximate -solutions of SB problems. First, we connect the iterates -$(\Qbb^n)_{n \in \nset}$ with diffusion processes on $\M$. 
- -\begin{proposition} - \label{prop:continuous_schro} - Let $\Pbb$ be the path measure of the Brownian motion initialized at $\piinv$. - Assume that for any $n \in \nset$, $\KL{\Qbb^n}{\Pbb}< +\infty$ and that for - any $t \in \ccint{0,T}$ and $n \in \nset$, $\Qbb^n_t$ admits a smooth positive - density w.r.t.\ $\piinv$. Then, for any $n \in \nset$ we have: - \begin{enumerate}[wide, labelwidth=!, labelindent=0pt, label=(\alph*)] - \item $R(\Qbb^{2n+1})$ solves the martingale problem with generator $\generator^{2n+1}(t,u) = \langle \nabla u, b_{T-t}^n \rangle + (1/2) \Delta u$; - \item $\Qbb^{2n+2}$ solves the martingale problem with generator $\generator^{2n+2}(t,u) = \langle \nabla u, f_{t}^{n+1} \rangle + (1/2) \Delta u$; - \end{enumerate} - where for any $n \in \nset$, $t \in \ccint{0,T}$ and - $x \in \rset^d$, $b^{n}_t( x) = -f^{n}_t(x) + \nabla \log p^{n}_t(x)$, - $f^{n+1}_t(x) = -b^n_t(x) + \nabla \log q^n_t(x)$, with $f^0_t(x) = 0$, and $p^n_t$, $q_t^n$ - the densities of $\Qbb^{2n}_t$ and $\Qbb_t^{2n+1}$. -\end{proposition} - -\begin{proof} - The proof is similar to \citet[Proposition 6]{debortoli2021neurips} using - \Cref{thm:time_reversal_manifold} instead of \citet[Theorem - 4.19]{cattiaux2021time} -\end{proof} - -In particular, we have that $\Qbb^1$ is the diffusion process associated with -RSGM, \ie \ the time-reversal of the Brownian motion initialized at -$\piinv$. Hence, $\Qbb^{2n+1}$ for $n \in \nset$ with $n \geq 1$ can be seen as -a refinement of $\Qbb^1$. In the next proposition, we show that the drift term -of the diffusion processes associated with $(\Qbb^n)_{n \in \nset}$ can be -approximated leveraging score-based techniques. - -\begin{proposition} - \label{prop:loss_implicit_explicit} - Let $(\bfX_t)_{t \in \ccint{0,T}}$ be a $\M$-valued process with distribution - $\Pbb \in \Pens(\rmc(\ccint{0,T}, \M))$ such that for any $t \in \ccint{0,T}$, - $\bfX_t$ admits a positive density $p_t \in \rmc^\infty(\M)$ - w.r.t.\ $\piinv$. 
Let $s: \ \ccint{0,T} \to \XM$. For any $t \in \ccint{0,T}$ - and $x \in \M$, let - \begin{equation} - r(t,x) = -s(t,x) + \nabla \log p_t(x) . - \end{equation} - Then, for any $t \in \ccint{0,T}$, we have that - \begin{equation} - r(t, \cdot) = \argmin \ensembleLigne{\expeLigne{(1/2)\normLigne{s(t, \bfX_t) + r(\bfX_t)}^2 + \dive(r)(\bfX_t)}}{r \in \mathrm{L}^2(\Pbb_t)} . - \end{equation} -\end{proposition} - -\begin{proof} - Let $t \in \ccint{0,T}$. First, we have for any $x \in \M$ - \begin{align} - &\normLigne{r(t,x) - \{-s(t,x) + \nabla \log p_t(x) \}}^2\\ - & \qquad = \normLigne{r(t,x) + s(t, x)}^2 -2 \langle r(t,x), \nabla \log p_t(x) \rangle + \normLigne{\nabla \log p_t(x)}^2 - 2 \langle s(t,x), \nabla \log p_t(x) \rangle . - \end{align} - Hence, we get that - $r(t, \cdot) = \argmin \ensembleLigne{\expeLigne{\norm{s(t, \bfX_t) + - r(\bfX_t)}^2 - 2 \langle r(\bfX_t), \nabla \log p_t(\bfX_t) \rangle}}{r - \in \XM}$. -Using the - divergence theorem \cite[see][p.51]{lee2018introduction}, we have for any $r \in \XM$ - \begin{align} - \expeLigne{\langle r(\bfX_t), \nabla \log p_t(\bfX_t) \rangle} &= \textstyle{\int_\M \langle r(x_t), \nabla \log p_t(x_t) \rangle p_t(x_t) \rmd \piinv(x_t)} \\ - &= - \textstyle{\int_\M \dive(r)(x_t) p_t(x_t) \rmd \piinv(x_t) = -\expeLigne{\dive(r)(\bfX_t)}} , - \end{align} -which concludes the proof. -\end{proof} - - - -% Once we have defined general score-based generative moedls on compact Riemannian -% manifolds, these models can be used as the basis for several extensions. We list -% two of them: conditional sampling and Schr\"odinger bridge. - -% \paragraph{Conditional sampling} We first consider inverse problems on the -% manifold $\M$. Namely, given an observation $y$, we aim at recovering the -% initial signal $x \in \M$. 
Inverse problems on manifolds are ubiquitous in - - - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/score_approx.tex b/doc/score_approx.tex deleted file mode 100644 index 0fcdd67..0000000 --- a/doc/score_approx.tex +++ /dev/null @@ -1,639 +0,0 @@ -\section{Riemannian Score-based Generative Modeling} -\label{sec:score-appr-manif} - -Similarly to the Euclidean setting, three ingredients are required to extend SGM -to compact Riemannian manifolds: -\begin{enumerate*}[label=\roman*)] -\item a forward \emph{noising} process on the Riemannian manifold which converges to an easy-to-sample reference distribution, -\item a time-reversal formula on Riemannian manifolds which defines a backward generative process, -\item a method to efficiently approximate the drift of the time-reversal process. -\end{enumerate*} -We address all these problems and introduce RGSM. The key differences between -SGMs and RSGMs are summarised in \cref{tab:difference}. - - -\begin{table}[h] -\small -\centering -\renewcommand*{\arraystretch}{1.2} -\begin{tabular}{lcc} -% \toprule - Ingredient \textbackslash ~Space & Euclidean & Compact manifold \\ \hline - Forward process & Ornstein--Ulhenbeck & Brownian motion \\ - Easy-to-sample distribution & Gaussian & Uniform \\ - Time reversal & \citet[Theorem 4.9]{cattiaux2021time} & \cref{thm:time_reversal_manifold} \\ - Sampling of the forward process & Direct & Geodesic Random Walk \\ - Sampling of the backward process & Euler--Maruyama & Geodesic Random Walk \\ -% \bottomrule -\end{tabular} -\caption{\small Differences between SGM on Euclidean spaces and RSGM on compact Riemannian manifolds.} -\label{tab:difference} -\end{table} - - - -\subsection{Brownian motion on compact Riemannian manifolds} -\label{sec:brown-moti-comp} - -\paragraph{Brownian motion and uniform distribution} - -First, we define a forward noising process on $\M$ targeting an easy-to-sample -reference distribution. 
In Euclidean spaces, the reference distribution is a
-standard normal; in the compact manifold setting, the uniform distribution
-$\piinv$ is the natural choice. For most manifolds of interest, one can either
-sample exactly from $\piinv$ or obtain approximate samples with high accuracy.
-For the forward noising dynamics, the Ornstein--Uhlenbeck process
-\eqref{eq:forward_SDE} used in Euclidean scenarios is now replaced by the
-Brownian motion defined on $\M$ as it converges exponentially fast to
-$\piinv$---see \Cref{prop:brownian_conv} below. We refer to
-\Cref{sec:brown-moti-manif} for a general introduction to Brownian motions on
-manifolds. This Brownian motion is defined as follows.
-
-\begin{definition}[Brownian motion]
-  $(\bfB^\M_t)_{t \geq 0}$ is a Brownian motion on $\M$ if
-  $(\bfB^\M_t)_{t \geq 0}$ is associated with the SDE with infinitesimal
-  generator $\generator(f) = \Delta f$, see \cref{sec:notation}.
-\end{definition}
-
-We refer to \cref{sec:brown-moti-manif} or \citet[Chapter 1, Chapter
-3]{hsu2002stochastic} for the definition of a $\M$-valued semimartingale and the
-Laplace-Beltrami operator. By \citet[Proposition 3.2.1]{hsu2002stochastic}, we
-have that for any initial condition $\bfB^\M_0$ with distribution
-$\pizero \in \Pens(\M)$, there exists $(\bfB^\M_t)_{t \geq 0}$. The convergence
-rates are obtained w.r.t. the total variation distance between the uniform
-distribution and the semi-group $(\Pker_t)_{t \geq 0}$\footnote{We define
-  $(\Pker_{t|s})_{t, s \geq 0, t \geq s}$ the semi-group such that for any
-  $f,g \in \rmc(\M)$ and $t, s \geq 0$ with $t \geq s$ we have
-  $\expeLigne{f(\bfB_t^\M)g(\bfB_s^\M)} = \expeLigne{\int_{\M}f(y)
-  \Pker_{t|s}(\bfB_s^\M, \rmd y) g(\bfB_s^\M)}$. For the rest of this paper we
-  denote $(\Pker_t)_{t \geq 0} = (\Pker_{t|0})_{t \geq 0}$.}
-%, see \citet[Proposition 2.6]{urakawa2006convergence}. 
- -\begin{proposition}[{{Convergence of Brownian motion \cite[Proposition 2.6]{urakawa2006convergence}}}] - \label{prop:brownian_conv} - For any $t > 0$, $\Pker_t$ admits a density $p_t$ w.r.t $\piinv$ and - $\piinv \Pker_t = \piinv$, \ie \ $\piinv$ is an invariant measure for - $(\Pker_t)_{t \geq 0}$. In addition, if there exists $C, \alpha \geq 0$ such - that for any $t \in \ocint{0,1}$, $p_t(x,x) \leq C t^{-\alpha /2}$ then - for any $\pizero \in \Pens(\M)$ and for any $t \geq 1/2$ we have - \begin{equation} - \textstyle{\tvnorm{\pizero \Pker_t - \piinv} \leq C^{1/2} \rme^{\lambda_1 /2} \rme^{-\lambda_1 t} ,} - \end{equation} - where $\lambda_1$ is the first non-negative eigenvalue of $-\Delta_\M$ in $\mathrm{L}^2(\piinv)$. -\end{proposition} - -The diagonal upper bound on the heat kernel is satisfied for many manifolds -including the $d$-dimensional torus and sphere \cite[see][Section -3]{saloff1994precise}. Hence, \cref{prop:brownian_conv} ensures that under mild -conditions the Brownian motion converges exponentially fast towards the uniform -distribution on the compact Riemmanian manifold $\M$. Therefore, in the context -of SGM, the Brownian motion on $\M$ is the counterpart to the Ornstein--Ulhenbeck -process and the uniform distribution is the counterpart to the Gaussian one. - -We note that in previous works on SGMs, the Brownian motion has also been used -as a forward noising process \citep{song2020score,song2019generative}. However, -in these cases, the Brownian motion is not geometrically ergodic and does not -admit any invariant distribution contrary to our setting. Two issues remain to -be solved. First, we need to be able to sample this forward noising process -$(\bfB_t^\M)_{t \geq 0}$. Second we need to obtain tractable approximations of -the heat kernel, i.e. the transition kernel of this process, in order to define -efficient score approximation schemes. 
-
-\paragraph{Sampling from diffusions}
-In Euclidean spaces, sampling an Ornstein--Uhlenbeck process is straightforward
-whereas obtaining samples from a Brownian motion on a manifold is non-trivial in
-general. First, if $\M$ is isometrically embedded into $\rset^p$ (with
-$p \geq d$)---i.e.\ $\M \subset \rset^p$---then we have that
-$(\bfB^\M_t)_{t \geq 0}$ (seen as a process on the ambient space $\rset^p$)
-satisfies the following SDE
-\begin{equation}
-  \label{eq:brownian_motion_extrinsic}
-  \textstyle{\rmd \bfB^\M_t = \sum_{i=1}^p P_i(\bfB^\M_t) \circ \rmd \bfB_t^i,}
-\end{equation}
-where $\circ$ denotes the Stratonovich integral \footnote{Manifold valued processes are usually defined using the Stratonovich integral because it satisfies the chain rule of differential calculus. For more details we refer to \cref{sec:stoch-diff-equat}.},
-$(\{\bfB_t^i\}_{i=1}^p)_{t \geq 0}$ is a $p$-dimensional Brownian motion and for
-any $i \in \{1, \dots, p\}$ we have $P_i(x) = P(x) e_i$ for any $x \in \M$, where $\{e_i\}_{i=1}^p$ is the canonical basis of $\rset^p$
-and $P(x): \ \rset^p \to \mathrm{T}_x \M$ is the orthogonal projection operator,
-see \cref{sec:metr-conn-tens}. However, this approach is \emph{extrinsic} and
-requires the knowledge of the projection
-operator. % It is also limited to the Brownian motion and it is not
-% easily extended to other diffusions on $\M$.
-Here we consider an \emph{intrinsic}
-approach based on Geodesic Random Walks (GRWs), see \cite{jorgensen1975central}
-for a review of their properties.
-
-GRWs are not restricted to approximating the Brownian motion on $\M$ but in fact
-can approximate \emph{any} well-behaved diffusion on $\M$. This property will be
-useful when sampling the backward diffusion process. 
Hence, we introduce GRWs in
-a general framework and we are going to consider a discrete-time process
-$(X_n^\gamma)_{n \in \nset}$ which approximates $(\bfX_t)_{t \geq 0}$,
-the diffusion associated with
-\begin{equation}
-  \label{eq:generic}
-  \rmd \bfX_t = b(\bfX_t) \rmd t + \sigma(\bfX_t) \rmd \bfB_t^\M.
-\end{equation}
-Let
-$\{ \nu_x \}_{x \in \M}$ such that for any $x \in \M$,
-$\nu_x \in \Pens(\mathrm{T}_x \M)$. Assume that for any $x \in \M$,
-$\int_{\mathrm{T}_x \M} \normLigne{v}^2 \rmd \nu_x(v)< +\infty$. In addition, assume that
-there exists $b \in \XM$ and $\Sigma \in \XMdeux$, such that for any $x \in \M$,
-$\int_{\mathrm{T}_x \M} v \rmd \nu_x(v) = b(x)$ and
-$\int_{\mathrm{T}_x \M} (v - b(x)) \otimes (v - b(x)) \rmd \nu_x(v) =
-\Sigma(x) = \sigma(x) \sigma(x)^\top$. % In addition, we assume
-  % that for any $x \in \M$, $\Sigma(x) = \mu^{(2)}(x) - \mu^{(1)}(x) \otimes \mu^{(1)}(x)$ is
-  % strictly positive definite and that there exists $\Ltt \geq$ such that for
-  % any $x, y \in \M$, $\tvnorm{\nu_x - \nu_y} \leq \Ltt d(x,y)$. Where we have
-  % that for any $\nu_1 \in \mathrm{T}_x \M$ and $\nu_2 \in \mathrm{T}_y \M$,
-  % \begin{equation}
-  %   \tvnorm{\nu_x - \nu_y} = \sup \ensembleLigne{\nu_1[f] - \Gamma_{x}^y(\gamma)_\# \nu_2[f]}{\gamma \in \mathrm{Geo}_{x,y}, \ f \in \rmc(\mathrm{T}_x \M)} .
-  % \end{equation}
-  % Note that if $d(x,y) \leq \vareps$ then for some $\vareps > 0$ we have that $\abs{\mathrm{Geo}_{x,y}}=1$.
-
-
-\begin{definition}[Geodesic Random Walk]
-  Let $X_0$ be a $\M$-valued random variable. 
For any $\gamma > 0$, we - define $(\bfX_t^{\gamma})_{t \geq 0}$ such that $\bfX_0^\gamma = X_0$ and - for any $n \in \nset$ and $t \in \ccint{0, \gamma}$, - $\bfX_{n\gamma + t} = \exp_{\bfX_{n \gamma}}\left(t\gamma \{ \mu_n + - (1/\sqrt{\gamma}) (V_n - \mu_n)\}\right)$\footnote{where $\exp_x: \ \mathrm{T}_x \M \to \M$ is the exponential mapping on the manifold, see \citet[Chapter 20]{lee2013smooth} for details.}, where $(V_n)_{n \in \nset}$ is a - sequence of random variables in such that for any $n \in \nset$, $V_n$ has - distribution $\nu_{\bfX_{n \gamma}}$ conditionally to $\bfX_{n - \gamma}$. We say that - $(X_n^\gamma)_{n \in \nset} = (\bfX_{n \gamma})_{n \in \nset} \in - \M$ is a Geodesic - Random Walk (GRW). -\end{definition} - -Note that for any $n \in \nset$ and $\gamma >0$, $X_n^\gamma \in \M$. For any -$\gamma>0$, we denote by $(\Qker_n^{\gamma})_{n \in \nset}$ the sequence of -Markov kernels such that for any $n \in \nset$, $x \in \M$ and -$\msa \in \mcb{\M}$ we have that -$\updelta_x \Qker_n^\gamma(\msa) = \Pbb(X_n^\gamma \in \msa)$, with -$X_0^\gamma = x$. GRWs are appealing because, under mild conditions, when the -stepsize $\gamma \to 0$, they converge towards $(\bfX_t)_{t \geq 0}$ solution of -\eqref{eq:generic} in the following sense: - -\begin{theorem}[{{Convergence of geodesic random walk \cite[Theorem 2.1]{jorgensen1975central}}}] - \label{thm:grw_diffusion} - Under the conditions of \cref{thm:jorgensen_appendix}, for any $t \geq 0$, - $f \in \rmc(\M)$ we have that - $\lim_{\gamma \to 0} \normLigne{ \Qker_{\gamma}^{\ceil{t/\gamma}}[f] - - \Pker_t[f]}_{\infty} = 0$, where $(\Pker_t)_{t \geq 0}$ is the - semi-group associated with the infinitesimal generator - $\generator: \ \rmc^\infty(\M) \to \rmc^\infty(\M)$ given for any - $f \in \rmc^\infty(\M)$ by - $\generator(f) = \langle b, \nabla f \rangle_{\M} + (1/2) \langle - \Sigma, \nabla^2f \rangle_{\M}$. 
-\end{theorem} - -In particular if $b = 0$ and $\sigma = \Id$, then the random walk -converges towards a Brownian motion on $\M$ in the sense of the convergence -of semi-groups. -% In this case, for any $x \in \M$ in local coordinates we -% have that $\Phi_\# \nu_x$ has zero mean and covariance matrix $G(x)$, where -% $\Phi$ is a local chart around $x$ and -% $G(x) = (g_{i,j}(x))_{1 \leq i,j \leq d}$ the coordinates of the metric in -% that chart. -One advantage of GRW is that they allow to samples from arbitrary diffusions -under mild assumptions. This property will be key to sample from the backward -process. \cref{thm:grw_diffusion} can be extended to approximate time -inhomogeneous diffusions. We leave the proof of this extension for future -work. In \cref{alg:grw}, we remind how to approximately sample from a diffusion -$(\bfX_t)_{t \in \ccint{0,T}}$ using GRWs, where $(\bfX_t)_{t \in \ccint{0,T}}$ -associated with the family of infinitesimal generator -$(\generator_t)_{t \in \ccint{0,T}}$ given for any $t \in \ccint{0,T}$ and -$f \in \rmc^2(\M)$ by -$\generator_t(f) = \langle b_t, \nabla f \rangle + \langle \Sigma_t, \nabla^2 f -\rangle$, where $b: \ \ccint{0,T} \to \XM$, $\Sigma_t = \sigma_t \sigma_t^\top$ -with $\sigma_t : \ \ccint{0,T} \to \XMdeux$. For simplicity, in \cref{alg:grw}, -we assume that $\M$ is embedded in $\rset^p$ and use the projection to define -the noise on the tangent space (such an embedding always exists using the Nash -embedding theorem, see \cite{gunther1991isometric} for example). In a more -general setting, we compute the noise on the tangent space using local -coordinates. 
- - -\begin{algorithm}[!t] -\caption{\small Geodesic Random Walk (GRW)} -\label{alg:grw} -\begin{algorithmic}[1] - \small - \Require $T, K, X_0, b, \sigma, P$ - \State $\gamma = T / K$ \Comment Step-size - \For{$k \in \{0, \dots, K-1\}$} - \State $\bar{Z}_{k+1} \sim \mathcal{N}(0, I_p)$ \Comment Standard Gaussian in ambient space $\rset^p$ - \State $Z_{k+1} = P(X_k) \bar{Z}_{k+1}$ \Comment Projection in the tangent space $\mathrm{T}_x \M$ - \State $V_{k+1} = \gamma b(k \gamma, X_k) + \sqrt{\gamma} \sigma(k \gamma, X_k) Z_{k+1}$ \Comment Euler-Maruyama step on tangent space - \State $X_{k+1} = \exp_{X_k}\left(V_{k+1}\right)$ \Comment Geodesic projection onto $\M$ - \EndFor - \State {\bfseries return} $\{ X_k\}_{k=0}^{K-1}$ -\end{algorithmic} -\end{algorithm} - -\paragraph{Heat kernel on compact Riemannian manifolds} -The semi-group of the Brownian motion $(\Pker_t)_{t \geq 0}$ (called the heat -kernel) admits a density w.r.t.\ $\piinv$, such that for any $f \in \rmc(\M)$, -$x_0 \in \M$ and $t > 0$ we have -\begin{equation} - \textstyle{ - \updelta_{x_0} \Pker[f] = \int_{\M} f(x_t)p_{t|0}(x_t|x_0) \rmd \piinv(x_t). - } -\end{equation} -In addition, this transition density is positive and -$(t,x,y) \mapsto p_{t|0}(y|x) \in \rmc^\infty(\ooint{0,+\infty} \times \M \times -\M)$ and satisfies the heat equation $\partial_t p_{t|0} = \Delta -p_{t|0}$. However, contrary to the Gaussian transition density of the -Ornstein--Ulhenbeck process, it is typically only available as an infinite -series. In order to circumvent this issue we consider two -techniques: \begin{enumerate*}[label=\roman*)] -\item a truncation approach, -\item a Taylor expansion around $t=0$, \ie \ a Varadhan asymptotics. 
-\end{enumerate*} -% -First, we recall that in the case of compact manifolds we have that for any -$t > 0$ and $x, y \in \M$ -\begin{equation} - \label{eq:infinite_sum} - \textstyle{p_{t|0}(x,y) = \sum_{j \in \nset} \rme^{-\lambda_j t} \phi_j(x)\phi_j(y),} -\end{equation} -where the convergence occurs in $\mathrm{L}^2(\piinv \otimes \piinv)$, -$(\lambda_j)_{j \in \nset}$ and $(\phi_j)_{j \in \nset}$ are the -eigenvalues, respectively the eigenvectors, of $-\Delta_\M$ in -$\mathrm{L}^2(\piinv)$ \cite[see][Section 2]{saloff1994precise}. When the eigenvalues and eigenvectors are known, we approximate the -logarithmic gradient of $p_{t|0}$ by truncating the sum in -\cref{eq:infinite_sum} with $J \in \nset$ terms to obtain for any -$t > 0$ and $x,y \in \M$ -\begin{equation} - \nabla_x \log p_{t|0}(x,y) \approx \textstyle{S_{J,t}(x,y) = \sum_{j=0}^J \rme^{-\lambda_j t} \nabla \phi_j(x) \phi_j(y) / \sum_{j=0}^J \rme^{-\lambda_j t} \phi_j(x) \phi_j(y). } -\end{equation} -Note that for any $t \geq 0$, $x, y\in \M$, -$S_{J,t}(x,y) \in \mathrm{T}_x \M$. Under regularity conditions on $\M$ it can be -shown that for any $x,y \in \M$ and $t \geq 0$, -$\lim_{J \to +\infty} S_{J,t} = \nabla_x \log p_{t|0}(x,y)$ \cite[see][Lemma -1]{jones2008Manifold}. In the case of the $d$-dimensional torus or sphere -the eigenvalues and eigenvectors are known, \cite[see][Section -2]{saloff1994precise} and we can apply this method to approximate $p_{t|0}$ -for any $t > 0$. We refer to \cref{sec:eigenf-eigenv-lapl} for more details -about eigenvalues and eigenfunctions of the Laplace-Beltrami operator in the -special case of the $d$-dimensional torus and sphere. \valentin{experiment: - quality o the approximation as a function of $J$ and $t$. On the same - graph put the Varadhan approx} - -When the eigenvalues and eigenvectors are not tractable, we -can still derive an approximation of the heat kernel for small times $t$. 
Using -Varadhan's asymptotics---see \citet[Theorem 3.8]{bismut1984large} or -\citet[Theorem 2.1]{chen2021logarithmic}---for any $x, y \in \M$ with -$y \notin \mathrm{Cut}(x)$ (where $\mathrm{Cut}(x)$ is the cut-locus of $x$ -in $\M$) we have that \cite[see][Chapter 10]{lee2018introduction} -\begin{equation} - \label{eq:varadhan} - \textstyle{\lim_{t \to 0} t \nabla_y \log p_{t|0}(x,y) = - \exp^{-1}_x(y) . } -\end{equation} -Note that since the cut-locus has null measure under the uniform distribution -$\piinv$ \citep[Theorem 10.34]{lee2006riemannian}, the previous relation is -valid almost everywhere. We will see in \cref{sec:riem-score-appr} that an -approximation for any $x, y \in \M$ of $t \nabla_y \log p_t(x,y)$ for small -values of $t \geq 0$ is enough to define a score approximation. - - -\subsection{A manifold time-reversal formula} -\label{sec:time-revers-form} - -After having defined the forward noising process targeting a reference -distribution, a second key ingredient of SGMs is to derive a time-reversal -formula. Namely, if $(\bfX_t)_{t \in \ccint{0,T}}$ is a diffusion process then -$(\bfX_{T-t})_{t \in \ccint{0,T}}$ is also a diffusion process w.r.t.\ the -backward filtration whose coefficients can be computed, see -\cref{sec:time-reversal}. 
Our next result is the Riemannian -counterpart to the Euclidean time-reversal formula, see \citet[Theorem -4.9]{cattiaux2021time} and \citet{haussmann1986time} for instance, which states -under mild regularity and integrability conditions if the $\rset^d$-valued -process $(\bfX_t)_{t \in \ccint{0,T}}$ is a (weak) solution to the SDE -\begin{equation} - \rmd \bfX_t = b(\bfX_t) \rmd t + \rmd \bfB_t , -\end{equation} -then $(\bfY_t)_{t \in \ccint{0,T}} = (\bfX_{T-t})_{t \in \ccint{0,T}}$ is a -(weak) solution to the SDE -\begin{equation} - \rmd \bfY_t = \{-b(\bfY_t) + \nabla \log p_{T-t}(\bfY_t) \}\rmd t + \rmd \bfB_t , -\end{equation} -In the case where $(\bfX_t)_{t \in \ccint{0,T}}$ is an Ornstein-Ulhenbeck -process, then we recover \cref{eq:backward_SDE}. - -\begin{theorem}[Reverse diffusion] - \label{thm:time_reversal_manifold} - Let $T \geq 0$ and $(\bfB_t^\M)_{t \geq 0}$ be a Brownian motion on $\M$ such - that $\bfB_0^\M$ has distribution $\piinv$. Let - $(\bfX_t)_{t \in \ccint{0,T}}$ associated with the SDE - $\rmd \bfX_t = b(\bfX_t) \rmd t + \rmd \bfB_t^\M$. Let - $(\bfY_t)_{t \in \ccint{0,T}} = (\bfX_{T-t})_{t \in \ccint{0,T}}$ and assume - that $\KLLigne{\Pbb}{\Qbb} < +\infty$, where - $\Qbb \in \Pens(\rmc(\ccint{0,T}, \M))$ is the distribution of - $(\bfB_t^\M)_{t \in \ccint{0,T}}$. In addition, assume that for any - $t \in \ccint{0,T}$, $\Pbb_t$ admits a smooth positive density $p_t$ w.r.t.\ - $\piinv$. Then, we have that $(\bfY_t)_{t \in \ccint{0,T}}$ is associated with - the SDE - \begin{equation} - \label{eq:time_reversal_manifold} - \rmd \bfY_t = \{-b(\bfY_t) + \nabla \log p_{T-t}(\bfY_t)\} \rmd t + \rmd \bfB_t^\M. - \end{equation} -\end{theorem} - -\begin{proof} - The proof is a smooth extension of \citet[Theorem 4.9]{cattiaux2021time} to the - Riemannian manifold case. We postpone the detailed proof to - \cref{sec:time-reversal}. 
-\end{proof}
-
-Note that the formulas obtained for the drift of the time-reversed process are the
-same in the Euclidean and the Riemannian settings upon replacing the Euclidean
-gradient operator, Laplacian and scalar product by the Riemannian ones. As a
-corollary of \cref{thm:time_reversal_manifold}, we get the following result.
-
-% Recall that in the case where the manifold is isometrically embedded in $\rset^p$
-% for some $p \geq d$ then $(\bfB_t^\M)_{t \in \ccint{0,T}}$ satisfies \eqref{eq:brownian_motion_extrinsic}.
-% Therefore, if we let $(\bfY_t)_{t \in \ccint{0,T}}= (\bfB_{T-t}^\M)_{t \in \ccint{0,T}} $ we have that for any $j \in \{1, \dots, p\}$
-% \begin{equation}
-% ....
-% \end{equation}
-%\valentin{I dont even see now how we can get the extrinsic result!}
-
-\begin{corollary}
-  Under the conditions of \cref{thm:time_reversal_manifold}, denote
-  $(\Pker_t)_{t \in \ccint{0,T}}$ and
-  $(\Qker_{t|s})_{s, t \in \ccint{0,T}, t\geq s}$ the semi-groups associated with
-  $\Pbb$, respectively $R(\Pbb)$. Then:
-  \begin{enumerate}[label= (\alph*), wide, labelwidth=!, labelindent=0pt]
-  \item $(\Pker_t)_{t \in \ccint{0,T}}$ is associated with the generator
-    $\generator: \ \rmc^\infty(\M) \to \rmc^\infty(\M)$ given for any
-    $f \in \rmc^\infty(\M)$ by
-    $\generator(f) = \langle b, \nabla f \rangle_{\M} + (1/2) \Delta_\M f$.
-  \item $(\Qker_{t|s})_{s,t \in \ccint{0,T}, t\geq s}$ is associated with the family of generators $(\generator_u)_{u \in \ccint{0,T}}$ such that for any $u \in \ccint{0,T}$,
-    $\generator_u: \ \rmc^\infty(\M) \to \rmc^\infty(\M)$ is given for any
-    $f \in \rmc^\infty(\M)$ by
-    $\generator_u(f) = \langle -b + \nabla \log p_{T-u}, \nabla f \rangle_{\M} + (1/2) \Delta_\M f$.
-  \end{enumerate}
-\end{corollary}
-
-In particular, we can approximate the time-reversed process with a GRW using
-\cref{thm:grw_diffusion}. 
- -\subsection{Score approximation on Riemannian manifolds} -\label{sec:riem-score-appr} - -The last ingredient in order to define the (compact) Riemannian manifold -extension of SGM is an approximation of the logarithmic gradient appearing in -\cref{eq:time_reversal_manifold}. - - -\paragraph{Score-matching and loss functions} -We aim to approximate $\nabla \log p_t(x)$ for every $t \in \ocint{0,T}$ and -$x \in \M$. To do so, we first remark that for any $s,t \in \ocint{0,T}$ with $t > s$ and -$x_t \in \M$, $p_t(x_t) = \int_{\M} p_{t|s}(x_t|x_s) \rmd \Pbb_s(x_s)$. Therefore, -we obtain that for any $s, t \in \ccint{0,T}$ with $t > s$ and $x_t \in \M$ - \begin{equation} - \textstyle{ - \nabla \log p_t(x_t) = \int_{\M} \nabla_x \log p_{t|s}(x_t|x_s) \Pker_{s|t}(x_t, \rmd x_s) . - } - \end{equation} - Hence, for any $s, t \in \ccint{0,T}$ with $t > s$ we have that - \begin{equation} - \textstyle{ - \nabla \log p_t = \argmin \ensembleLigne{\ell_{t|s}(s_t)}{s_t \in \rmL^2(\Pbb_t)} , \quad \ell_{t|s}(s_t) = \int_{\M \times \M} \normLigne{\nabla_x \log p_{t|s}(x_t|x_s) - s_t(x_t)}^2 \rmd \Pbb_{s,t}(x_s,x_t) . - } - \end{equation} - The loss function $\ell_{t|s}$ is called the Denoising Score Matching (DSM) - loss. It can also be written in an \emph{implicit} fashion. - \begin{proposition} - \label{prop:implicit_der} - Let $t \in \ocint{0,T}$. If $s_t \in \rmc^\infty(\M)$ then we have that $\ell_{t|s}(s_t) = 2 \ellim_t(s_t) + \int_{\M \times \M} \normLigne{\nabla \log p_{t|s}(x_t|x_s)}^2 \rmd \Pbb_{s,t}(x_s,x_t)$, where - \begin{equation} - \textstyle{ - \ellim_t(s_t) = \int_\M \{ \tfrac{1}{2}\normLigne{s_t(x_t)}^2 + \dive(s_t)(x_t) \} \rmd \Pbb_t(x_t) . - } - \end{equation} - - \end{proposition} - - \begin{proof} - The proof is postponed to \Cref{sec:implicit-losses}. - \end{proof} - - For any $t \in \ocint{0,T}$ the minimizers of the loss $\ellim_t$ on $\XM$ - are the same as the ones for $\ell_{t|s}$. 
The loss $\ellim_t$ is called the
-  \emph{implicit} score matching (ISM) loss (or sliced score matching (SSM)
-  loss if the divergence is approximated using Hutchinson's trace
-  estimator \cite{hutchinson1989stochastic}). Depending on the assumptions on
-  the specific manifold at hand it may be more convenient to use $\ell_{t|s}$
-  or $\ellim_t$. Assume that we have access to
-  $\ensembleLigne{\nabla \log p_{t|s}}{s, t \in \ccint{0,T}, \ t > s}$ or an
-  approximation of this family, then we can use $\ell_{t|s}$, the
-  \emph{explicit} score matching loss, to learn
-  $\ensembleLigne{s_t}{t \in \ccint{0,T}}$. Using the results of
-  \cref{sec:brown-moti-comp}, we highlight two methods to approximate
-  $\ell_{t|s}$:
-  \begin{enumerate}[label= (\alph*), wide, labelwidth=!, labelindent=0pt]
-  \item If we have access to an approximation of
-    $\ensembleLigne{p_{t|0}}{t \in \ocint{0,T}}$ then $\ell_{t|0}$ can be
-    used. Note that this loss is similar to the one used in the Euclidean
-    setting, see
-    \citep{song2020score,song2020improved,song2020denoising,ho2020denoising}
-    for instance. In the case where the eigenvalues and the eigenfunctions of
-    the Laplace-Beltrami operator are known, such an approximation is
-    available, see \cref{sec:brown-moti-comp}. However, the quality of the
-    approximation deteriorates when $t$ is close to
-    $0$. % In particular, in the case of
-    % the sphere or the torus, we use this loss function as our baseline, see
-    % \cref{sec:experiments}. 
\valentin{TO MODIFY}
- \item If we do not have access to the eigenvalues and eigenfunctions of the
- Laplace-Beltrami operator, then we can still derive an approximation of
- $\nabla \log p_{t|s}$ for all $s \in \ccint{0,t}$ if $\abs{t-s}$ is
- small enough, using Varadhan-type approximations \eqref{eq:varadhan} and
- the inverse of $\exp$\footnote{If $\exp^{-1}$ is not available then it can
- be estimated using approximated logarithmic mappings
- \citep{goto2021approximated,schiela2020sqp} or inverse retractions
- \citep{zhu2020riemannian,sato2019riemannian}.}. In this case we use the
- loss functions $\ell_{t|s}$ for $\absLigne{t-s}$ small enough.
- \end{enumerate}
- We highlight that these two methods can be used in conjunction. For
- instance, one can rely on the truncation techniques to estimate $\ell_{t|0}$
- for large $t$ and the Varadhan asymptotics for small $t$.
-
- Last but not least, the \emph{implicit} score loss $\{\ellim_t\}_{t=0}^T$ is
- used in cases where we do not have access to the approximations of $p_{t|s}$
- for $s,t \in \ccint{0,T}$ with $t > s$. The only requirement to learn the
- implicit score is to be able to (approximately) sample from the forward
- dynamics, i.e. the Brownian motion on the Riemannian manifold. In
- particular, no approximation of the logarithmic derivative of the heat
- kernel is needed. One downside of using such an approach is that it relies
- on the computation of the divergence of the score $s_t$. The exact
- computation of the divergence is too costly in high dimension as it requires
- $d$ Jacobian-vector calls and estimators need to be used
- \cite{hutchinson1989stochastic}. Note that the loss function used in
- \citep{rozen2021moser} also involves computing a divergence. We summarize our
- different loss functions in \cref{tab:sm_losses}. 
-
-\begin{table}[h]
-\centering
-\small
-\renewcommand*{\arraystretch}{1.4}
-\begin{tabular}{ccl}
-Method & Loss function & Requirements \\
-\midrule
-$\ell_{t|0}$ (DSM) & $\frac{1}{2} \E \left[ \| s(\bfX_t) - \nabla \log p_{t|0}(\bfX_t|\bfX_0) \|^2 \right]$ & \vtop{\hbox{\strut $\triangleright$ Sampling of $(\bfX_t, \bfX_0)$}\hbox{$\triangleright$ Approximation of $\nabla \log p_{t|0}$}} \\ %\hline
-$\ell_{t|s}$ (DSM) & $\frac{1}{2} \E \left[ \| s(\bfX_t) - \nabla \log p_{t|s}(\bfX_t|\bfX_s) \|^2 \right]$ & \vtop{\hbox{\strut $\triangleright$ Sampling of $(\bfX_t, \bfX_s)$ for $\abs{t-s}$ small}\hbox{$\triangleright$ Approximation of $\nabla \log p_{t|s}$ for $\abs{t-s}$ small}} \\ %\hline
-$\ellim_t$ (ISM) & $\E \left[\frac{1}{2} \| s(\bfX_t) \|^2 + \dive( s)(\bfX_t) \right]$ & \vtop{\hbox{\strut $\triangleright$ Sampling of $\bfX_t$}\hbox{$\triangleright$ Approximation of $\dive(\bm{s}_\theta)$}}
-\end{tabular}
-\caption{\small Riemannian score matching losses.}
-\label{tab:sm_losses}
-\end{table}
-
-\paragraph{Parametric family of vector fields}
-% We need to define a parametric family of functions in order to
-We approximate $\{\nabla \log p_t\}_{t=0}^T$ by a
-family of functions $\{\bm{s}_\theta\}_{\theta \in \Theta}$ where $\Theta$ is a
-set of parameters and for any $\theta \in \Theta$,
-$\bm{s}_\theta: \ \ccint{0,T} \to \XM$. In this work, we consider several
-parameterisations of vector fields:
-%
-\begin{itemize}
-\item \textbf{Projected vector field}. We define
-  $\bm{s}_\theta(t, x) = \text{proj}_{T_{x}\M}(\tilde{\bm{s}}_\theta(t, x)) = P(x)
-  \tilde{\bm{s}}_\theta(t, x) $ for any $t \in \ccint{0,T}$ and $x \in \M$, with
-  $\tilde{\bm{s}}_\theta: \ \rset^p \times \ccint{0,T} \to \rset^p$ an ambient vector
-  field and $P(x)$ the orthogonal projection over $\mathrm{T}_x\M$ at $x \in \M$. 
- According to \citet[Lemma 2]{rozen2021moser}, we have
- $\dive(s_\theta)(x,t) = \dive_E(s_\theta)(x,t)$ for any $x \in \M$, where
- $\dive_E$ denotes the standard Euclidean divergence.
- % \mjh{We use the same trick in the vec field GP paper} \emile{We also do that in practice in \citep{mathieu2020riemannian}}
-
-
-\item \textbf{Divergence-free vector fields}: For any compact \valentin{I think
- this is needed? Maybe not} Lie group, any basis of the Lie algebra $\mathfrak{g}$
- yields a global frame. Indeed, let $v \in \mathfrak{g}$ and define the flow
- $\Phi: \ \rset \times \M \to \M$ given for any $t \in \rset$ and $x \in \M$ by
- $\Phi_t^v(x) = x \exp(t v)$. Then defining
- $\{E_i\}_{i=1}^d = \{\partial_t \Phi_0^{v_i}\}_{i=1}^d$, where
- $\{v_i\}_{i=1}^d$ is a basis of $\mathfrak{g}$, we get that $\{E_i\}_{i=1}^d$
- is a left-invariant global frame. As a result, we have that for any
- $i \in \{1, \dots, d\}$, $\dive(E_i)=0$ (for the classical left invariant
- metric). This result simplifies the computation of $\dive(\bm{s}_\theta)$ where
- $\bm{s}_\theta(t,x) = \sum_{i=1}^d \bm{s}^i_\theta(t,x) E_i(x)$ for any
- $t \in \ccint{0,T}$ and $x \in \M$ \cite[see][]{falorsi2020neural}.
-% We define
-% $s_\theta(x, t) = \sum_{i=1}^d s^i_\theta(x, t) f^i(x)$ for any
-% $t \in \ccint{0,T}$ and $x \in \M$, where for any $i \in \{1, \dots, d\}$,
-% $f_i(x) = \partial_t \exp( \cdot \xi_i)(0) \cdot x$ (with some frame
-% $\{\xi_i\}_{i=1}^d$). For any homogeneous space, this family of vector fields
-% generates the tangent bundle and is divergence-free \valentin{ref}.
-% % \mjh{Is the divergence free nature of the field going to impose some extra constraint on the score function, and the resulting process?}
-% % The $f^i$ are divergence free but the linear combination which yields $s_\theta$ has no constraint. 
-% Then for any $t \in \ccint{0,T}$ and $x \in \M$, we have -% $\dive(s_\theta)(x, t) = \sum_{i=1}^d \dive(s^i_\theta(\cdot, t) f^i)(x) = \sum_{i=1}^d \langle \nabla -% s^i_\theta(x, t), f^i(x)\rangle$. \valentin{this can be estimated stochastically. How?} -% % For more information see notes at \url{https://www.overleaf.com/read/thvfprqwmkjq}. -% % Not sure whether this can be useful here but this term does appear in standard (non-denoising) score matching \citep{hyvarinenEstimation,song2019Sliced}. } -\item \textbf{Coordinates vector fields}. We define - $\bm{s}_\theta(t, x) = \sum_{i=1}^d \bm{s}^i_\theta(t,x) E_i(x)$ for any - $t \in \ccint{0,T}$ and $x \in \M$, with - $\{E_i\}_{i=1}^d = \{\partial_i \varphi(x)\}_{i=1}^d$ the vector fields - induced by a choice of local coordinates, where $\varphi$ is a local - parameterization $\varphi: \ \msu \to \M$ and $z \in \msu \subset - \rset^d$. Then the divergence can be computed in these local coordinates - $\dive(\bm{s}_\theta)(t, \varphi(z)) =\absLigne{\det G}^{-1/2} \sum_{i=1}^d - \partial_i \{ \absLigne{\det G}^{1/2} \bm{s}^i_\theta(t, - \varphi(\cdot))\}(z)$. In the case of the sphere, one recovers the standard - divergence in spherical coordinates using this formula. - % If the manifold is not - % parallelisable, there does not exist a global frame, which implies that - % $\{f^i(x)\}$ is not a basis for any $x \in \M$. Think of the Hairy-ball - % theorem for the (n-)sphere. - % \mjh{but we don't actually need a basis. We just need a smooth set of basis - % vector fields that span the tangent space. The fields can be redundant - % e.g. 3 axis fields on the sphere. 
We then just mix these smooth fields with - % smooth scalar coeffs from an nn and mix them to get a smooth field} - % \emile{Agree that we don't need a basis, we only need a generator of the - % tangent bundle.} -\end{itemize} -\emile{We do not discuss NN architectural choices for $\{s_\theta^i\}_i$ but can do for the next iteration.} -% -Combining this parameterization with the score-matching losses, the -time-reversal formula \cref{sec:time-revers-form} and the sampling of forward -and backward processes \cref{sec:brown-moti-comp}, we now define our Riemannian -Score-based Generative Modeling algorithm, in \cref{alg:rsgm}. - - - \begin{algorithm}[!t] - \caption{\small Computation of the loss} - \label{alg:rsgm} - \begin{algorithmic}[1] - \small - \Require $\vareps, T, K, \pizero, \mathrm{loss}, \mathrm{thres}, s$ - \State $\bfX_0 \sim \pizero$ - \State $t \sim U(\ccint{\vareps, T})$ \Comment Uniform sampling between $\vareps$ and $T$ - \State $\bfX_t \sim \Pker_{t|0}(\bfX_0, \cdot)$ \Comment Approximate sampling using \cref{alg:grw} - \If{$\mathrm{loss = denoising}$} \Comment Denoising loss function - \If{$t < \mathrm{thres}$} - \State $\mathrm{score} = -(1/t) \exp^{-1}_{\bfX_t}(\bfX_0)$ \Comment Varadhan asymptotics - \Else - \State $\mathrm{score} = \sum_{j=0}^J \rme^{-\lambda_j t} \nabla \phi_j(x) \phi_j(y) / \sum_{j=0}^J \rme^{-\lambda_j t} \phi_j(x) \phi_j(y).$ \Comment Series truncation - \EndIf - \State $\ell(s) = \norm{s(\bfX_t) - \mathrm{score}}^2$ - \Else \Comment Implicit loss function - \State $\ell(s) = (1/2) \norm{s(\bfX_t)}^2 + \dive(s)(\bfX_t)$ - \EndIf - \State {\bfseries return} $\ell(s)$ - \end{algorithmic} - \end{algorithm} - - -\subsection{Likelihood computation} -\label{sec:likel-comp} - -Similarly to \cite{song2020score}, once the score is learned we can use it -in conjunction with an Ordinary Differential Equation (ODE) solver to compute -the likelihood of the model. Let $\{\Phi_t\}_{t=0}^T$ be a family of vector -fields. 
We define $(\bfX_t)_{t \in \ccint{0,T}}$ such that $\bfX_0$ has -distribution $p_0$ (the data distribution) and satisfying -$\rmd \bfX_t = \Phi_t(\bfX_t) \rmd t$. Assuming that $p_0$ admits a density -w.r.t.\ $\piinv$ then for any $t \in \ccint{0,T}$, the distribution of $\bfX_t$ -admits a density w.r.t.\ $\piinv$ and we denote $p_t$ this density. We recall that -$\partial_t \log p_t(\bfX_t) = \dive(\Phi_t)(\bfX_t)$, see \citet[Proposition -2]{mathieu2020riemannian} for instance. - -Recall that we consider a Brownian motion on the manifold as a forward process -$(\bfB_t^\M)_{t \in \ccint{0,T}}$ with $\{p_t\}_{t=0}^T$ the associated family -of densities. Thus we have that for any $t \in \ccint{0,T}$ and $x \in \M$ -\begin{equation} - \partial_t p_t(x) = \tfrac{1}{2} \Delta p_t(x) = \dive\left(\tfrac{1}{2} p_t \nabla \log p_t \right)(x) . -\end{equation} -Hence, we can define $(\bfX_t)_{t \in \ccint{0,T}}$ satisfying -$\rmd \bfX_t = \tfrac{1}{2} \nabla \log p_t(\bfX_t) \rmd t$ such that $\bfX_0$ has -distribution $p_0$. -Defining -$(\bfhX_t)_{t \in \ccint{0,T}} = (\bfX_{T-t})_{t \in \ccint{0,T}}$, it follows -that $\bfhX_0$ has distribution $\mathcal{L}(\bfX_T)$ and satisfies -\begin{equation} - \label{eq:backward_flow} - \rmd \bfhX_t =-\tfrac{1}{2} \nabla \log p_{T-t}(\bfhX_t) \rmd t . -\end{equation} -Finally, we introduce $(\bfY_t)_{t \in \ccint{0,T}}$ satisfying -\eqref{eq:backward_flow} but such that $\bfY_0 \sim \piinv$. Note -that if $T \geq 0$ is large then the two processes -$(\bfY_t)_{t \in \ccint{0,T}}$ and $(\bfhX_t)_{t \in \ccint{0,T}}$ are close -since $\mathcal{L}(\bfX_T)$ is close to $\piinv$. Therefore, using the score -network and a manifold ODE solver \citep[as in][]{mathieu2020riemannian}, we -are able to approximately solve the following ODE -\begin{equation} - \partial_t \log q_t(\bfY_t) = -\tfrac{1}{2}\dive(\bm{s}_\theta(t,\cdot))(\bfY_t) , -\end{equation} -with $q_t$ the density of $\bfY_t$ w.r.t.\ $\piinv$ and $\log q_0(\bfY_0) = -0$. 
The likelihood approximation of the model is then given by $\log q_T(\bfY_T)$. In -\cref{sec:diff-betw-ode}, we highlight that this likelihood computation is -slightly different from the one obtained using the SDE. - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/stuff.tex b/doc/stuff.tex deleted file mode 100644 index fe57561..0000000 --- a/doc/stuff.tex +++ /dev/null @@ -1,425 +0,0 @@ -\part{Ideas score manifold, things to try, equivariance, etc.} - -\section{Score-based generative modelling} - -The overall idea of score-based generative models is to construct a diffusion process mapping the data distribution $p_0$ into a tractable noise distribution---invariant wrt the diffusion process $p_T$. -The Wiener or Ornstein–Uhlenbeck processes are typical choices for the diffusion process. -It is known that a diffusion process is a Markov process under time reversal \cite{u.g.haussmann1986Time}. -Then one can generate new samples from the data distribution by sampling noise $x(T) \sim p_T$ and simulating the reverse diffusion process. -Below we briefly remind key steps of score-based diffusion models. - -\subsection{Forward process: perturbating data} - -% \todo[inline]{existence} -A diffusion process $\{X(t)\}_{t\in[0,T]}$ can be modelled as the solution to an Ito SDE [REF?]: -\begin{definition}[Ito SDE] - \label{def:ito-sde} - \begin{align} - dX_t = f(X_t, t) dt + G(X_t, t) dB_t - \end{align} - with $B_t$ is a standard Brownian motion, $f: \R^d \times \R \rightarrow \R^d$ a drift coefficient and $G:\R^d \R \rightarrow \R^{d \times d}$ a diffusion coefficient. 
-\end{definition} -SDEs of this form can be discretised and simulated via a Euler-Maruyama discretisation: -\begin{definition}[Ito SDE discretisation] -For the set of times $0=\tau_0 < \dots < \tau_N = T$, $\Delta t_i = \tau_{i+1} - \tau_{i}$, and $\Delta B_k \sim B_{\tau_{k+1}} - B_{\tau_k}$, an Ito SDE can be discretised as -\begin{align} - X_{k+1} = X_k + f(X_k, \tau_k) \Delta t_k + G(X_k, \tau_k) \Delta B_k -\end{align} -\end{definition} - -\subsection{Reverse process: generating data} -The reverse-time process $\{ Y_t \}_{t\in [0,T]} = \{ X_{T-t} \}_{t\in [0,T]}$ is also a diffusion, and we have the following result -\begin{theorem}[Reverse-time Ito SDE \cite{anderson1982reverse}] -For a Ito SDE of the form of \cref{def:ito-sde}, with certain smoothness and growth properties which guarantee existence and uniqueness of a solution \cite{kushner1974stochastic}, and the existence of the probability density $p(X_t,t)$ for $t_0 \leq t\leq T$ as a smooth and unique solution of its associated Kolmogorov equation. Also define an $\bar{W}_t$ by $\bar{W}_{0} = 0$ and -\begin{align} - d\bar{W}_t = dW_t + \nabla \cdot g(Y_t, t) + g(Y_t, t) \nabla \log p(Y_t, t) -\end{align} -then (skipping some) a reverse time model for $X_t$ is defined by -\begin{align} - dY_t = \bar{f}(Y_t, t) dt + G(Y_t, t) d\bar{W}_t -\end{align} -%where $$\bar{f}^i(X_t, t) = f^i(X_t, t) - \frac{1}{p(X_t, t)}\sum_{j, k} \frac{\partial}{\partial X_t^j}[p(X_t, t) g^{ik}(X_t, t)g^{jk}(X_t, t)]$$ -where -\begin{align} - \bar{f}(Y_t, t) &= f(Y_t, t) - \nabla \cdot[G(Y_t, t)G(Y_t, t)^\top] - G(Y_t,t)G(Y_t, t)^\top \nabla \log p(Y_t, t) \label{eqn:rev_time_drift} -\end{align} -\end{theorem} - - -% % -% The reverse-time process $\{Y_t\}_{t \in [0,T]} = \{X_{T-t}\}_{t \in [0,T]}$ is also a diffusion and satisfies \cite{u.g.haussmann1986Time} - -% \begin{align} -% dY_t = [-f(Y_t, T-t) - g(T-t)^2 \nabla \log p_{T-t}(Y_t)] dt + g(T-t) dW. 
-% \end{align}
-% This process can be simulated as
-% \begin{align}
-% X_k = (1 - \beta) X_{k+1} + 2 \nabla\log p_{k+1}(X_{k+1}) + \sqrt{2\beta} Z_{k+1}, \quad Z_{k+1} \sim \N(0, 1).
-% \end{align}
-\subsection{Density estimation}
-%
-For all diffusion processes, there exists a corresponding deterministic process whose trajectories share the same marginal probability densities $\{p(X_t)\}_{t\in[0,T]}$.
-
-This deterministic process satisfies an ODE \cite{song2020score}
-\begin{align} \label{eq:ode}
-dX_t = \left[ f(X_t, t) - \frac{1}{2} \nabla \cdot[G(X_t, t)G(X_t, t)^\top] - \frac{1}{2} G(X_t,t)G(X_t, t)^\top \nabla \log p(X_t, t) \right] dt
-\end{align}
-which can be solved with classical numerical solvers (e.g.\ Runge-Kutta).
-
-
-\subsection{A special case}
-
-A particular case of interest comes when we set
-\begin{align}
- f(X_t, t) &= -\beta(t) X_t \\
- G(X_t, t) &= \sqrt{2 \beta(t)} \\
- \beta(t) &: \R \to \R
-\end{align}
-I.e.
-\begin{align}
-dX_t = - \beta(t) X_t dt + \sqrt{2\beta(t)} dB_t
-\end{align}
-For this SDE we can compute the following limiting invariant distribution
-\begin{lemma}[Limiting distribution]
-For an Ito SDE of the form
-\begin{align}
-dX_t = - \beta X_t dt + \sqrt{2\beta} dB_t
-\end{align}
-the limiting distribution is
-\begin{align}
- \lim_{t \to \infty} p({X}_t) = \c{N}(0, I)
-\end{align}
-\end{lemma}
-The form of \cref{eqn:rev_time_drift} becomes
-\begin{align}
- \bar{f}(Y_t, t) &= - \beta(t) X_t - 2\beta(t) \nabla \log p(Y_t, t)\\
- &= -\beta(t)(X_t + 2 \nabla \log p(Y_t, t))
-\end{align}
-
-This special case will be of interest for what follows.
-
-
-%
-\subsection{Training}
-In theory, we should be able to simply reverse the diffusion process from noise distribution to data to get our generative model. 
-
-Unfortunately the \emph{Stein score} $\nabla \log p(X_t, t)$ is not available analytically, and therefore needs to be estimated, typically by a neural network, $s_\theta: \R^n \times [0,T] \rightarrow \R^n$ such that $s_\theta^\star(X_t, t) \approx \nabla_{X_t} \log p_t(X_t)$.
-As the Stein score satisfies
-\begin{align}
-\nabla_{X_t} \log p_t(X_t) = \E_{p_{0|t}}\left[ \nabla_{X_t} \log p(X_t|X_0) \right]
-\end{align}
-the optimal parameter $\theta^\star$ is obtained such that
-%
-\begin{align}
-\theta^\star \in \argmin_\theta ~\E_t \left\{ \lambda(t) \E_{X(0), X(t)} \left[ \left| s_\theta(X(t),t) - \nabla_{X(t)} \log p(X(t)|X(0))\right|^2_2 \right] \right\}
-\end{align}
-with $\lambda: [0,T] \rightarrow \R^+$ (a tuneable hyperparameter), $t \sim U([0, T])$, $X(0) \sim p_0(X) \triangleq p_{\text{data}}(X)$ and $X(t) \sim p(X(t)|X(0))$ (which is normally distributed with closed-form mean and variance).
-
-\section{Extension: Generative modelling for functions}
-
-We now move to the setting where we care about modelling a stochastic process $\bf{f} \sim p_{data}(\bf{f})$ with $f: \R^n \rightarrow \R^d$.
-Let's assume we have access to sets of samples
-\begin{align}
- \left\{ \left\{ \{x_1^j, \bf{y}_1^j\}, \dots, \{x_{N_j}^j, \bf{y}_{N_j}^j\} \right\} \right\}_{j=1}^N
-\end{align}
-with $\bf{y}^j \sim f^j(x^j) + \bf{\epsilon}$ for several functions $\bf{f}^j \sim p_{data}(\bf{f})$.
-
-We want to construct a diffusion process $\{\bf{f}_t\}_{t=0}^T$ on the space of stochastic processes such that $p_0=p_{data}$ and $p_T \approx p_{prior} = p_{\text{GP}}(0, \kappa)$ (wlog the mean function is assumed to be 0). One could choose a white noise kernel for this GP prior, similar to the previous section, or a more structured kernel, which may specify a prior distribution closer to the data distribution of interest, and build in some prior knowledge about the data distribution. 
-As $\{\bf{f}_t\}_{t=0}^T$ is infinite dimensional, and in practice we only have access to finite data, we aim to construct a diffusion process on the \emph{finite-dimensional marginals} of the stochastic process $d\mY_t|\mX_{t=0}^T$ with $\mY_t = (\bf{y}_t^1, \dots, \bf{y}_t^M)$ conditioned on queries $\mX = (x^1, \dots, x^M)$. % so that $p_T(\mY_T | \x) \approx \N(\mY_T; 0,\kappa(\x)).$ - -\subsection{Forward process: perturbing data} -% -Let's construct the following Ito SDE -\begin{definition}[Finite-dimensional marginal Ito SDE] -\begin{align} -d\mY_t|\mX = -\beta \mY_t dt + \sqrt{2\beta} \kappa(\mX)^{1/2} ~d\mB_t -\end{align} -\end{definition} -We can compute the invariant distribution of this diffusion -\begin{align} - \lim_{t \to \infty} p(\mY_t | \mX_t) = \c{N}(0, \kappa(\mX)) -\end{align} -Note this is not white noise, but a structured distribution specified by the kernel. - -% This diffusion can be discretised as -% \begin{align} -% p(\mY_{k+1}|\mY_{k},\mX) = \c{N}(\mY_{k+1}; - \beta \mY_{k} , 2\beta \kappa(\mX)) -% \end{align} -% or equivalently -% \begin{align} -% \mY_{k+1} = (1 - \beta) \mY_{k} + \sqrt{2\beta} \kappa(\mX)^{1/2} \mz_{k}, \quad \text{with} \ \mz_{k} \sim \N(0, I_M). -% \end{align} -% \begin{align} -% p(\mY_{k+1}|\mY_{k},\mX) = \N(\mY_{k+1}; - \beta \mY_{k} ,\sqrt{2} \kappa(\mX)) -% \end{align} - -\subsection{Reverse process: generating data} -The reverse process is given by \cite{anderson1982reverse}. 
-\begin{align} -d\mY_{T-t}|\mX = \left[ -\beta \mY_{T-t} + 2 \kappa(\mX) \nabla_{\mY_{T-t}} \log p_{T-t}(\mY_{T-t} | \mX)\right] dt + \sqrt{2\beta} \kappa(\mX)^{1/2} ~d\mB_t -\end{align} -% -% which can be discretised as -% \begin{align} -% p(\mY_{k}|\mY_{k+1},\x) = \c{N}(\mY_{k+1}; - \beta \mY_{k+1} + 2 \kappa(\mX) \nabla_{\mY_{k+1}} \log p(\mY_{k+1} | \mX), 2\beta \kappa(\mX)) -% \end{align} -% or equivalently -% \begin{align} -% \mY_{k} = (1 - \beta) \mY_{k+1} + 2 \kappa(\mX) \nabla_{\mY_{k+1}} \log p(\mY_{k+1} | \mX) + \sqrt{2\beta} \kappa(\mX)^{1/2} \mz_{k+1}. -% \end{align} - -\paragraph{Sampling} -So as to sample $f_0 \sim p_0$ evaluated at $\mY_0=f_0(x)$, we first sample $f_T \sim p_{\text{GP}}(0, \kappa)$, evaluate $\mY_T = f(x)$, and eventually diffuse $\mY|x$ as described above. - -\subsection{Training} - -\paragraph{Score network} -The Stein score is estimated by a neural network $s_\theta: (\R^n \times \R)^M \times [0,T] \rightarrow \R^M$ such that $s_\theta^\star(\mY_t, x, t) \approx \nabla_{\mY_t} \log p_t(\mY_t|x)$. -The ordering of the samples is arbitrary so $s_\theta$ should be permutation invariant so that $s_\theta^\star(\sigma(\mY_t), \sigma(x), t) = s_\theta^\star(\mY_t, x, t)$ for any permutation $\sigma \in \Sigma_M$. -Self-attention should also help. -The architecture should be inspired from the ones used in Neural Processes \cite{garnelo2018conditional,garnelo2018neural}. - -\todo[inline]{Constraints from consistency?} - -\paragraph{Loss} -The score network parameters $\theta$ are optimised such that -\begin{align} -\theta^\star \in \argmin_\theta ~\E_t \left\{ \lambda(t) \E_{\mY(0),\mY(t)|\mX} \left[ \left| s_\theta(\mY(t),\mX,t) - \nabla_{\mY(t)|\mX} \log p(\mY(t)|\mY(0),\mX) \right|^2_2 \right] \right\} -\end{align} -with $\lambda: [0,T] \rightarrow \R^+$, $t \sim U([0, T])$. - -\subsection{Stationary} - -One typically common equivairance in SPs is stationarity, it might be good to see how one would build this in give some data. 
We can also extend to more general symmetries \`a la \cite{holderrieth2021equivariant}.
-
-Thinking in Euclidean terms:
-\begin{itemize}
- \item Translation equivariance can possibly come from a similar method to the point cloud paper with Marcel
- \item Rotations etc from constraining the score function
-\end{itemize}
-
-\subsection{Conditioning on observations}
-
-Consider that we have some already observed data points $\{(\mathbf{x}_i, y_i)\}_{i=1}^{N_i}$ and we want to evaluate the
-SP on some new points $\{ \mathbf{x}_i \}_{i=1}^{\bar{N}_i}$. Can we construct such a process?
-
-Two directions maybe
-\begin{itemize}
- \item Can we train the SP only on observed samples, then condition the ``noise'' GP on the observations, sample from this, then evolve those to get conditioned samples?
- \item We could train the score functions conditionally, $s_\theta^\star(X_t, t, \{(\mathbf{x}_i, y_i)\}_{i=1}^{N_i}) \approx \nabla_{X_t} \log p_t(X_t | \{(\mathbf{x}_i, y_i)\}_{i=1}^{N_i})$, which should be trainable in the same way as before, just using a split context/target set.
-\end{itemize}
-
-
-\subsection{Applications}
-\cite{dupont2021Generative}
-
-
-
-
-\section{Extension: Learning distributions on manifolds}
-\todo[inline]{(Credits to Valentin and Arnaud for the original idea)}
-We assume that we are given a compact Riemannian manifold $(\M, g)$, with a data distribution $p_0$ having support on $\M$.
-We also assume $\M$ to be isometrically embedded in a Euclidean space, i.e.\ $\M \subset \R^d$, which always exists thanks to the Nash embedding theorem (although it may be impractical in some cases). 
- -\subsection{Forward process: perturbating data} - -Let's construct the diffusion process $\{X(t)\}_{t=0}^T \in \M$, as the solution to the following Stratonovich SDE %\footnote{\url{https://people.math.harvard.edu/~ctm/math219/home/sources/hsu.pdf}}\cite{elworthy1982Stochastic}: -% with $W$ a standard Wiener process, $f: \R^d \times \R \rightarrow \R^d$ a drift coefficient and $g: \R \rightarrow \R$ a diffusion coefficient. -\begin{align} -dX_t = P(X_t) \circ dB_t -\end{align} -with $B_t$ a standard Brownian process in the ambient Euclidean space and $P(X):\R^d \rightarrow T_{X}\M$ the projection onto the tangent space. For instance one has $P(X) \xi = \xi - \langle \xi, X \rangle X$, for $X \in \M = \mathbb{S}^n, \xi \in \R^{n+1}$. -% -\todo[inline]{Is it better or simpler to define the Brownian motion intrinsically?} -\mjh{probably? you can define Brownian motion as the process whose transition density function is the heat kernel on the manifold, which you get at via the Laplace-Beltrami operator. This will probably only matter if the time steps you do to approx the diffusion are large compared to the scale of the manifold.} -\emile{Yes, they define the same object, so the considerations around the time step concerns more the discretisation scheme.} - -In local coordinates one can define a diffusion process as the solution of the Ito SDE -\begin{align} -dX_t = G^{-1/2}(X_t) dB_t -\end{align} -with $G(X)$ the metric tensor in matrix form and $G^{-1/2}$ the (unique) symmetric square root of $G(X)^{-1}$. -% - -\paragraph{Invariant prior} -Since we assume $\M$ to be compact, $\text{Vol}(\M)$ is finite so the uniform distribution with density (w.r.t. to the measure induced by the Riemmanian metric) $p \propto 1/\text{Vol}(\M)$ is proper. -It is invariant with respect to the Brownian diffusion. -% \todo[inline]{What is the invariant prior? Riemmanian vs Warped Gaussian?} -% The invariant prior is the Riemannian Gaussian which density (w.r.t. 
to the measure induced by the Riemmanian metric) is given by -% \begin{align} -% p(\x) \propto \exp\left\{ \frac{d_{\M}()^2}{} \right\} -% \end{align} - -% -This process can be simulated as \cite{hairer2011Solving} %via a Euler-Maruyama discretisation: -\begin{align} -% X_{k+1} = (1 - \beta) X_{k} + Z_k, \quad Z_k \sim \N(0, 1). -% X_{k+1} = R_{X_{k}} \left( - \beta X_{k} + Z_k \right), \quad Z_k \sim \N(0, 1) -X_{k+1} = R_{X_{k}} \left(G^{-1/2}(X_k) Z_k \right), \quad Z_k \sim \N(0, I_d) \in T_{X_{k}}\M -\end{align} -with $R_{X_k}$ a \emph{retraction}\footnote{A \emph{retraction} $R_{x}$ is defined as a map $R_{x}: T_{x}\M \rightarrow \M$ such that there exists $r>0$ s.t.\ $d(R_{x}(s\bm{v}),\exp_{x}(s\bm{v})) \le rs^2$ for $s$ sufficiently small, $\|\bm{v}\|=1$.}. -\emile{With $R_{x}=\exp_{x}$, $X_{k+1}$ is distributing according a \emph{Wrapped }Normal distribution (and not the \emph{Riemannian} normal, i.e. the maximum entropy Gaussian generalisation). This should not matter though.} -% \begin{align} -% % p(x(t+\delta)|x(t)) &\propto \exp \left\{ - \frac{\| \log_{x(t)}(x(t+\delta)) - f(x(t)) \|^2_{x(t)}}{2 \delta^2} \right\} \\ -% p(x(t+\delta)|x(t)) &\propto \exp \left\{ - \frac{d_\M(x(t+\delta), x(t))^2}{2 \delta^2} \right\} \\ -% &\propto \exp \left\{ -\frac{\| \log_{x(t)}(x(t+\delta)) \|^2_{x(t)}}{2 \delta^2} \right\} -% \end{align} - - -% \paragraph{Discretisation} - -% \begin{align} -% p(x(t)|x(t+\delta)) -% &= p(x(t+\delta)|x(t)) p(x(t)) / p(x(t+\delta)) \\ -% &= p(x(t+\delta)|x(t)) \exp \left\{ \log p(x(t)) - \log p(x(t+\delta)) \right\} \\ -% &\propto \exp \left\{ - \frac{\| \log_{x(t)+\delta)}(x(t) \|^2_{x(t+\delta)}}{2 \delta^2} \right\} \times \exp \left\{ -\langle \nabla \log p(x(t+\delta),\log_{x(t)+\delta}(x(t)) \rangle_{x(t+\delta)} \right\} \\ -% &\propto \exp \left\{ - \frac{\| \log_{x(t+\delta)}(x(t)) - 2 \delta^2 \nabla \log p(x(t+\delta)) \|^2_{x(t+\delta)}}{2 \delta^2} \right\} -% \end{align} - -\subsection{Reserve process: generating data} 
-Can we prove that the reverse-time process $(Y_t)_{t \in [0,T]} = (X_{T-t})_{t \in [0,T]}$ is also a diffusion? [PROOF?] which satisfies -% -\begin{align} -% dY_t = [-f(Y_t, T-t) - g(T-t)^2 \nabla \log p_{T-t}(Y_t)] dt + g(T-t) dW. -dY_t = \nabla \log p_{T-t}(Y_t) ~dt + P(X_t) \circ dB_t -\end{align} -with $\nabla: f \mapsto G^{-1} df$ the (Riemannian) gradient. -% -This process can be simulated via -\begin{align} -X_{k} = R_{X_{k+1}} \left(G^{-1}(X_{k+1}) \nabla_E \log p_{k+1}(X_{k+1}) + G^{-1/2}(X_{k+1}) Z_{k+1} \right), \quad Z_{k+1} \sim \N(0, I_d) \in T_{X_{k+1}}\M \cong \R^d -\end{align} - - -\subsection{Training} -% -The score neural network is a function defined as $s_\theta: \M \times [0, T] \rightarrow T\M$. -It is a time-dependent vector-field. - -\paragraph{Denoising score matching} - -The DSM loss requires computing $\nabla_{X_t} \log p(X_t|X_0)$ which is non trivial for non-Euclidean manifolds. -% A closed-form expression may be known in some cases like $\mathbb{S}^d$ but it involves an infinite sum. - -If $\mathcal{M}$ is a $d$-dimensional smooth compact manifold, the heat kernel can be expanded as a uniformly and absolutely convergent power series \citep{jones2008Manifold,li2019Variational} -\begin{align} \label{eq:heat_kernel} -p_t(x, y) = \sum^\infty_n e^{-t \lambda_n} \psi_n(x) \psi_n(y) -\end{align} -with $\{\lambda_n\}_n$ and $\{\psi_n\}_n$ respectively the eigenvalues and eigenfunctions of the Laplace-Beltrami operator $\Delta_\mathcal{M}$. -For instance with $\mathbb{S}^d$, we know \citep{borovitskiy2020Matern,devito2019Reproducing,zhao2018Exact} that $\lambda_n = n(n + d - 1)$ and $$\psi_n(x) \psi_n(y) = \frac{2n+d-1}{d-1} \frac{1}{A_{\mathbb{S}^n}} \mathcal{C}_n^{(d-1)/2}(x \cdot y)$$ where $\mathcal{C}_n^{(d-1)/2}$ are Gegenbauer polynomials. -An exact sampling scheme exists for $\mathbb{S}^d$ \cite{mijatovic2020note} but it is non trivial to implement \footnote{https://github.com/konkam/ExactWrightFisher.jl}. 
-
-When $d=2$, the eigenfunctions are the spherical harmonics and the Gegenbauer polynomials are the Legendre polynomials $P_n$, we thus get \citep{jammalamadaka2019Harmonic,mardia2000Directional}:
-$$p_t(x, y) = \sum^\infty_{n=0} e^{- n(n+1) \cdot t } ~\frac{2n + 1}{4 \pi} P_n(x \cdot y).$$
-When $d=1$, the heat kernel and Wrapped normal density coincide, which means one can easily sample $X_t|X_0$.
-Additionally, around $t \approx 0$, \cref{eq:heat_kernel} can be expanded as
-$$p_t(x, y) = (4\pi t)^{-d/2} G(r)^{-1/2} \exp \left(-\frac{r^2}{4t}\right) + \mathcal{O}(1)$$
-with $r=d_\mathcal{M}(x,y)$.
-Higher order expansions can be obtained \cite{rey2019diffusion,zhao2018Exact}.
-One could get an unbiased estimator of \cref{eq:heat_kernel} via the Russian roulette estimator $\sum_n \Delta_n = \mathbb{E}_{N \sim p} \left[ \sum^N_n \frac{\Delta_n}{\mathbb{P}(N \ge n)} \right]$, although what we care about in practice is $\nabla_x \log p_t(x, y)$, where the $\log$ would bias the estimator.
-
-
-\paragraph{Implicit score matching}
-We can alternatively rely on the \emph{implicit} or \emph{sliced} (which is an unbiased stochastic estimator of the former) score matching losses which are shown in \cref{tab:sm_losses}.
-Relying on a particular choice of generator for the vector field can simplify the estimation of the divergence as discussed in the next paragraph. 
- -\begin{table}[h] -\centering -\begin{tabular}{cc} -\toprule -Method & Loss \\ \hline -$\mathcal{L}_{\text{ESM}}$ & $\frac{1}{2} \E \left[ \| \bm{s}_\theta(X_t, t) - \nabla \log p(X_t) \|^2_\Lambda \right]$ \\ -$\mathcal{L}_{\text{DSM}}$ & $\frac{1}{2} \E \left[ \| \bm{s}_\theta(X_t, t) + \nabla \log p(X_t|X_0) \|^2_\Lambda \right]$ \\ -$\mathcal{L}_{\text{DSM??}}$ & $\frac{1}{2} \E \left[ \| \bm{s}_\theta(X_t, t) + \nabla \log p(X_t|X_{t-dt}) \|^2_\Lambda \right]$ \\ -$\mathcal{L}_{\text{ISM}}$ & $\E \left[\frac{1}{2} \| \bm{s}_\theta(X_t, t)\|^2_\Lambda + \nabla \cdot \left(\Lambda^\top \bm{s}_\theta(X_t, t) \right) \right]$ \\ -$\mathcal{L}_{\text{SSM}}$ & $\frac{1}{2} \E \left[\frac{1}{2} \| \bm{s}_\theta(X_t, t)\|^2_\Lambda + v^\top \nabla \left(\Lambda^\top \bm{s}_\theta(X_t, t) \right) v \right]$ \\ -\bottomrule -\end{tabular} -\caption{Score matching losses. $v$ follows the Rademacher distribution. Expectations are taken w.r.t.\ $p(X_t|X_0)$.} -\label{tab:sm_losses} -\end{table} - - -\paragraph{Parametrisation} -For $\M \neq \R^n$, it is non trivial to parametrise the output of $s_\theta(x, t)$ which is a tangent vector which lives in $T_{x}\M$ and this can be done in several ways. -% -\begin{itemize} - \item \textbf{Projected vector field}: $s_\theta(x, t) = \text{proj}_{T_{x}\M}(\tilde{s}_\theta(x, t)) = P_x \tilde{s}_\theta(x, t) $ with $\tilde{s}_\theta(x, t) \in \R^d$ an ambient vector and $P_x$ the linear projection over the tangent space at $x$. Likely the easiest. - According to \cite{rozen2021moser}, then $\text{div}(s_\theta(x, t)) = \text{div}_E(s_\theta(x, t))$, where $\text{div}_E$ denotes the standard Euclidean divergence. 
- % \mjh{We use the same trick in the vec field GP paper} \emile{We also do that in practice in \cite{mathieu2020riemannian}} - - \item \textbf{Coordinates vector fields}: $s_\theta(\bf{z}, t) = \sum_i s^i_\theta(\bf{z}, t) f^i(\bf{z})$ with $f^i(\bf{z})$ the vector fields induced by a choice of local coordinates $x(\mz)$ with $\mz \in U \subset \R^d$. For the sphere, think of the spherical coordinates $x(\mz)=x(\theta, \varphi)$ with $f^\theta = \partial x/ \partial_\theta$ and $f^\varphi = \partial x/ \partial_\varphi$. - Then the divergence can be computed in these local coordinates: $\text{div}(s_\theta(x, t)) = \frac{1}{\sqrt{|\det g|}} \sum_i \frac{\partial}{\partial_i} \sqrt{|\det g|} s^i_\theta(\bf{z}, t)$. One recovers the standard divergence in spherical coordinates with this formula. - If the manifold is not parallelisable, there does not exist a global frame, which implies that $\{f^i(x)\}$ is not a basis for any $x \in \M$. Think of the Hairy-ball theorem for the (n-)sphere. - % \mjh{but we don't actually need a basis. We just need a smooth set of basis vector fields that span the tangent space. The fields can be redundant e.g. 3 axis fields on the sphere. We then just mix these smooth fields with smooth scalar coeffs from an nn and mix them to get a smooth field} - % \emile{Agree that we don't need a basis, we only need a generator of the tangent bundle.} - - \item \textbf{Divergence-free vector fields}: $s_\theta(x, t) = \sum_i s^i_\theta(x, t) f^i(x)$ with $f^i(x)$ the vector fields induced by the isometries of $\M$ which are defined as $f_i(x) = \left.\frac{d}{dt}\right|_{t=0} \exp(t\,\xi_i)\cdot x$ (with some basis $(\xi_i\,;i=1,\ldots,n)$ of $T_{x}\M$ ). For any homogeneous space, this family of vector fields generates the tangent bundle and is divergence-free. 
- % \mjh{Is the divergence free nature of the field going to impose some extra constraint on the score function, and the resulting process?} - % The $f^i$ are divergence free but the linear combination which yields $s_\theta$ has no constraint. - Then we have $\text{div}(s_\theta(x, t)) = \sum_i \text{div}(s^i_\theta(x, t) f^i(x)) = \sum_i s^i_\theta(x, t) ~\text{div}(f^i(x)) + \langle \frac{\partial s^i_\theta(x, t)}{\partial x}, f^i(x)\rangle = \sum_i \langle \frac{\partial s^i_\theta(x, t)}{\partial x}, f^i(x)\rangle$. - This can similarly be estimated stochastically. -% For more information see notes at \url{https://www.overleaf.com/read/thvfprqwmkjq}. - % Not sure whether this can be useful here but this term does appear in standard (non-denoising) score matching \cite{hyvarinenEstimation,song2019Sliced}. } -\end{itemize} - - -\subsection{Applications} -Applications from \cite{mathieu2020riemannian,cohen2021riemannian,rezende2020Normalizing,rezende2021Implicit,falorsi2021Continuous}. - - -\section{Equivariance} -\todo[inline]{(Credits to Michael for the original idea)} -Following \cite{kohler2020Equivariant}, a probability density $p=p_0\circ\phi^{-1}$ (i.e.\ pushforward distribution) is invariant w.r.t.\ group action $\rho(g)$ for $g \in G$ if (sufficient condition) $p_0$ is invariant and $\phi$ is equivariant w.r.t.\ $G$. - -Given that diffusion processes satisfy the ODE given in \cref{eq:ode}, this implies that if $p_T$ is invariant and $\left[ f(X_t, t) - \frac{1}{2} g(t)^2 \nabla_{X_t} \log p_t(X_t) \right]$ is equivariant, then $p_t$ is invariant. 
- -We have -\begin{align} -\left[f(\cdot, t) - \frac{1}{2} g(t)^2 \nabla_{\cdot} \log p_t(\cdot) \right] \left(\rho(g) X_t \right) -&= \left[- \beta \rho(g) X_t - \frac{1}{2} g(t)^2 \nabla_{\rho(g) X_t} \log p_t\left(\rho(g) X_t\right)) \right] \\ -& \approx \left[- \beta \rho(g) X_t - \frac{1}{2} g(t)^2 s_\theta\left(\rho(g) X_t, t\right) \right] -\end{align} -assuming an Ornstein–Uhlenbeck process and plugging the score approximation. - -It is then sufficient to parametrise the score network so that it is equivariant w.r.t. its first argument as we have that $\rho(g)$ and the drift commute, i.e.\ -\begin{align} - \left[- \beta - \frac{1}{2} g(t)^2 s_\theta\left(\cdot, t\right) \right] \left(\rho(g) X_t \right) = - \rho(g) \left[- \beta - \frac{1}{2} g(t)^2 s_\theta\left(\cdot, t\right)\right] (X_t). -\end{align} - -\paragraph{Architecture} -In practice the score network $s_\theta$ could be parametrised with an EMLP \cite{finzi2021Practicala}. - -\section{Exploring the hypercube!} -Equivariant stochatic vector fields on manifolds? - -Application climate data \cite{holderrieth2021equivariant}: $\mathcal{P}_+^1\left(\mathcal{C}\left(\mathbb{S}^2, T\mathbb{S}^2\right)\right)$ with $G=SO(3)$. - -\section{possibly some more useful refs} -\cite{song2020score} Score-based generative modelling through SDEs - -% \url{https://link.springer.com/chapter/10.1007/978-1-4612-0209-7_6} Time reversal of diffusion on manifolds \url{https://d-nb.info/962215848/04} -% \emile{Do they give the reverse diffusion formula? I don't have access.} - -\cite{oksendal2003stochastic} SDE book cited in the above - -\cite{anderson1982reverse} Reverse time diffusion equations results - -\cite{anonymous2022geodiff} - equivalence in diff processes for 3d molecules - -\cite{anonymous2022pseudo} - RK methods for diffusion processes to stay closer to the data manifolds (?) 
- -Statement: - -For an It\^o SDE of the form $$dX_t = f(X_t, t) dt + g(X_t, t)dW_t$$, where $f: \R \to \R^n$, $g: \R \to \R^{n \times n}$, with certain smoothness and growth properties which guarantee existence and uniqueness of a solution (Anderson cites \cite{kushner1974stochastic} for this), and the existence of the probability density $p(X_t,t)$ for $t_0 \leq t\leq T$ as a smooth and unique solution of its associated Kolmogorov equation. Also define an $\bar{W}_t$ by $\bar{W}_{t_0} = 0$ and $$d\bar{W}_t^i = dW_t^i + \frac{1}{p(X_t, t)} \sum_j \frac{\partial}{\partial X_t^j} [ p(X_t, t) g^{ji}(X_t, t)]dt $$ then (skipping some) a reverse time model for $X_t$ is defined by $$dX_t = \bar{f}(X_t, t) dt + g(X_t, t) d\bar{W}_t$$ where $$\bar{f}^i(X_t, t) = f^i(X_t, t) - \frac{1}{p(X_t, t)}\sum_{j, k} \frac{\partial}{\partial X_t^j}[p(X_t, t) g^{ik}(X_t, t)g^{jk}(X_t, t)]$$ -\mjh{Translate above into matrix maths and understand relation to the above} -$$d\bar{W}_t = dW_t + \frac{1}{p(X_t, t)}\nabla \cdot [p(X_t, t) g(X_t, t)] = dW_t + \nabla \cdot g(X_t, t) + g(X_t, t) \nabla \log p(X_t, t)$$ -\mjh{this $\bar{W}_t$ is a standard Wiener process / Brownian motion?} -$$\bar{f}(X_t, t) = f(X_t, t) - \frac{1}{p(X_t, t)} \nabla \cdot [p(X_t, t) g(X_t, t) g(X_t, t)^\top] = f(X_t, t) - \nabla \cdot[g(X_t, t)g(X_t, t)^\top] - g(X_t,t)g(X_t, t)^\top \nabla \log p(X_t, t)$$ - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main_arxiv" -%%% End: diff --git a/doc/time_reversal.tex b/doc/time_reversal.tex deleted file mode 100644 index 8653740..0000000 --- a/doc/time_reversal.tex +++ /dev/null @@ -1,439 +0,0 @@ -\section{Time-reversal formula: extension to compact Riemannian manifolds} -\label{sec:time-reversal} - -In this section, we provide the proof of -\cref{thm:time_reversal_manifold}. The proof follows the arguments of -\citet[Theorem 4.9]{cattiaux2021time}. We could have also applied the abstract -results of \citet[Theorem 5.7]{cattiaux2021time} to obtain our results. 
Note that -the time-reversal on manifold could also be obtained by readily extending -arguments from \citet{haussmann1986time}, however the entropic conditions found -by \citet{cattiaux2021time} are more natural when it comes to the study of the -Schr\"odinger Bridge problem. For the interested reader we provide an informal -derivation of the time-reversal formula obtained by \citet{haussmann1986time} in -\cref{sec:informal-derivation}. The proof of \cref{thm:time_reversal_manifold} -is given in \cref{sec:proof-crefthm:t}. Finally, we emphasize that -\citet{garcia2021brenier} develops a Girsanov theory for stochastic processes -defined on compact manifolds with boundary in order to study the -Brenier-Schr\"odinger problem. - -\subsection{Informal derivation} -\label{sec:informal-derivation} - -In this section, we provide a non-rigorous derivation of -\cref{thm:time_reversal_manifold} following the approach of -\citet{haussmann1986time}. Let $(\bfX_t)_{t \in \ccint{0,T}}$ be a continuous -process such that for any $f \in \rmc^2(\M)$ we have that -$(\bfM_t^{\bfX, f})_{t \in \ccint{0,T}}$ is a $\bfX$-martingale where for any -$t \in \ccint{0,T}$ - \begin{equation} - \label{eq:martingale_forward} - \textstyle{ \bfM_t^{\bfX, f} = f(\bfX_t) - \int_0^t \{ \langle b(\bfX_s), \nabla f(\bfX_s) \rangle_\M + (1/2) \Delta f(\bfX_s) \} \rmd s . } - \end{equation} - Let $(\bfY_t)_{t \in \ccint{0,T}} = (\bfX_{T-t})_{t \in \ccint{0,T}}$. Our goal is to show that for any $f \in \rmc^2(\M)$, - $(\bfM_t^{\bfY, f})_{t \in \ccint{0,T}}$ is a $\bfY$-martingale where for any - $t \in \ccint{0,T}$ - \begin{equation} - \textstyle{ \bfM_t^{\bfY, f} = f(\bfY_t) - \int_0^t \{ \langle b(\bfY_s) + \nabla \log p_{T-s}(\bfY_s), \nabla f(\bfY_s) \rangle_\M + (1/2) \Delta f(\bfY_s) \} \rmd s . } - \end{equation} - Note that here we implicitly assume that for any $t \in \ccint{0,T}$, $\bfX_t$ - admits a smooth positive density w.r.t. $\piinv$ denoted $p_t$. 
In other - words, we want to show that for any $g \in \rmc^2(\M)$ and - $s, t \in \ccint{0,T}$ with $t \geq s$ we have - \begin{equation} - \label{eq:time_reversal_manifold_haussman} - \textstyle{\expeLigne{g(\bfY_s)(f(\bfY_t) - f(\bfY_s))} = \expeLigne{g(\bfY_s)\int_s^t \{ \langle b(\bfY_u) + \nabla \log p_{T-u}(\bfY_u), \nabla f(\bfY_u) \rangle_\M + (1/2) \Delta f(\bfY_u) \} \rmd u} . } - \end{equation} - We introduce the infinitesimal generator - $\generator: \ \rmc^2(\M) \to \rmc(\M)$ given for any $f \in \rmc^2(\M)$ and $x \in \M$ by - \begin{equation} - \generator (f)(x) = \langle b(x) , \nabla f(x) \rangle_\M + (1/2) \Delta f(x) . - \end{equation} - Similarly, we introduce the infinitesimal generator - $\generatort: \ \ccint{0,T} \times \rmc^2(\M) \to \rmc(\M)$ given for any $f \in \rmc^2(\M)$, $t \in \ccint{0,T}$ and $x \in \M$ by - \begin{equation} - \generatort (t, f)(x) = \langle b(x) + \nabla \log p_{T-t}(x), \nabla f(x) \rangle_\M + (1/2) \Delta f(x) . - \end{equation} - With these notations, \eqref{eq:time_reversal_manifold_haussman} can be written as follows: we want to show that for any $g \in \rmc^2(\M)$ and - $s, t \in \ccint{0,T}$ with $t \geq s$ we have - \begin{equation} - \label{eq:time_reversal_manifold_haussman} - \textstyle{\expeLigne{g(\bfY_s)(f(\bfY_t) - f(\bfY_s))} = \expeLigne{g(\bfY_s)\int_s^t \generatort(u, \bfY_u) \rmd u} . } - \end{equation} - The rest of this section follows the first part of the proof of \citet[Theorem 2.1]{haussmann1986time}. - Let $t, s \in \ccint{0,T}$ with $t \geq s$. 
We have - \begin{align} - \textstyle{\expeLigne{g(\bfY_s)(f(\bfY_t) - f(\bfY_s))}} &= \textstyle{\expeLigne{g(\bfX_{T-s})(f(\bfX_{T-t}) - f(\bfX_{T-s}))}} \\ - &= \textstyle{\expeLigne{\CPELigne{g(\bfX_{T-s})}{\bfX_{T-t}}f(\bfX_{T-t})} - \expeLigne{g(\bfX_{T-s})f(\bfX_{T-s})}} \\ - &= \textstyle{\expeLigne{v(T-t,\bfX_{T-t})f(\bfX_{T-t})} - \expeLigne{v(T-s,\bfX_{T-s})f(\bfX_{T-s})}} , - \label{eq:first_der} - \end{align} - with $v: \ \ccint{0,T-s} \times \M \to \rset$ given for any $u \in \ccint{0,T-s}$ - and $x \in \M$ by $v(u,x) = \CPELigne{g(\bfX_{T-s})}{\bfX_u=x}$. We have that $v$ - satisfies the backward Kolmogorov equation, i.e. we have for any - $u \in \ccint{0,T-s}$ and $x \in \M$ - \begin{equation} - \label{eq:backward_kolmogorov} - \partial_u v(u,x) = -\generator v(u,x) . - \end{equation} - Note that it is not trivial to show that $v$ is regular enough to satisfy the - backward Kolmogorov equation. In this informal derivation, we assume that $v$ - is regular enough and will provide a different rigorous proof of the - time-reversal formula in \cref{sec:proof-crefthm:t}. However, note that it is - possible to show that $v$ indeed satisfies the backward Kolmogorov equation by - adapting arguments from \citet{haussmann1986time} to the manifold framework. - - Let $h: \ \ccint{0,T-s} \times \M \to \rset$ given for any - $u \in \ccint{0,T-s}$ and $x \in \M$ by $h(u,x) = v(u,x) f(x)$. Using - \eqref{eq:backward_kolmogorov}, we have for any $u \in \ccint{0,T-s}$ and - $x \in \M$ - \begin{align} - \label{eq:def_h} - \partial_u h(u,x) + \generator h(u, x) &= f(x) \partial_u v(u,x) + f(x) \generator v(u,x) + v(u,x) \generator f(x) + \langle \nabla f(x), \nabla v(u,x)\rangle \\ - &= v(u,x) \generator f(x) + \langle \nabla f(x), \nabla v(u,x)\rangle_\M . 
- \end{align} - In addition, using the - divergence theorem \citep[see][p.51]{lee2018introduction}, we have for any $u \in \ccint{0,T-s}$ - \begin{align} - &\expeLigne{\langle \nabla f(\bfX_u), \nabla v(u,\bfX_u)\rangle_\M} = \textstyle{\int_{\M} \langle \nabla f(x_u), \nabla v(u,x_u) p_u(x_u) \rangle_\M \rmd \piinv(x_u) } \\ - & \qquad \qquad = - \textstyle{\int_{\M} v(u,x_u) \dive(p_u \nabla f) (x_u) \rmd \piinv(x_u) } \\ - & \qquad \qquad = - \textstyle{\int_{\M} v(u,x_u) \Delta f(x_u) p_u(x_u) \rmd \piinv(x_u) - \int_{\M} v(u,x_u) \langle \nabla f(x_u), \nabla \log p_u(x_u) \rangle_\M p_u(x_u) \rmd \piinv(x_u) } \\ - & \qquad \qquad = - \textstyle{\expeLigne{ v(u,\bfX_u) \Delta f(\bfX_u)} - \expeLigne{ v(u,\bfX_u) \langle \nabla f(\bfX_u), \nabla \log p_u(\bfX_u) \rangle_\M} } . - \end{align} - Therefore, using this result and \eqref{eq:def_h} we get that for any - $u \in \ccint{0,T-s}$ - \begin{align} - \expeLigne{\partial_u h(u,\bfX_u) + \generator h(u, \bfX_u)} &= \expeLigne{v(u,\bfX_u)\{ \langle b(\bfX_u) - \nabla \log p_u(\bfX_u), \nabla f(\bfX_u) \rangle_\M -(1/2) \Delta f(\bfX_u)\}} \\ - &= -\expeLigne{v(u,\bfX_u)\generatort(T-u,f)(\bfX_u)} . 
- \end{align} - Combining this result and \eqref{eq:martingale_forward} and that for any - $u \in \ccint{0,T-s}$ and $x \in \M$, - $v(u,x) = \CPELigne{g(\bfX_{T-s})}{\bfX_u=x}$ we get - \begin{align} - \expeLigne{v(T-t,\bfX_{T-t})f(\bfX_{T-t})} - \expeLigne{v(T-s,\bfX_{T-s})f(\bfX_{T-s})} &= \expeLigne{h(T-t, \bfX_{T-t}) - h(T-s, \bfX_{T-s})} \\ - &= \textstyle{\int_{T-t}^{T-s} \expeLigne{v(u,\bfX_u)\generatort(T-u, \bfX_u)} \rmd u } \\ - &= \textstyle{\expeLigne{g(\bfX_{T-s})\int_{T-t}^{T-s} \generatort(T-u, \bfX_u) \rmd u } .}\\ - \end{align} - Using this result, \eqref{eq:first_der} and the change of variable $u \mapsto T-u$ we obtain - \begin{equation} - \expeLigne{g(\bfY_s)(f(\bfY_t) - f(\bfY_s))} = \textstyle{\expeLigne{g(\bfX_{T-s})\int_{T-t}^{T-s} \generatort(u, \bfX_u) \rmd u } } = \textstyle{\expeLigne{g(\bfY_{s})\int_{s}^{t} \generatort(u, \bfY_u) \rmd u } } . - \end{equation} - Hence, \eqref{eq:time_reversal_manifold_haussman} holds and we have proved - \cref{thm:time_reversal_manifold}. Again, we emphasize that in order to make - the proof completely rigourous one needs to derive regularity properties of $v$. - - -\subsection{Proof of \cref{thm:time_reversal_manifold}} -\label{sec:proof-crefthm:t} - -In this section, we follow another approach to prove the time-reversal -formula. We are going to use the integration by part formula of \citet[Theorem -3.17]{cattiaux2021time} in a similar spirit as \citet[Theorem -4.9]{cattiaux2021time} in the Euclidean setting. In order to adapt arguments -from \citet{cattiaux2021time} to our Riemannian setting, we use the Nash -embedding theorem in order to embed our processes in a Euclidean space and -leverage tools from Girsanov theory. The rest of the section is organized as -follows. First in \cref{sec:diff-proc-stoch}, we recall basic properties of -infinitesimal generators and recall the integration by part formula of -\citet[Theorem 3.17]{cattiaux2021time}. 
Then in \cref{sec:girs-theory-comp}, we -extend some Girsanov theory to compact Riemannian manifolds using the Nash -embedding theorem. We conclude the proof in \cref{sec:concluding-proof}. - -\subsubsection{Diffusion processes and integration by part formula} -\label{sec:diff-proc-stoch} - -In this section, we state a simplified version of \citet[Theorem -3.17]{cattiaux2021time} for Markov continuous path (probability) measure on -Polish spaces. Let $(\msx, \mcx)$ be a Polish space. We say that $\Pbb$ is a -path measure if $\Pbb \in \Pens(\rmc(\ccint{0,T}, \msx))$. Let -$(\bfX_t)_{t \in \ccint{0,T}}$ with distribution $\Pbb$. We denote -$(\mcf_t)_{t \in \ccint{0,T}}$ the filtration such that for any -$t \in \ccint{0,T}$, $\mcf_t = \sigma(\bfX_s, \ s \in \ccint{0,t})$. Let -$(\bfM_t)_{t \in \ccint{0,T}}$ be a Polish-valued stochastic process. We say that -$(\bfM_t)_{t \in \ccint{0,T}}$ is a $\Pbb$-local martingale if it is a local -martingale w.r.t. the filtration $(\mcf_t)_{t \in \ccint{0,T}}$. A function -$u: \ \ccint{0,T} \times \msx \to \rset$ is said to be in the domain of the -extended generator of $\Pbb$ if there exists a process -$(\generatorb_\Pbb u(t, \bfX_{\ccint{0,t}}))_{t \in \ccint{0,T}}$ such that: -\begin{enumerate}[label= (\alph*), wide, labelwidth=!, labelindent=0pt] -\item $(\generatorb_\Pbb u(t, \bfX_{\ccint{0,t}}))_{t \in \ccint{0,T}}$ is adapted w.r.t. $(\mcf_t)_{t \in \ccint{0,T}}$. -\item $\int_0^T \absLigne{\generatorb_\Pbb u(t, \bfX_{\ccint{0,t}})} \rmd t < +\infty$, $\Pbb$-a.s. -\item The process $(\bfM_t)_{t \in \ccint{0,T}}$ is a $\Pbb$-local martingale, - where for any $t \in \ccint{0,T}$ - \begin{equation} - \textstyle{\bfM_t = u(t,\bfX_t) - u(0, \bfX_0) - \int_0^t \generatorb_\Pbb u(s, \bfX_{\ccint{0,s}}) \rmd s .} - \end{equation} -\end{enumerate} -The domain of the extended generator is denoted $\dom(\generatorb_\Pbb)$. 
We say -that $(u,v)$ with $u,v : \ \ccint{0,T} \times \msx \to \rset$ is in the domain -of the carr\'e du champ if $u,v, uv \in \dom(\generatorb_\Pbb)$. In this case, we -define the carr\'e du champ $\carrechampb_\Pbb$ as -\begin{equation} - \carrechampb_\Pbb(u,v) = \generatorb_\Pbb(uv) - \generatorb_\Pbb(u)v - \generatorb_\Pbb(v)u . -\end{equation} -Note that if $\msx = \M$ is a Riemannian manifold, -$\rmc^2(\M) \subset \dom(\generatorb_\Pbb)$ and for any $u \in \rmc^2(\M)$ -$\generatorb_\Pbb(u) = \langle \nabla u, X \rangle_\M + (1/2)\Delta u$ with -$X \in \Gamma(\TM)$ then we have that $\rmc^2(\M) \times \rmc^2(\M) \subset \dom(\carrechampb_\Pbb)$ -and for any $u, v \in \rmc^2(\M)$, -$\carrechampb_\Pbb(u,v) = \langle \nabla u, \nabla v \rangle_\M$. Assume that there exists -$\mathcal{U}_\Pbb \subset \dom(\generatorb_\Pbb) \cap \rmc_b(\msx)$ such that -$\mathcal{U}_\Pbb$ is an algebra. We denote $\mathcal{U}_{\Pbb,2}$ such that -\begin{equation} - \mathcal{U}_{\Pbb,2} = \ensembleLigne{u \in \mathcal{U}_\Pbb}{\generatorb_\Pbb u \in \mathrm{L}^2(\Pbb), \ \carrechampb_\Pbb(u,u) \in \mathrm{L}^1(\Pbb)} . -\end{equation} -Finally we denote $R(\Pbb)$ the time-reverse path measure, i.e. for any -$\msa \in \mcb{\rmc(\ccint{0,T}, \msx)}$ we have -$R(\Pbb)(\msa) = \Pbb(R(\msa))$, where -$R(\msa) = \ensembleLigne{t \mapsto \omega_{T-t}}{\omega \in \msa}$. In what -follows, we assume $\Pbb$ is Markov. It is well-known, see \citep[Theorem -1.2]{leonard2014reciprocal} for instance, that in this case $R(\Pbb)$ is also -Markov. In addition, since $\Pbb$ is Markov, for any $u \in \mathrm{dom}(\generatorb_\Pbb)$ and -$t \in \ccint{0,T}$ there exists $\generator_\Pbb$ such that -$\generatorb_\Pbb u(t, \bfX_{\ccint{0,t}}) = \generator_\Pbb u(t, \bfX_t)$ with -$\generator_\Pbb u: \ \ccint{0,T} \times \msx \to \rset$. Similarly, we define -$\carrechamp_\Pbb(u,v): \ \ccint{0,T} \times \msx \to \rset$ from $\carrechampb_\Pbb(u,v)$. 
- -We are now ready to state the integration by part formula, -\citep[Theorem 3.17]{cattiaux2021time}. - -\begin{theorem} - \label{thm:ibp_cattiaux} - Let $u, v \in \mathcal{U}_{\Pbb, 2}$. The following hold: - \begin{enumerate}[label= (\alph*), wide, labelwidth=!, labelindent=0pt] - \item If - $u \in \dom(\generator_{R(\Pbb)})$ and - $\generator_{R(\Pbb)}u \in \mathrm{L}^1(\Pbb)$ then for almost any $t \in \ccint{0,T}$ - \begin{equation} - \expeLigne{\{\generator_\Pbb u(t, \bfX_t) + \generator_{R(\Pbb)} u (T-t, \bfX_t)\}v(\bfX_t) + \carrechamp_\Pbb(u,v)(t, \bfX_t)} = 0 . - \end{equation} -\item If the following hold: - \begin{enumerate}[label=\roman*)] - \item $\carrechamp_\Pbb(u,v) \in \rmc(\ccint{0,T} \times \msx, \rset)$. - \item $\mathcal{U}_{2, \Pbb}$ determines the weak convergence of Borel measure. - \item $\mu$ defines a finite measure on $\ccint{0,T} \times \msx$ where for - any $\omega \in \bar{\mathcal{U}}_{2, \Pbb}$ we have - \begin{equation} - \textstyle{\mu[\omega] = \expeLigne{\int_0^T \carrechamp_\Pbb(u,\omega_t)(t, \bfX_t) \rmd t ,}} - \end{equation} - where - $\bar{\mathcal{U}}_{2, \Pbb} = \ensembleLigne{\omega \in \rmc(\ccint{0,T} - \times \msx, \rset)}{\omega(t, \cdot) \in \mathcal{U}_{2, \Pbb}\ \ - \text{for any $t \in \ccint{0,T}$}}$. - \end{enumerate} - Then $u \in \dom(\generator_{R(\Pbb)})$ and - $\generator_{R(\Pbb)}u \in \mathrm{L}^1(\Pbb)$. - \end{enumerate} -\end{theorem} - -Note that this theorem is a simplified version of \citet[Theorem -3.17]{cattiaux2021time} where we restrict ourselves to the case of Markov path -measures. In what follows, we wish to apply \cref{thm:ibp_cattiaux} to diffusion -processes on manifolds. To do so, we will verify that under a finite entropy -assumption, the conditions $u \in \dom(\generator_{R(\Pbb)})$ and -$\generator_{R(\Pbb)}u \in \mathrm{L}^1(\Pbb)$ are fullfilled for a class of -regular functions $u$. These integrability results are obtained using Girsanov -theory. 
- -\subsubsection{Girsanov theory on compact Riemannian manifolds} -\label{sec:girs-theory-comp} - -In this section, we will consider two types of martingale problems: one on -Euclidean spaces and one on the compact Riemannian manifold $\M$. Let -$\Pbb \in \Pens(\rmc(\ccint{0,T}, \rset^p))$. We say that $\Pbb$ satisfies the -(Euclidean) martingale problem with infinitesimal generator -$\generator: \ \ccint{0,T} \times \rmc^2(\rset^p) \times \rset^p \to \rset$ if -for any $u \in \rmc_c^2(\rset^p)$, $(\bfM_t)_{t \in \ccint{0,T}}$ is a -$\Pbb$-martingale where for any $t \in \ccint{0,T}$ we have -\begin{equation} - \textstyle{ - \bfM_t = \bfM_0 + \int_0^t \generator(t, u)(\bfX_s) \rmd s , - } - \end{equation} - where $(\bfX_t)_{t \in \ccint{0,T}}$ has distribution $\Pbb$ and - $\int_0^T \absLigne{\generator(t, u)(\bfX_s) \rmd t} <+\infty$, $\Pbb$-a.s. - Let $\Pbb \in \Pens(\rmc(\ccint{0,T}, \M))$. We say that $\Pbb$ satisfies the - (Riemannian) martingale problem with infinitesimal generator - $\generatort: \ \ccint{0,T} \times \rmc^2(\M) \times \M \to \rset$ if for any - $u \in \rmc^2(\M)$, $(\bfM_t)_{t \in \ccint{0,T}}$ is a $\Pbb$-martingale - where for any $t \in \ccint{0,T}$ we have -\begin{equation} - \textstyle{ - \bfM_t = \bfM_0 + \int_0^t \generatort(t, u)(\bfX_s) \rmd s , - } - \end{equation} - where $(\bfX_t)_{t \in \ccint{0,T}}$ has distribution $\Pbb$ and - $\int_0^T \absLigne{\generatort(t, u)(\bfX_s) \rmd t} <+\infty$, $\Pbb$-a.s. - We now prove the following theorem. - - \begin{proposition} - \label{prop:girsanov_manifold} - Let $\Qbb$ be the path measure of a Brownian motion on $\M$. Let $\Pbb$ be a - Markov path measure on $\rmc(\ccint{0,T}, \M)$ such that $\KL{\Pbb}{\Qbb} < +\infty$. Then there exists - $\beta$ such that for any $t \in \ccint{0,T}$ and - $x \in \M$, $\beta(t,x) \in \mathrm{T}_x \M$. 
In addition, we have that - $\Pbb$ satisfies the martingale problem with infinitesimal generator - $\generator$ where for any $t \in \ccint{0,T}$, $u \in \rmc^2(\M)$ and - $x \in \M$ we have - \begin{equation} - \generator(t,u)(x) = \langle \beta(t,x), \nabla u(x) \rangle_\M + (1/2) \Delta u(x) . - \end{equation} - In addition, we have that - \begin{equation} - \textstyle{\KL{\Pbb}{\Qbb} = \KL{\Pbb_0}{\Qbb_0} + (1/2) \int_0^T \expeLigne{\norm{\beta(t, \bfX_t)}^2} \rmd t ,} - \end{equation} - where $(\bfX_t)_{t \in \ccint{0,T}}$ has distribution $\Pbb$. - \end{proposition} - - - \begin{proof} - First, we extend $(\bfB_t^\M)_{t \in \ccint{0,T}}$ to $\rset^p$ using the - Nash embedding theorem \citep[see][]{gunther1991isometric}. - $(\bfB_t^\M)_{t \in \ccint{0,T}}$ can be seen as a process on $\rset^p$ (for - some $p \in \nset$) which satisfies in a weak sense - \begin{equation} - \textstyle{ - \rmd \bfB_t^\M = \sum_{i=1}^p P_i(\bfB_t^\M) \circ \rmd \bfB_t^i = P(\bfB_t^\M) \circ \rmd \bfB_t , - } - \end{equation} - where $(\bfB_t)_{t \in \ccint{0,T}}$ is a $p$-dimensional Brownian motion - and $P \in \rmc^\infty(\rset^p, \rset^{p\times p})$ is such that for any - $x \in \M$, $P(x)$ is the projection onto $\mathrm{T}_x \M$ and for any - $i \in \{1, \dots, p\}$, $P_i \in \rmc^\infty(\rset^p, \rset^p)$ with - $P_i = P e_i$ where $\{e_j\}_{j=1}^p$ is the canonical basis of $\rset^p$. - Using the link between Stratonovich and It\^o integral, there exists - $\bar{b} \in \rmc^\infty(\rset^p, \rset^p)$ such that - $(\bfB_t^\M)_{t \in \ccint{0,T}}$ can be seen as a process on $\rset^p$ - which satisfies in a weak sense - \begin{equation} - \textstyle{ - \rmd \bfB_t^\M = \bar{b}(\bfB_t^\M) \rmd t + P(\bfB_t^\M) \rmd \bfB_t . 
- } - \end{equation} - For any $u \in \rmc^2(\M)$, we consider $\bar{u}$ an extension to $\rmc^2_c(\rset^p)$ and we have for any $s, t \in \ccint{0,T}$ - \begin{align} - &\textstyle{\expeLigne{\bar{v}(\bfB_s^\M) \int_s^t (1/2) \Delta u(\bfB_u^\M) \rmd u}} \\ - & \qquad = \textstyle{\expeLigne{\bar{v}(\bfB_s^\M) \int_s^t \{ \langle \nabla \bar{u}(\bfB_u^\M), \bar{b}(\bfB_u^\M) \rangle + (1/2) \langle P(\bfB_u^\M), \nabla^2 \bar{u}(\bfB_u^\M) \rangle \} \rmd u} . } - \end{align} - In particular, we get that for any $x \in \M$, - $\Delta u(x) = 2 \langle \nabla \bar{u}(x), \bar{b}(x) \rangle + \Delta - \bar{u}(x)$ \valentin{prove that for the projection this is okay}. Note - that $(\bfB_t^\M)_{t \in \ccint{0,T}}$ (seen as a process on $\rset^p$) - satisfies the condition $\mathrm{(U)}$ in - \cite{leonard2012girsanov}. Therefore applying \cite[Theorem - 2.1]{leonard2012girsanov}, \citep[Claim 4.5]{cattiaux2021time}, there - exists $\bar{\beta}: \ \ccint{0,T} \times \rset^p \to \rset^p$ such that - \begin{equation} - \label{eq:KL_ineq} - \textstyle{\KL{\Pbb}{\Qbb} = \KL{\Pbb_0}{\Qbb_0} + (1/2) \int_0^T \expeLigne{\normLigne{P(\bfX_t) \bar{\beta}(t, \bfX_t)}^2} \rmd t .} - \end{equation} - In addition, $\Pbb$ (seen as a process on $\rset^p$) satisfies a martingale - problem with infinitesimal generator - $\generatorb: \ \ccint{0,T} \times \rmc^2_c(\rset^p) \times \rset^p \to \rset$ such that - for any $t \in \ccint{0,T}$, $\bar{u} \in \rmc^2(\rset^p)$ and $x \in \rset^p$ - \begin{equation} - \generatorb(t,\bar{u})(x) = \langle \bar{b}(x) + P(x)\bar{\beta}(t,x), \nabla \bar{u}(x) \rangle + (1/2) \Delta \bar{u}(x) . - \end{equation} - Let $\beta: \ \ccint{0,T} \times \M \to \rset^p$ such that for any $t \in \ccint{0,T}$ - and $x \in \M$ we have $\beta(t,x) = P(x) \bar{\beta}(t,x)$. In particular, - we have that for any $x \in \M$, $\beta(t,x) \in \mathrm{T}_x\M$. 
Let - $u \in \rmc^2(\M)$ \valentin{dire que c'est okay pour le delta et pour le - gradient si on prend u bar = u circ p} and consider an extension $\bar{u}$ - to $\rmc^2(\rset^p)$. For any $t \in \ccint{0,T}$ and $x \in \M$ we have - \begin{align} - \generatorb(t,\bar{u})(x) &= \langle \bar{b}(x) + P(x)\bar{\beta}(t,x), \nabla \bar{u}(x) \rangle + (1/2) \Delta \bar{u}(x) \\ - &= \langle \beta(t,x), \nabla \bar{u}(x) \rangle + (1/2) \Delta u(x) \\ - &= \langle P(x) \beta(t,x), P(x) \nabla \bar{u}(x) \rangle + (1/2) \Delta u(x) = \langle \beta(t,x), \nabla u(x) \rangle + (1/2) \Delta u(x) . - \end{align} - In particular, we have that $\Pbb$ (seen as a process on $\M$) satisfies a - martingale problem with infinitesimal generator - $\generatorb: \ \ccint{0,T} \times \rmc^2_c(\M) \times \M \to \rset$ such that - for any $t \in \ccint{0,T}$, $u \in \rmc^2(\M)$ and $x \in \M$ - \begin{equation} - \generator(t,u)(x) = \langle \beta(t,x), \nabla u(x) \rangle_\M + (1/2) \Delta u(x) . - \end{equation} - In addition, rewriting \eqref{eq:KL_ineq} we have - \begin{equation} - \label{eq:KL_ineq} - \textstyle{\KL{\Pbb}{\Qbb} = \KL{\Pbb_0}{\Qbb_0} + (1/2) \int_0^T \expeLigne{\normLigne{\beta(t, \bfX_t)}^2} \rmd t ,} - \end{equation} - which concludes the proof. - \end{proof} - - Once this proposition is established, we can obtain the following - straightforward extension of \citet[Proposition 4.6]{cattiaux2021time}. - - \begin{proposition} - \label{prop:hyp_317} - Let $\Qbb$ be a Brownian motion with $\Qbb_0 = \piinv$ and $\Pbb$ a path - measure on $\rmc(\ccint{0,T}, \M)$ such that $\KL{\Pbb}{\Qbb} < - +\infty$. Then, there exist $\beta_\Pbb, \beta_{R(\Pbb)}: \ \ccint{0,T} \times \M \to \TM$ - such that for any $t \in \ccint{0,T}$ and $x \in \M$, - $\beta_\Pbb(t,x), \beta_{R(\Pbb)}(t,x) \in \mathrm{T}_x \M$. 
In addition, we have that - $\Pbb$ and $R(\Pbb)$ satisfy martingale problems with infinitesimal generator - $\generator_{\Pbb}$, respectively $\generator_{R(\Pbb)}$ where for any $t \in \ccint{0,T}$, $u \in \rmc^2(\M)$ and - $x \in \M$ we have - \begin{align} - &\generator_{\Pbb}(t,u)(x) = \langle \beta_\Pbb(t,x), \nabla u(x) \rangle_\M + (1/2) \Delta u(x) , \\ - &\generator_{R(\Pbb)}(t,u)(x) = \langle \beta_{R(\Pbb)}(t,x), \nabla u(x) \rangle_\M + (1/2) \Delta u(x) . - \end{align} - Finally, we have that - \begin{equation} - \textstyle{ - \int_0^T \expeLigne{\norm{\beta_\Pbb(t, \bfX_t)}^2} \rmd t + \int_0^T \expeLigne{\norm{\beta_{R(\Pbb)}(t, \bfX_{T-t})}^2} \rmd t < +\infty , - } - \end{equation} - where $(\bfX_t)_{t \in \ccint{0,T}}$ has distribution $\Pbb$. - \end{proposition} - - \begin{proof} - The proof is straightforward upon combining \cref{prop:girsanov_manifold} - and the fact that - $\KL{\Pbb}{\Qbb} = \KL{R(\Pbb)}{R(\Qbb)} = \KL{R(\Pbb)}{\Qbb} < +\infty$, - using that $\Qbb$ is stationary. - \end{proof} - - We conclude this section, with the following application of \cref{thm:ibp_cattiaux}. - - \begin{proposition} - \label{prop:cattiaux_spec} - For any $u, v \in \rmc^\infty(\M)$, we have that for almost any $t \in \ccint{0,T}$ - \begin{equation} - \label{eq:equalitu} - \expeLigne{v(\bfX_t) \langle \beta_\Pbb(t, \bfX_t) + \beta_{R(\Pbb)}(T-t, \bfX_t), \nabla u(\bfX_t) \rangle_\M + \langle \nabla u(\bfX_t), \nabla v(\bfX_t) \rangle} = 0 . - \end{equation} - \end{proposition} - - \begin{proof} - Remark that $\rmc^2(\M) \times \rmc^2(\M) \subset \dom(\carrechamp_\Pbb)$ and - $\rmc^2(\M) \times \rmc^2(\M) \subset \dom(\carrechamp_{R(\Pbb)})$. In addition, we have that for any - $u,v \in \rmc^2(\M)$, - $\carrechamp_\Pbb(u,v) = \carrechamp_{R(\Pbb)}(u,v) = \langle \nabla u, \nabla v \rangle_\M$. Note that - by \cref{prop:hyp_317} and \cref{thm:ibp_cattiaux} we immediately have that - for any $u, v \in \rmc^\infty(\M)$, \eqref{eq:equalitu} holds. 
- \end{proof} -\subsubsection{Concluding the proof} -\label{sec:concluding-proof} - -Using \cref{prop:cattiaux_spec} we can now conclude the proof of \cref{thm:time_reversal_manifold}. -First, remark that we can identify $\beta_\Pbb = b$. Let $u, v \in \rmc^\infty(\M)$, we have that - \begin{equation} - \label{eq:equality_fin} - \expeLigne{v(\bfX_t) \langle b(\bfX_t) + \beta_{R(\Pbb)}(T-t, \bfX_t), \nabla u(\bfX_t) \rangle + \Delta u(\bfX_t) v(\bfX_t)+ \langle \nabla u(\bfX_t), \nabla v(\bfX_t) \rangle} = 0 . - \end{equation} - Using that for any $t \in \ccint{0,T}$, $\Pbb_t$ admits a smooth positive - density w.r.t. $\piinv$ denoted $p_t$ and the divergence theorem, see - \citep[p.51]{lee2018introduction}, we have that for any $t \in \ccint{0,T}$, -\begin{align} - & \textstyle{\int_{\M} \{ \langle \beta_{R(\Pbb)}(T-t, x), \nabla u(x) \rangle + \langle b(x), \nabla u(x) \rangle \} v(x) p_t(x) \rmd \piinv(x)} \\ - & \qquad \qquad \qquad \qquad = \textstyle{\int_\M \langle \nabla u(x) p_t(x), \nabla v(x) \rangle \rmd \piinv(x) } \\ - & \qquad \qquad \qquad \qquad = - \textstyle{\int_\M \{ \Delta u (x) + \langle \nabla \log p_t(x), \nabla u(x) \rangle \} v(x) p_t(x)\rmd \piinv(x) } . -\end{align} -Therefore, we get that for any $t \in \ccint{0,T}$ and $x \in \M$, -$\langle \beta_{R(\Pbb)}(T-t, x), \nabla u(x) \rangle = \langle --b(x) + \nabla\log p_t(x), \nabla u(x) \rangle$, which concludes the proof. - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main" -%%% End: diff --git a/doc/tmlr.sty b/doc/tmlr.sty deleted file mode 100644 index 75d8e28..0000000 --- a/doc/tmlr.sty +++ /dev/null @@ -1,188 +0,0 @@ -%%%% TMLR Macros (LaTex) -%%%% Adapted by Hugo Larochelle and Fabian Pedregosa from the -%%%% ICLR stylefile Macros and borrowing from the JMLR Macros -%%%% Style File - -%%%% Last edited, January 2021 by Chris J. Maddison -%%%% Change font choice - -% Change the overall width of the page. 
If these parameters are -% changed, they will require corresponding changes in the -% maketitle section. -% -\usepackage{eso-pic} % used by \AddToShipoutPicture -\RequirePackage{fancyhdr} -\RequirePackage{natbib} - -\usepackage[T1]{fontenc} -\usepackage{lmodern} - -% modification to natbib citations -\setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}} - -\renewcommand{\topfraction}{0.95} % let figure take up nearly whole page -\renewcommand{\textfraction}{0.05} % let figure take up nearly whole page - - -%%%%%%%% Options -\newif\if@accepted\@acceptedfalse -\DeclareOption{accepted}{% - \@acceptedtrue -} - -\DeclareOption*{\PackageWarning{tmlr}{Unknown ‘\CurrentOption’}} -\ProcessOptions\relax - -% Specify the dimensions of each page - -\setlength{\paperheight}{11in} -\setlength{\paperwidth}{8.5in} - - -\oddsidemargin 0in % Note \oddsidemargin = \evensidemargin -\evensidemargin 0in -\marginparwidth 0.07 true in -%\marginparwidth 0.75 true in -%\topmargin 0 true pt % Nominal distance from top of page to top of -%\topmargin 0.125in -\topmargin -0.625in -\addtolength{\headsep}{0.25in} -\textheight 9.0 true in % Height of text (including footnotes & figures) -\textwidth 6.5 true in % Width of text line. -\widowpenalty=10000 -\clubpenalty=10000 - - -% \thispagestyle{empty} \pagestyle{empty} -\flushbottom \sloppy - -% We're never going to need a table of contents, so just flush it to -% save space --- suggested by drstrip@sandia-2 -\def\addcontentsline#1#2#3{} - -% Title stuff, taken from deproc. 
-\def\maketitle{\par -\begingroup - \def\thefootnote{\fnsymbol{footnote}} - \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} % for perfect author - % name centering -% The footnote-mark was overlapping the footnote-text, -% added the following to fix this problem (MK) - \long\def\@makefntext##1{\parindent 1em\noindent - \hbox to1.8em{\hss $\m@th ^{\@thefnmark}$}##1} - \@maketitle \@thanks -\endgroup -\setcounter{footnote}{0} -\let\maketitle\relax \let\@maketitle\relax -\gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax} - -\newlength\aftertitskip \newlength\beforetitskip -\newlength\interauthorskip \newlength\aftermaketitskip - -%% Changeable parameters. -\setlength\aftertitskip{0.3in plus 0.2in minus 0.2in} -\setlength\beforetitskip{0.05in plus 0.08in minus 0.08in} -\setlength\interauthorskip{0.08in plus 0.1in minus 0.1in} -\setlength\aftermaketitskip{0.3in plus 0.1in minus 0.1in} - -\def\@startauthor{\noindent \normalsize\bf} -\def\@endauthor{} - -\def\addr{\small\it}% -\def\email{\hfill\small\it}% -\def\name{\normalsize\bf}% -\def\name{\normalsize\bf}% -\def\AND{\@endauthor\rm\hss \vskip \interauthorskip \@startauthor} - -% The toptitlebar has been raised to top-justify the first page - -\usepackage{fancyhdr} -\pagestyle{fancy} -\fancyhead{} - -% Title (includes both anonimized and non-anonimized versions) -\def\@maketitle{\vbox{\hsize\textwidth -%\linewidth\hsize \vskip 0.1in \toptitlebar \centering -{\LARGE\bf\sffamily \@title\par}\vskip \aftertitskip -%\bottomtitlebar % \vskip 0.1in % minus -\if@accepted - \lhead{Published in Transations on Machine Learning Research (\month/\year)} - \@startauthor \@author \\ \\ {\bf Reviewed on OpenReview:} \openreview \@endauthor -\else - \lhead{Under review as submission to TMLR} - - \@startauthor Anonymous authors\\Paper under double-blind review \@endauthor -\fi -\vskip 0.3in minus 0.1in}} - -\renewenvironment{abstract}{\vskip.075in\centerline{\large\bf\sffamily 
-Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex} - -% sections with less space -\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus - -0.5ex minus -.2ex}{1.5ex plus 0.3ex -minus0.2ex}{\large\bf\raggedright\sffamily}} - -\def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus --0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\bf\raggedright\sffamily}} -\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex -plus -0.5ex minus -.2ex}{0.5ex plus -.2ex}{\normalsize\bf\raggedright\sffamily}} -\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus -0.5ex minus .2ex}{-1em}{\normalsize\bf}} -\def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus - 0.5ex minus .2ex}{-1em}{\normalsize\bf}} -\def\subsubsubsection{\vskip -5pt{\noindent\normalsize\rm\raggedright\sffamily}} - - -% Footnotes -\footnotesep 6.65pt % -\skip\footins 9pt plus 4pt minus 2pt -\def\footnoterule{\kern-3pt \hrule width 12pc \kern 2.6pt } -\setcounter{footnote}{0} - -% Lists and paragraphs -\parindent 0pt -\topsep 4pt plus 1pt minus 2pt -\partopsep 1pt plus 0.5pt minus 0.5pt -\itemsep 2pt plus 1pt minus 0.5pt -\parsep 2pt plus 1pt minus 0.5pt -\parskip .5pc - - -%\leftmargin2em -\leftmargin3pc -\leftmargini\leftmargin \leftmarginii 2em -\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em - -%\labelsep \labelsep 5pt - -\def\@listi{\leftmargin\leftmargini} -\def\@listii{\leftmargin\leftmarginii - \labelwidth\leftmarginii\advance\labelwidth-\labelsep - \topsep 2pt plus 1pt minus 0.5pt - \parsep 1pt plus 0.5pt minus 0.5pt - \itemsep \parsep} -\def\@listiii{\leftmargin\leftmarginiii - \labelwidth\leftmarginiii\advance\labelwidth-\labelsep - \topsep 1pt plus 0.5pt minus 0.5pt - \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt - \itemsep \topsep} -\def\@listiv{\leftmargin\leftmarginiv - \labelwidth\leftmarginiv\advance\labelwidth-\labelsep} -\def\@listv{\leftmargin\leftmarginv - \labelwidth\leftmarginv\advance\labelwidth-\labelsep} 
-\def\@listvi{\leftmargin\leftmarginvi - \labelwidth\leftmarginvi\advance\labelwidth-\labelsep} - -\abovedisplayskip 7pt plus2pt minus5pt% -\belowdisplayskip \abovedisplayskip -\abovedisplayshortskip 0pt plus3pt% -\belowdisplayshortskip 4pt plus3pt minus3pt% - - -\def\toptitlebar{\hrule height4pt\vskip .25in\vskip-\parskip} - -\def\bottomtitlebar{\vskip .29in\vskip-\parskip\hrule height1pt\vskip -.09in} % diff --git a/doc/xp.tex b/doc/xp.tex deleted file mode 100644 index f90d5ae..0000000 --- a/doc/xp.tex +++ /dev/null @@ -1,8 +0,0 @@ -\section{Experiments} -\label{sec:experiments} - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: "main_arxiv" -%%% End: diff --git a/requirements.txt b/requirements.txt index 8b376ee..2a9406f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,5 @@ --find-links https://storage.googleapis.com/jax-releases/jax_releases.html -# jax[cuda11_cudnn805] -# jaxlib==0.1.74+cuda11.cudnn805 # not sure that version will work on other zizgpu0x -jax==0.3.1+cuda11.cudnn805 -# jaxlib==0.3.0+cuda11.cudnn805 # not sure that version will work on other zizgpu0x +jax[cuda11_cudnn805]==0.3.1 dm-haiku optax fsspec[http]>=2021.05.0, !=2021.06.0 diff --git a/score_sde/losses.py b/score_sde/losses.py index 9aa107d..11bf2d9 100644 --- a/score_sde/losses.py +++ b/score_sde/losses.py @@ -200,7 +200,7 @@ def step_fn(carry_state: Tuple[jax.random.KeyArray, TrainState], batch: dict): updates, new_opt_state = optimizer.update(grad, train_state.opt_state) new_parmas = optax.apply_updates(params, updates) - new_params_ema = jax.tree_multimap( + new_params_ema = jax.tree_map( lambda p_ema, p: p_ema * train_state.ema_rate + p * (1.0 - train_state.ema_rate), train_state.params_ema, diff --git a/score_sde/ode.py b/score_sde/ode.py index 660b5dc..a727d51 100644 --- a/score_sde/ode.py +++ b/score_sde/ode.py @@ -17,7 +17,6 @@ import jax import jax.numpy as np from jax import lax -from jax import ops from jax.util import safe_map, safe_zip from 
jax.flatten_util import ravel_pytree from jax.tree_util import tree_map @@ -109,9 +108,10 @@ def body_fun(i, k): ti = t0 + dt * alpha[i-1] yi = y0 + dt * np.dot(beta[i-1, :], k) ft = func(yi, ti) - return ops.index_update(k, jax.ops.index[i, :], ft) + return k.at[i, :].set(ft) - k = ops.index_update(np.zeros((7, f0.shape[0])), ops.index[0, :], f0) + # k = ops.index_update(np.zeros((7, f0.shape[0])), ops.index[0, :], f0) + k = np.zeros((7, f0.shape[0])).at[0, :].set(f0) k = lax.fori_loop(1, 7, body_fun, k) y1 = dt * np.dot(c_sol, k) + y0 @@ -134,9 +134,9 @@ def body_fun(i, k): ti = t0 + dt * alpha[i-1] yi = y0 + dt * np.dot(beta[i-1, :], k) ft = func(yi, ti) - return ops.index_update(k, jax.ops.index[i, :], ft) + return k.at[i, :].set(ft) - k = ops.index_update(np.zeros((4, f0.shape[0])), ops.index[0, :], f0) + k = np.zeros((4, f0.shape[0])).at[0,:].set(f0) k = lax.fori_loop(1, 4, body_fun, k) y1 = dt * np.dot(c_sol, k) + y0 @@ -157,9 +157,9 @@ def body_fun(i, k): ti = t0 + dt * alpha[i-1] yi = y0 + dt * np.dot(beta[i-1, :], k) ft = func(yi, ti) - return ops.index_update(k, jax.ops.index[i, :], ft) + return k.at[i, :].set(ft) - k = ops.index_update(np.zeros((2, f0.shape[0])), ops.index[0, :], f0) + k = np.zeros((2, f0.shape[0])).at[0, :].set(f0) k = lax.fori_loop(1, 2, body_fun, k) y1 = dt * np.dot(c_sol, k) + y0 @@ -181,9 +181,9 @@ def body_fun(i, k): ti = t0 + dt * alpha[i-1] yi = y0 + dt * np.dot(beta[i-1, :], k) ft = func(yi, ti) - return ops.index_update(k, jax.ops.index[i, :], ft) + return k.at[i, :].set(ft) - k = ops.index_update(np.zeros((3, f0.shape[0])), ops.index[0, :], f0) + k = np.zeros((3, f0.shape[0])).at[0, :].set(f0) k = lax.fori_loop(1, 3, body_fun, k) y1 = dt * np.dot(c_sol, k) + y0 @@ -208,9 +208,9 @@ def body_fun(i, k): ti = t0 + dt * alpha[i-1] yi = y0 + dt * np.dot(beta[i-1, :], k) ft = func(yi, ti) - return ops.index_update(k, jax.ops.index[i, :], ft) + return k.at[i, :].set(ft) - k = ops.index_update(np.zeros((6, f0.shape[0])), ops.index[0, 
:], f0) + k = np.zeros((6, f0.shape[0])).at[0, :].set(f0) k = lax.fori_loop(1, 6, body_fun, k) y1 = dt * np.dot(c_sol, k) + y0 @@ -235,9 +235,9 @@ def body_fun(i, k): ti = t0 + dt * alpha[i-1] yi = y0 + dt * np.dot(beta[i-1, :], k) ft = func(yi, ti) - return ops.index_update(k, jax.ops.index[i, :], ft) + return k.at[i, :].set(ft) - k = ops.index_update(np.zeros((6, f0.shape[0])), ops.index[0, :], f0) + k = np.zeros((6, f0.shape[0])).at[0, :].set(f0) k = lax.fori_loop(1, 6, body_fun, k) y1 = dt * np.dot(c_sol, k) + y0 @@ -262,9 +262,9 @@ def body_fun(i, k): ti = t0 + dt * alpha[i-1] yi = y0 + dt * np.dot(beta[i-1, :], k) ft = func(yi, ti) - return ops.index_update(k, jax.ops.index[i, :], ft) + return k.at[i, :].set(ft) - k = ops.index_update(np.zeros((6, f0.shape[0])), ops.index[0, :], f0) + k = np.zeros((6, f0.shape[0])).at[0, :].set(f0) k = lax.fori_loop(1, 6, body_fun, k) y1 = dt * np.dot(c_sol, k) + y0 @@ -291,9 +291,9 @@ def body_fun(i, k): ti = t0 + dt * alpha[i-1] yi = y0 + dt * np.dot(beta[i-1, :], k) ft = func(yi, ti) - return ops.index_update(k, jax.ops.index[i, :], ft) + return k.at[i, :].set(ft) - k = ops.index_update(np.zeros((8, f0.shape[0])), ops.index[0, :], f0) + k = np.zeros((8, f0.shape[0])).at[0, :].set(f0) k = lax.fori_loop(1, 8, body_fun, k) y1 = dt * np.dot(c_sol, k) + y0 @@ -353,9 +353,9 @@ def body_fun(i, k): ti = t0 + dt * alpha[i-1] yi = y0 + dt * np.dot(beta[i-1, :], k) ft = func(yi, ti) - return ops.index_update(k, jax.ops.index[i, :], ft) + return k.at[i, :].set(ft) - k = ops.index_update(np.zeros((10, f0.shape[0])), ops.index[0, :], f0) + k = np.zeros((10, f0.shape[0])).at[0, :].set(f0) k = lax.fori_loop(1, 10, body_fun, k) y1 = dt * np.dot(c_sol, k) + y0 @@ -370,27 +370,27 @@ def _g_and_explicit_phi(prev_t, next_t, implicit_phi, k): beta = 1. 
explicit_phi = np.zeros_like(implicit_phi) - explicit_phi = jax.ops.index_update(explicit_phi, 0, implicit_phi[0]) + explicit_phi = explicit_phi.at[0].set(implicit_phi[0]) c = 1 / np.arange(1, _ADAMS_MAX_ORDER + 2) g = np.zeros(_ADAMS_MAX_ORDER + 1) - g = jax.ops.index_update(g, 0, 1) + g = g.at[0].set(1) def body_fun(i, val): beta, explicit_phi, c, g = val beta = (next_t - prev_t[i - 1]) / (curr_t - prev_t[i]) * beta - explicit_phi = jax.ops.index_update(explicit_phi, i, implicit_phi[i] * beta) + explicit_phi = explicit_phi.at[i].set(implicit_phi[i] * beta) idxs = np.arange(_ADAMS_MAX_ORDER + 1) c_q = np.where(idxs < k - i + 1, c, 0) # c[:k - i + 1] c_q_1 = np.where(idxs < k + 1 - i + 1, np.where(idxs >= 1, c, 0), 0) # c[1:k + 1 - i + 1] # shift so that it lines up with diff1 - c_q_1 = jax.ops.index_update(c_q_1, jax.ops.index[:-1], c_q_1[1:]) + c_q_1 = c_q_1.at[:-1].set(c_q_1[1:]) # c[:k - i + 1] - c[1:k + 1 - i + 1] c = lax.cond(i == 1, None, lambda _: c_q - c_q_1, None, lambda _: c_q - c_q_1 * dt / (next_t - prev_t[i - 1])) - g = jax.ops.index_update(g, i, c[0]) + g = g.at[i].set(c[0]) val = beta, explicit_phi, c, g return val @@ -398,18 +398,18 @@ def body_fun(i, val): beta, explicit_phi, c, g = lax.fori_loop(1, k, body_fun, (beta, explicit_phi, c, g)) # do the c and g update for i = k - c = jax.ops.index_update(c, jax.ops.index[:1], c[:1] - c[1:2] * dt / (next_t - prev_t[k - 1])) - g = jax.ops.index_update(g, k, c[0]) + c = c.at[:1].set(c[:1] - c[1:2] * dt / (next_t - prev_t[k - 1])) + g = g.at[k].set(c[0]) return g, explicit_phi def _compute_implicit_phi(explicit_phi, f_n, phi_order, k): k = lax.min(phi_order + 1, k) implicit_phi = np.zeros_like(explicit_phi) - implicit_phi = jax.ops.index_update(implicit_phi, 0, f_n) + implicit_phi = implicit_phi.at[0].set(f_n) def body_fun(i, val): implicit_phi = val - implicit_phi = jax.ops.index_update(implicit_phi, i, implicit_phi[i - 1] - explicit_phi[i - 1]) + implicit_phi = implicit_phi.at[i].set(implicit_phi[i - 1] 
- explicit_phi[i - 1]) return implicit_phi implicit_phi = lax.fori_loop(1, k, body_fun, implicit_phi) return implicit_phi @@ -486,11 +486,11 @@ def accept(tpl): # shift right and insert at 0 - prev_f = jax.ops.index_update(prev_f, jax.ops.index[1:], prev_f[:-1]) - prev_f = jax.ops.index_update(prev_f, 0, next_f0) + prev_f = prev_f.at[1:].set(prev_f[:-1]) + prev_f = prev_f.at[0].set(next_f0) - prev_t = jax.ops.index_update(prev_t, jax.ops.index[1:], prev_t[:-1]) - prev_t = jax.ops.index_update(prev_t, 0, next_t) + prev_t = prev_t.at[1:].set(prev_t[:-1]) + prev_t = prev_t.at[0].set(next_t) return p_next, prev_f, prev_t, next_t + dt_next, implicit_phi, next_order, 2 @@ -1402,13 +1402,13 @@ def body_fun(state): dt = initial_step_size(func_, ts[0], y0, 4, rtol, atol, f0) prev_f = np.empty((_ADAMS_MAX_ORDER + 1, ode_dim)) - prev_f = jax.ops.index_update(prev_f, 0, f0) + prev_f = prev_f.at[0].set(f0) prev_t = np.empty(_ADAMS_MAX_ORDER + 1) - prev_t = jax.ops.index_update(prev_t, 0, t0) + prev_t = prev_t.at[0].set(t0) prev_phi = np.empty((_ADAMS_MAX_ORDER, ode_dim)) - prev_phi = jax.ops.index_update(prev_phi, 0, f0) + prev_phi = prev_phi.at[0].set(f0) next_t = t0 + dt init_order = 1