\documentclass[10pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{dsfont}
\usepackage{fancyhdr}
\usepackage{indentfirst}
\usepackage{graphicx}
\usepackage{newlfont}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{latexsym}
\usepackage{amsthm}
\usepackage{mathtools}
\usepackage{nicefrac}
\usepackage{epstopdf}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{color}
\usepackage{dsfont}
\usepackage{comment}
\usepackage[left=2cm,right=2cm,top=2cm,bottom=2cm]{geometry}
\usepackage{hyperref}
%\hypersetup{
%    colorlinks=true,
%    linkcolor=blue,
%    filecolor=magenta,      
%    urlcolor=cyan,
%    pdftitle={Overleaf Example},
%    pdfpagemode=FullScreen,
%    }


\newtheorem{theorem}{Theorem}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}


\newtheorem{remark}[theorem]{Remark}
\newtheorem{example}[theorem]{Example}
\newtheorem{notation}{Notation}

\def \R {\mathbb{R}}
\def \N {\mathbb{N}}
\def \d {\mathrm{d}}
\newcommand{\red}[1]{\textcolor{red}{#1}}

\begin{document}
To start off we define the notation for the objects we will work on
$$
dX_t = B(t,X_t,\mu_{X_t})dt + \Sigma(t,X_t,\mu_{X_t})dW_t,\qquad X_0\sim\mu_0.
$$
Given the flow of marginals $\mu_t$ we can fix the coefficients and linearize the SDE with the linearized coefficients $B^\mu(t,x)$ and $\Sigma^\mu(t,x)$. Using this, we may define the infinitesimal generator
$$
\mathcal{A}_t^\mu=\frac{1}{2}\sum_{i,j=1}^N c^\mu_{ij}(t,x)\partial_{x_ix_j}+\sum_{i=1}^NB^\mu_i(t,x)\partial_{x_i}.
$$
Given this operator, under reasonable assumptions we have the existence of a fundamental solution $p^\mu(s,x;t,y)$ of
\begin{align*}
(\partial_s+\mathcal{A}^\mu_s)p^\mu(s,x;t,y)&=0,\\
(\partial_t-(\mathcal{A}^\mu_t)^*)p^\mu(s,x;t,y)&=0.
\end{align*}
Having the transition density $p^\mu$ we may define the forward translation operator
$$
U_\mu^{t,s}\phi(y)=\int p^\mu(s,x;t,y)\phi(x)dx,
$$
whose definition may be easily extended to $\mathcal{P}^2(\R^N)$ due to the Gaussian estimates on $p^\mu$ (which are uniform over the choice of $\mu_t$):
$$
U_\mu^{t,s}u(y)=\int p^\mu(s,x;t,y)u(dx),\qquad u\in\mathcal{P}^2(\R^N).
$$
Via this operator we may define
$$
u^\mu_t(x)=U_\mu^{t,0}\mu_0(x),
$$
the density of the solution of the linearized SDE via the marginal flow $(\mu_t)_{t\in[0,T]}$ with initial law $\mu_0$.
Via this density we are able to construct a new flow of marginals (the one of the solution of the linearized SDE via marginal flow $\mu_t$ and initial law $\mu_0$):
$$
\mathcal{L}^\mu_t(dy) = u^\mu_t(y)dy = \left(\int p^\mu(0,x;t,y)\mu_0(dx)\right)dy.
$$
\red{
Here we will briefly state what \cite{Kolokoltsov} does to study the contraction properties on $L^1$ norm of $u^\mu$.
To start we need the identity\footnote{In my calculations I get the adjoint operator $(\mathcal{A}_s)^*$ but in Kolokoltsov's paper there is the backward one.} (28) of \cite{Kolokoltsov}:
\begin{equation}\label{e1}
U^{t,0}_\mu - U^{t,0}_\nu = \int_0^t\frac{d}{ds}U^{t,s}_\nu U^{s,0}_\mu ds = \int_0^t U^{t,s}_\nu((\mathcal{A}^{\mu}_s)^*-(\mathcal{A}^{\nu}_s)^*)U^{s,0}_\mu ds.
\end{equation}
Then we need to observe that
\begin{align}
||U^{t,s} f||_{L^1} &= \int \left| \int p(s,x;t,y)f(x)dx\right|dy\nonumber\\
&\stackrel{Gaussian\ estimates}{\leq}C\int\int |f(x+y)|\Gamma^+(y)dy dx = C||f||_{L^1}.\label{e2}
\end{align}
Also observe that
\begin{align*}
||(\mathcal{A}^\mu_s-\mathcal{A}^\nu_s)f||_{L^1}\leq C\sup_{t,x}\left(|c^\mu(t,x)-c^\nu(t,x)|+|B^\mu(t,x)-B^\nu(t,x)|\right)||f||_{W^{1,2}},
\end{align*}
and by the fact that the operator $U^{s,0}_\mu$ is a bounded operator in $W^{1,2}$, check footnote\footnote{I am unsure of this passage, we don't actually need the $0$-derivative and due to gaussian estimates we should get something similar to $||\partial U\ f||_{L^1}\leq s^{-1/2}||f||_{L^1}$ and $||\partial^2 U\ f||_{L^1}\leq s^{-1}||f||_{L^1}$ where the second derivative is no longer integrable wrt $s$. I think something similar to (24.2.7) in dispense is happening.} we get
\begin{equation}\label{e3}
||U^{s,0}_\mu f||_{W^{1,2}}\leq C\cdot s^{-1/2}||f||_{W^{1,2}}.
\end{equation}
By \eqref{e1}, \eqref{e2} and \eqref{e3} we get 
\begin{align}\nonumber
||u^\mu_t-u^\nu_t||_{L^1}=||U^{t,0}_\mu\mu_0-U^{t,0}_\nu\mu_0||_{L^1}&\leq C\int_0^t s^{-1/2}ds ||\mu_0||_{L^1}\sup_{t,x}\left(|c^\mu(t,x)-c^\nu(t,x)|+|B^\mu(t,x)-B^\nu(t,x)|\right)\\
&\leq C\sqrt{t} ||\mu_0||_{L^1}\sup_{t,x}\left(|c^\mu(t,x)-c^\nu(t,x)|+|B^\mu(t,x)-B^\nu(t,x)|\right).\label{e4}
\end{align}
Now that we briefly stated the ideas of \cite{Kolokoltsov} we can begin.}

To leave as many doors open as possible we first define

$$
I(f) := \int f(x)(u^\mu_t(x)-u^\nu_t(x))dx,
$$

which at the moment may be seen as an indicator of closeness between the two densities; at a later moment we will take the $\sup$ over $f$ in some bounded function space, such as the bounded H\"older functions or the bounded functions.

%Now we will change a bit what \cite{Kolokoltsov} did to work in the Wasserstein framework. To start off we apply the duality formula for the Kantorovich–Rubinstein distance (Remark 6.5 of \cite{Villani}):
%\begin{align*}
%W^{(1)}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t) = \sup_{||f||_{Lip}\leq1}\left(\int f(x)(u^\mu_t(x)-u^\nu_t(x))dx\right)=:\sup_{||f||_{Lip}\leq1}I(f).
%\end{align*}
Now we apply this useful trick: if we write explicitly the definition of $u^\mu$ as the evaluation of $p^\mu$ on the distribution $\mu_0$ in $I(f)$ we can change the order of integration to evaluate $p^\mu$ on the regular distribution $f(x)dx$, this is useful because it switches the operators in Kolokoltsov's formula \eqref{e1} from being forward to being backwards while changing only marginally everything else. If we define the backward propagator operator
\begin{equation}\label{e4-1}
V^{s,t}_\mu g(y):=\int p^\mu(s,y;t,x)g(x)dx,
\end{equation}
we can expand $I(f)$ this way
\begin{align*}
I(f) &= \int\int f(x)(p^\mu-p^\nu)(0,y;t,x)\mu_0(dy)dx = \mu_0\left(\int f(x)(p^\mu-p^\nu)(0,\cdot;t,x)dx\right)\\
&=\mu_0\left(\int_0^t\frac{d}{ds}\left(\int\int p^\mu(0,\cdot;s,z)p^\nu(s,z;t,x)f(x)dxdz\right)ds\right)\\
&=\mu_0\left(\int_0^t \int\int\partial_{t_2}p^\mu(0,\cdot;s,z)p^\nu(s,z;t,x)f(x)dxdz + \int\int p^\mu(0,\cdot;s,z)\partial_{t_1}p^\nu(s,z;t,x)f(x)dxdz ds\right)
\intertext{by the fact that $p$ is the fundamental solution for both the forward and backward PDEs}
&=\mu_0\left(\int_0^t \int\int(\mathcal{A}_s^\mu)^* p^\mu(0,\cdot;s,z)p^\nu(s,z;t,x)f(x)dxdz - \int\int p^\mu(0,\cdot;s,z)\mathcal{A}_s^\nu p^\nu(s,z;t,x)f(x)dxdz ds\right)\\
&=\mu_0\left(\int_0^t \int(\mathcal{A}_s^\mu)^* p^\mu(0,\cdot;s,z)V_\nu^{s,t}f(z)dz - \int (\mathcal{A}_s^\nu)^*p^\mu(0,\cdot;s,z) V_\nu^{s,t}f(z)dz ds\right)\\
&=\mu_0\left(\int_0^t \int p^\mu(0,\cdot;s,z)\left(\mathcal{A}_s^\mu - \mathcal{A}_s^\nu\right)V_\nu^{s,t}f(z)dz ds\right)\\
&=\mu_0\left(\int_0^t V_\mu^{0,s}\left(\mathcal{A}_s^\mu - \mathcal{A}_s^\nu\right)V_\nu^{s,t}f ds\right)=\int_0^t \mu_0\left(V_\mu^{0,s}\left(\mathcal{A}_s^\mu - \mathcal{A}_s^\nu\right)V_\nu^{s,t}f \right) ds.
\end{align*}
Using similar arguments it is possible to obtain also Kolokoltsov's formula \eqref{e1}:
\begin{equation*}
I(f) = \int_0^t f\left(U_\nu^{t,s}\left((\mathcal{A}_s^\mu)^* - (\mathcal{A}_s^\nu)^*\right)U_\mu^{s,0}\mu_0 \right) ds.
\end{equation*}

Now we can try to estimate $I(f)$:
\begin{align}\nonumber
I(f)&\leq \int_0^t \left|\mu_0\left( V^{0,s}_\mu\left( \mathcal{A}^\mu_s - \mathcal{A}^\nu_s \right)V^{s,t}_\nu f\right)\right| ds,
\intertext{since $\mu_0$ is a probability measure we can bound $\mu_0(g)$ with the uniform bound of $g$: $|\mu_0(g)|\leq ||g||_{L^\infty}$:}
&\leq \int_0^t \sup_x\left|V^{0,s}_\mu\left( \mathcal{A}^\mu_s - \mathcal{A}^\nu_s \right)V^{s,t}_\nu f(x)\right| ds.\label{f1}
\end{align}
We observe that due to Holder's inequality and the fact that $p^\mu(0,x;s,y)dy$ is a probability measure for any fixed $x$ we have uniformly in $x$
$$
|V^{0,s}_\mu g(x)|=\left| \int p^\mu(0,x;s,y)g(y)dy \right|\leq ||g||_{L^\infty}.
$$
Thus continuing from \eqref{f1} we have
\begin{align*}
I(f)&\leq \int_0^t ||\left( \mathcal{A}^\mu_s - \mathcal{A}^\nu_s \right)V^{s,t}_\nu f(x)||_{L^\infty} ds\leq \\
&\leq \sup_{s,x}\left(|B^\mu(s,x)-B^\nu(s,x)|+|c^\mu(s,x)-c^\nu(s,x)|\right)\int_0^t ||\bigtriangledown V^{s,t}_\nu f||_{L^\infty} + ||Hess\ V^{s,t}_\nu f||_{L^\infty} ds.
\end{align*}

\begin{theorem}\label{a1}
\begin{comment}
If $f$ is a Lipschitz function with $[f]_{Lip}\leq 1$ then
\begin{enumerate}
\item $||Hess\ V^{s,t}_\nu f||_{L^\infty}\leq \frac{C}{\sqrt{t-s}}$.
\item $||\bigtriangledown V^{s,t}_\nu f||_{L^\infty}\leq C$.
\end{enumerate}
\end{comment}
If $f$ is a $C^\alpha_B$ function with $||f||_{C^\alpha_B}\leq 1$ then
\begin{enumerate}
\item $||Hess\ V^{s,t}_\nu f||_{L^\infty}\leq \frac{C}{|t-s|^{1-\frac{\alpha}{2}}}$.
\end{enumerate}
\end{theorem}
\begin{proof}
\begin{comment}
Let's first tackle the case with $f$ Lipschitz. Let $x\in\R^N$. We have
\begin{align*}
\left| \partial_{x_ix_j}V^{s,t}_\nu f (x) \right|&=\left| \int\partial_{x_i x_j}p^\nu(s,x;t,y)f(y)dy \right|\leq \left| \int\partial_{x_i x_j}p^\nu(s,x;t,y)(f(y)-f(x))dy \right|+ \left| \int\partial_{x_i x_j}p^\nu(s,x;t,y)dyf(x) \right|\\
&\leq \int |\partial_{x_ix_j}p^\nu(s,x;t,y)||x-y|dy + \left| \partial_{x_ix_j}\underbrace{\int p^\nu(s,x;t,y) dy}_{=1} \right||f(x)|\\
&\leq \frac{C}{|t-s|}\int \Gamma^+(t-s,x-y)|x-y|dy + 0\leq \frac{C}{\sqrt{t-s}}.
\end{align*}
The second inequality for the Lipschitz case is done in a completely analogous manner. Let's consider the Holder case
\end{comment}
\begin{align*}
\left| \partial_{x_ix_j}V^{s,t}_\nu f (x) \right|&=\left| \int\partial_{x_i x_j}p^\nu(s,x;t,y)f(y)dy \right|\leq \left| \int\partial_{x_i x_j}p^\nu(s,x;t,y)(f(y)-f(e^{(t-s)B}x))dy \right|+ \left| \int\partial_{x_i x_j}p^\nu(s,x;t,y)dyf(e^{(t-s)B}x) \right|\\
&\leq \int |\partial_{x_ix_j}p^\nu(s,x;t,y)||e^{(t-s)B}x-y|^\alpha_B dy + \left| \partial_{x_ix_j}\underbrace{\int p^\nu(s,x;t,y) dy}_{=1} \right||f(e^{(t-s)B}x)|\\
&\leq \frac{C_{B,\alpha}}{|t-s|}\int \Gamma^+(t-s,x-y)|x-e^{-(t-s)B}y|^\alpha_B dy + 0\leq \frac{C_{B,\alpha}}{|t-s|^{1-\frac{\alpha}{2}}}.
\end{align*}
by Lemma (A.5) of \cite{LucePagliaPascu}.
\end{proof}

We will now define
$$
d_{C^{\alpha}_B}(\mu,\nu)=\sup_{||f||_{C^{\alpha}_B}\leq 1}\left|\int f(x)\left(\mu(dx)-\nu(dx)\right) \right|
$$
the bounded anisotropic $\alpha$-Holder distance.
\begin{theorem}
The bounded anisotropic $\alpha$-Holder distance metrizes weak convergence of measures. More precisely given $(\mu_n)_{n\in\N}$ and $\mu$ probability measures
$$
d_{C^{\alpha}_B}(\mu_n,\mu)\rightarrow 0 \Leftrightarrow \mu_n\stackrel{d}{\rightarrow}\mu.
$$
\end{theorem}
\begin{proof}
The proof will be divided in two steps and is mostly taken from \url{https://sites.stat.washington.edu/jaw/COURSES/520s/522/HO.522.20/ch11c.pdf}

1) First we prove that
$$
\mu_n\stackrel{d}{\rightarrow}\mu \Leftrightarrow \int f d\mu_n\rightarrow\int f d\mu,\ \forall f\in C^{\alpha}_B.
$$
If $\mu_n\stackrel{d}{\rightarrow}\mu$ then equivalently $\int f d\mu_n\rightarrow\int f d\mu$ for any function $f\in bC$, which in particular means that it is true for any $f\in C^{\alpha}_B$. The converse is true because if $\int f d\mu_n\rightarrow\int f d\mu$ for any function $f\in C^{\alpha}_B$ then in particular it is true for any $f\in bLip$, which by the Portmanteau theorem implies weak convergence.

2) We will now prove that
$$
\int f d\mu_n\rightarrow\int fd\mu \ \forall f\in C^{\alpha}_B \Leftrightarrow d_{C^{\alpha}_B}(\mu_n,\mu)\rightarrow0.
$$
The easy implication is the right-to-left one: indeed, for any $f\in C^{\alpha}_B$,
$$
\left|\int f(x) \left( \mu_n(dx)-\mu(dx) \right)\right|\leq ||f||_{C^{\alpha}_B}\sup_{||h||_{C^{\alpha}_B}\leq 1}\left|\int h(x) \left( \mu_n(dx)-\mu(dx) \right)\right| = ||f||_{C^{\alpha}_B}\,d_{C^{\alpha}_B}(\mu_n,\mu)\rightarrow0.
$$
The other way is more challenging. First, by continuity from below of probability measures, for any fixed $\epsilon>0$ there exists a compact set $K$ such that $\mu(K)>1-\epsilon$. Let $\mathcal{H}=\left\lbrace f\in C^{\alpha}_B\ |\ ||f||_{C^{\alpha}_B}\leq 1 \right\rbrace$; if we restrict each of these functions to $K$ we have that $\mathcal{H}\vert_K$ is totally bounded with respect to the $||\cdot||_\infty$ norm by the Ascoli-Arzelà theorem. In particular there exist a finite $k$ and $f_1,\dots,f_k\in \mathcal{H}$ such that for any $f\in\mathcal{H}$ there exists $j$ with $\sup_K|f-f_j|\leq\epsilon$.

Now if we consider $d_B(x,y)=|x-y|_B$ and $K^\epsilon=\left\lbrace x\in\R^N\ |\ d_B(x,K)\leq\epsilon \right\rbrace$ and $f,f_j$ as before we have
$$
\sup_{x\in K^\epsilon}|f(x)-f_j(x)|\leq \sup_{x\in K^\epsilon}\left( |f(x)-f(y_x)| + |f(y_x)-f_j(y_x)| + |f_j(y_x)-f_j(x)|\right)\leq \sup_{x\in K^\epsilon}\left( 2\epsilon^\alpha + \epsilon \right)\leq C_\alpha\epsilon^\alpha.
$$
where $y_x$ is a point in $K$ such that $|x-y_x|_B\leq\epsilon$. $C_\alpha$ may be taken independently of $\epsilon$ as long as $\epsilon\leq 1$.

Let $g(x)=\max\left( 0, 1-\frac{d_B(x,K)}{\epsilon} \right)$, evidently $g\in bLip\subseteq C^{\alpha}_B$ and $\mathds{1}_K\leq g\leq \mathds{1}_{K^\epsilon}$. Thus by taking $n$ big enough we have by convergence against $C^{\alpha}_B$ functions that
$$
\mu_n(K^\epsilon)\geq\int g(x) \mu_n(dx) > 1 - 2\epsilon.
$$
Thus by taking $f\in\mathcal{H}$ and the associated $f_j$ we have
\begin{align*}
\left| \int f(x)\left(\mu_n(dx)-\mu(dx)\right) \right|&\leq \left| \int (f(x)-f_j(x))\left(\mu_n(dx)-\mu(dx)\right) \right| + \left| \int f_j(x)\left(\mu_n(dx)-\mu(dx)\right) \right|\\
&\leq \left| \int (f(x)-f_j(x))\mu_n(dx) \right| + \left| \int (f(x)-f_j(x))\mu(dx) \right| + \left| \int f_j(x)\left(\mu_n(dx)-\mu(dx)\right) \right|\\
&\leq \left| \int_{K^\epsilon} (f(x)-f_j(x))\mu_n(dx) \right| + \left| \int_{(K^\epsilon)^c} (f(x)-f_j(x))\mu_n(dx) \right| + \left| \int_{K^\epsilon} (f(x)-f_j(x))\mu(dx) \right| + \\
&\qquad + \left| \int_{(K^\epsilon)^c} (f(x)-f_j(x))\mu(dx) \right| + \left| \int f_j(x)\left(\mu_n(dx)-\mu(dx)\right) \right|\\
&\leq C_\alpha\epsilon^\alpha + 4\epsilon + C_\alpha\epsilon^\alpha+2\epsilon+\epsilon\leq C_\alpha\epsilon^\alpha,
\end{align*}
where the last term gets bounded by taking $n$ big enough and by using convergence against $C^{\alpha}_B$ functions, this gives us the final result.
\end{proof}
\begin{theorem}
Let $T$ be small enough. If the coefficients of the SDE are $C^\alpha_B$ functions of $y$ uniformly in $(t,x)$ (i.e.\ the $C^\alpha_B$ norm is uniformly bounded in $(t,x)$), then the map $\mathcal{L}:C([0,T],\mathcal{P}(\R^N))\rightarrow C([0,T],\mathcal{P}(\R^N))$ defined by $\mathcal{L}((\mu_t)_{t\in[0,T]})=(\mathcal{L}^\mu_t)_{t\in[0,T]}$ is a contraction with respect to the distance
$$
d((\mu_t)_{t\in[0,T]},(\nu_t)_{t\in[0,T]})=\sup_{t\in[0,T]}d_{C^\alpha_B}(\mu_t,\nu_t).
$$
\end{theorem}
\begin{proof}
We have
\begin{align*}
d_{C^{\alpha}_B}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)&=\sup_{||f||_{C^{\alpha}_B}\leq 1}|I(f)|\leq\\
&\stackrel{Th.\ \ref{a1}}{\leq} C\sup_{s,x} \left(|B^\mu(s,x)-B^\nu(s,x)|+|c^\mu(s,x)-c^\nu(s,x)|\right)\int_0^t \left(\frac{1}{|t-s|^{1-\frac{\alpha}{2}}} + \frac{1}{\sqrt{t-s}}\right)ds\\
&\leq C_T|t|^\frac{\alpha}{2}\sup_{s,x} \left(|B^\mu(s,x)-B^\nu(s,x)|+|c^\mu(s,x)-c^\nu(s,x)|\right).
\end{align*}
Now we observe that since the coefficients are uniformly $C^\alpha_B$ we have
$$
|B^\mu(s,x)-B^\nu(s,x)|=\left| \int b(s,x,y)\mu_s(dy) - \int b(s,x,y)\nu_s(dy) \right|\leq C d_{C^{\alpha}_B}(\mu_s,\nu_s),
$$
where $C=||b||_{C^{\alpha}_B}$; a priori it depends on $(s,x)$, but since $b$ is uniformly $C^\alpha_B$ it can be taken uniformly in $(s,x)$. It is also possible to prove that
$$
|c^\mu(s,x)-c^\nu(s,x)|\leq C ||\sigma||_{\infty}d_{C^{\alpha}_B}(\mu_s,\nu_s).
$$
with these we can conclude that
$$
d_{C^{\alpha}_B}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)\leq C|t|^\frac{\alpha}{2}\sup_{s\in[0,t]} d_{C^{\alpha}_B}(\mu_s,\nu_s),
$$
and thus
$$
\sup_{t\in[0,T]}d_{C^{\alpha}_B}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)\leq C|T|^\frac{\alpha}{2}\sup_{t\in[0,T]}d_{C^{\alpha}_B}(\mu_t,\nu_t).
$$
which proves contraction for small values of $T$.
\end{proof}
\begin{comment}
\red{
We will now define\footnote{to be precise the Wasserstein metric is defined differently, still by the duality formula for the Wasserstein $1$-distance (Remark 6.5 of \cite{Villani}) the two definitions are equivalent}
\begin{align*}
d_{bL}(\mu,\nu)&=\sup_{||f||_{bLip}\leq 1} \left|\int f(x)\left(\mu(dx)-\nu(dx)\right)\right|,\\
W^{(1)}(\mu,\nu)&=\sup_{||f||_{Lip}\leq 1} \left|\int f(x)\left(\mu(dx)-\nu(dx)\right)\right|,
\end{align*}
respectively the bounded Lipschitz distance and the Wasserstein $1$-distance. It is possible to prove that convergence in these distances implies weak convergence of measures.
First let's concentrate on the bounded Lipschitz case, consider the coefficients of the SDE to be bounded and globally Lipschitz in the $y$ variable ($b(t,x,y)$), then we have
\begin{align*}
d_{bL}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)&= \sup_{||f||_{bLip}\leq 1}|I(f)|\leq\\
&\stackrel{Assumption\ 1}{\leq} C\sup_{s,x} \left(|B^\mu(s,x)-B^\nu(s,x)|+|c^\mu(s,x)-c^\nu(s,x)|\right)\int_0^t \frac{1}{\sqrt{t-s}}\left(||f||_{L^\infty}+||f||_{Lip}\right)ds\\
\intertext{but the sum of these norms of $f$ is equal to $||f||_{bLip}$ which is less than $1$,}
&\leq C\sqrt{t}\sup_{s,x} \left(|B^\mu(s,x)-B^\nu(s,x)|+|c^\mu(s,x)-c^\nu(s,x)|\right).
\end{align*}
Now we most importantly observe that since the coefficients are bounded and Lipschitz we have
$$
|B^\mu(s,x)-B^\nu(s,x)|=\left| \int b(s,x,y)\mu_s(dy) - \int b(s,x,y)\nu_s(dy) \right|\leq C d_{bL}(\mu_s,\nu_s),
$$
where $C=||b||_{bLip}$, a priori it depends on $(s,x)$ but since $b$ is globally Lipschitz and bounded it can be taken uniformly in $(s,x)$. It is also possible to prove that
$$
|c^\mu(s,x)-c^\nu(s,x)|\leq C ||\sigma||_{\infty}d_{bL}(\mu_s,\nu_s).
$$
with these we can conclude that
$$
d_{bL}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)\leq C\sqrt{t}\sup_{s\in[0,t]} d_{bL}(\mu_s,\nu_s),
$$
and thus
$$
\sup_{t\in[0,T]}d_{bL}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)\leq C\sqrt{T}\sup_{t\in[0,T]}d_{bL}(\mu_t,\nu_t).
$$
which proves contraction for small valus of $T$.
If we consider the case with coefficients globally Lipschitz in $y$ but with $b$ possibly unbounded we have
\begin{align*}
W^{(1)}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)&=\sup_{||f||_{Lip}\leq 1}|I(f)|\leq\\
&\stackrel{Assumption\ 1}{\leq} C\sqrt{t} \sup_{s,x} \left(|B^\mu(s,x)-B^\nu(s,x)|+|c^\mu(s,x)-c^\nu(s,x)|\right)
\intertext{using the estimates on $B$ and $c$ as above but with the Wasserstein distance since now $b$ is not bounded}
&\leq C\sqrt{t}\sup_{s\in[0,t]}W^{(1)}(\mu_s,\nu_s)
\end{align*}
and thus
$$
\sup_{t\in[0,T]}W^{(1)}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)\leq C\sqrt{T}\sup_{t\in[0,T]}W^{(1)}(\mu_t,\nu_t).
$$
which proves contraction for small valus of $T$.}
\end{comment}
\begin{remark}
This approach of having the $\sup$ in the distance over the space of functions of the same regularity as the coefficients of the SDE seems quite natural ($b$ and $f$ in the same bounded space). It doesn't seem impossible to use these types of techniques for even broader classes of coefficients as long as there are Gaussian estimates.
\end{remark}
%At this point we can try to bound $I(f)$:
%\begin{equation}\label{e6}
%\mu_0(g)=\int g(y)\mu_0(dy)\leq ||g||_{L^1(\mu_0)}.
%\end{equation}
%By Gaussian estimates we can observe that the backward propagation operator $V^{0,s}$ is a bounded operator\footnote{Here there is an error in the last equality since $\mu_0$ is not translation invariant. We could try to work with the $W^{(2)}$ distance or make this last inequality work.} in $L^1(\mu_0)$:
%\begin{align}\label{e7}
%||V^{0,s}_\mu g||_{L^1(\mu_0)}\leq \int\left|\int p^\mu(0,x;s,y)g(y)dy\right|\mu_0(dx)\leq C\int\int |g(z+x)|\Gamma^+(|s|,z)dz\mu_0(dx)=C||g||_{L^1(\mu_0)}.
%\end{align}
%Similarly to \cite{Kolokoltsov}, but without using the adjoint operator so without the need to ask for regularity of the coefficients, we can show the following bound:
%\begin{align}\nonumber
%||(\mathcal{A}^\mu_s-\mathcal{A}^\nu_s)g||_{L^1(\mu_0)}&=||(B^\mu_s-B^\nu_s)\bigtriangledown g +\frac{1}{2}\left\langle (c^\mu_s-c^\nu_s)\bigtriangledown, \bigtriangledown\right\rangle g||_{L^1(\mu_0)}\\ \label{e8}
%&\leq \sup_{s,x}\left( |B^\mu_s(x)-B^\nu_s(x)| + |c^\mu_s(x)-c^\nu_s(x)| \right)||g||_{W^{2,1}(\mu_0)}.
%\end{align}
%The following inequality still needs to be properly proved but is stated as (13) in \cite{Kolokoltsov} and seems reasonable
%\begin{equation}\label{e9}
%||V_\nu^{s,t}f||_{W^{2,1}(\mu_0)}\leq Cs^{-\frac{1}{2}}||f||_{W^{1,1}(\mu_0)}.
%\end{equation}
%Now we employ a couple of observations. Firstly $I(f)$ is invariant with respect to translations: $I(f+c)=I(f),\ \forall c\in\R$; for this reason without loss of generality $f(0)=0$ and thus
%\begin{align*}
%||f||_{L^1(\mu_0)}&=\int|f(x)|\mu_0(dx)\leq \int|f(x)-f(0)|\mu_0(dx) + \int|f(0)|\mu_0(dx)\leq ||f||_{Lip}\int |x|\mu_0(dx) = C,\\
%||\bigtriangledown f||_{L^1(\mu_0)}&\leq \int ||f||_{Lip}\mu_0(dx) = 1. 
%\end{align*}
%Thus putting everything together in \eqref{e6}-\eqref{e9} and noticing that the estimate is uniform for any $f$ with Lipschitz constant bounded by $1$ we obtain
%$$
%W^{1}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)\leq \int_0^t C\sup\left( |B^\mu-B^\nu| +  |a^\mu-a^\nu| \right)s^{-\frac{1}{2}}ds = Ct^{\frac{1}{2}}\sup\left( |B^\mu-B^\nu| +  |c^\mu-c^\nu| \right).
%$$
%Now, if we are able to prove an inequality in the form $\sup\left( |B^\mu-B^\nu| +  |c^\mu-c^\nu|\right)\leq \sup_t W^{(1)}(\mu_t,\nu_t)$ we have a contraction. If the coefficients are Lipschitz in the third variable this is mostly trivial
%\begin{gather*}
%\sup\left( |B^\mu-B^\nu|\right)\leq \sup_{t,x}\left(\left|\int b(t,x,y_1)\mu_t(dy_1)-\int b(t,x,y_2)\nu_t(dy_2)\right| \right)\leq L\sup_{t,x}\left( \int|y_1-y_2|\gamma(dy_1,dy_2) \right)\\
%\stackrel{\inf}{\Rightarrow} \sup\left( |B^\mu-B^\nu|\right)\leq L\sup_{t,x}W^{(1)}(\mu_t,\nu_t),
%\end{gather*}
%this should also work for $c$ but the matrix form is a bit more challenging. If everything goes according to plan we should achieve that for small $T$ we have the contraction
%$$
%\sup_tW^{(1)}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)\leq C\sqrt{T} \sup_tW^{(1)}(\mu_t,\nu_t).
%$$


%\begin{small}
%\red{
%Let's concentrate for a moment on $B$:
%\begin{align*}
%|B^\mu(t,x)-B^\nu(t,x)|&=|B(t,x,\mu_t)-B(t,x,\nu_t)|=\left|\int b(t,x,y)\mu_t(dy)-\int b(t,x,z)\nu_t(dz)\right|
%\intertext{Given $\gamma$ a law of marginals $\mu_t$ and $\nu_t$}
%&\leq \left|\int b(t,x,y)-b(t,x,z)\gamma(dy,dz)\right|
%\stackrel{\alpha-Hold}{\leq} ||b||_{C^{0,\alpha}}\int |y-z|^\alpha\gamma(dy,dz)
%\intertext{This is uniform over $\gamma$, by passing to the inf we arrive to the Wasserstein distance}
%|B^\mu(t,x)-B^\nu(t,x)|&\leq ||b||_{C^{0,\alpha}} W^{(\alpha)}(\mu_t,\nu_t)^\alpha\stackrel{Jensen}{\leq} ||b||_{C^{0,\alpha}}W^{(2)}(\mu_t,\nu_t)^\alpha.
%\end{align*}
%Probably something similar may be proven for $c$, effectively $c=\Sigma\Sigma^*$ and $\Sigma$ is $\alpha$-Holderian wrt the Wasserstein distance.\\
%This should almost prove what we need, we just need an inequality to bound\footnote{to accomplish such a feat the "$\inf$" part of the Wasserstein is fundamental since if for simplicity we consider the product of $Unif_{[0,1]}$ with itself it is very far from $0$ while the Wasserstein distance between a law and itself is $0$. More heuristically with the $\inf$ the Wasserstein considers heavily correlated random variables pair which drastically lower the value of the distance.} $W^{(2)}(\mathcal{L}^\mu_t,\mathcal{L}^\nu_t)$ with $||u^\mu_t-u^\nu_t||_{L^1}$. Alternatively we could bound $\sup_tW^{(2)}(\mu_t,\nu_t)$ with the $L^1$ norm of the difference of the densities associated with $\mu_t$ and $\nu_t$. Depending on the case the contraction is applied on $C([0,T]\times\mathcal{P}^2(\R^n))$ or in $C([0,T]\times L^1(\R^N))$.\\
%A little observation with the $W^{(1)}$ distance: due to Kantarovich's duality theorem we have (\cite{Villani} remark 6.5)
%\begin{align*}
%W^{(1)}(\mu,\nu)&=\sup_{||f||_{Lip}\leq1}\left(\int f\mu-\int f\nu\right)\stackrel{AC}{=}\sup_{||f||_{Lip}\leq1}\left(\int f(x)(\gamma_\mu(x)-\gamma_\nu(x))dx\right)\\
%&\stackrel{Lipschitz}{\leq}\int|x-x_0|\cdot|\gamma_\mu(x)-\gamma_\nu(x)|dx
%\end{align*}
%In some cases the inequality seems an equality. For simplicity let's consider the $1$-d case and define $\beta(dt)=|\gamma_\mu(t)-\gamma_\nu(t)|dt$:
%\begin{align*}
%W^{(1)}(\mu,\nu)&\leq\int_{-\infty}^{+\infty}|t|\beta(dt)=\int_0^{+\infty}t\beta(dt)-\int_{-\infty}^0t\beta(dt)\\
%&=\int_0^{+\infty}\int_0^tdx\beta(dt)+\int_{-\infty}^0\int_t^0dx\beta(dt)=\int_0^{+\infty}\beta([x,+\infty[)dx+\int_{-\infty}^0\beta(]-\infty,x])dx\\
%&=\int_0^{+\infty}\beta(]-\infty,-x]\cup[x,+\infty[)dx=\int_0^{+\infty}\int_{\R\setminus[-x,x]}|\gamma_\mu(t)-\gamma_\nu(t)|dtdx.
%\end{align*}
%Unfortunately here there is no space for bounding the Wasserstein distance with the $L^1$ distance: if we consider the densities uniform in $[n,n+1]$ and $[n+1/2,n+3/2]$ we get that the constant $C$ in $W^{(1)}(\mu,\nu)\leq C||\gamma_\mu-\gamma_\nu||_{L^1}$ needs to be arbitrarily big. The following theorem should shelter us from this type of problems.}
%\end{small}

There is the property of tightness for the family of measures that are solution of an SDE with $\alpha$-Holder coefficients and with initial law with finite $p$-moment:
\begin{theorem}\label{t1}
Let $\mu_0\in\mathcal{P}^p(\R^N)$. Let $p(s,x;t,y)$ be a fundamental solution of a forward Kolmogorov equation with $\alpha$-Holderian coefficients so that Gaussian estimates exist. Then for any $\epsilon>0$ there exists $K>0$ such that
$$
\int_{B_K^c}u_t(x)dx<\epsilon,\qquad \int_{B_K^c}|x|^pu_t(x)dx<\epsilon.
$$
Where $u_t(x)=\int p(0,y;t,x)\mu_0(dy)$.
\end{theorem}
\begin{proof}
The proof is a little variation of (3.2) in \cite{Kolokoltsov}; indeed the first inequality is proved there. Fix $\epsilon>0$. Let $\tilde{\epsilon}>0$ that we will fix later. Since $\mu_0$ is a measure with finite $p$-moment there exists $K>0$ such that
$$
\mu_0(B_K^c)<\tilde{\epsilon},\qquad \int_{B_K^c}|x|^p\mu_0(dx)<\tilde{\epsilon}.
$$
Let $\tilde{K}>0$ that we will fix later.
\begin{align*}
\int_{|x|\geq K+\tilde{K}}|x|^pu_t(x)dx&\stackrel{Gaussian\ estimates}{\leq} C\int_{|x|\geq K+\tilde{K}}|x|^p\int\Gamma^+(x-\xi,t)\mu_0(d\xi)dx\\
&\leq C\int_{|\xi|\geq K,y\in\R^N}|y+\xi|^p\Gamma^+(y,t)\mu_0(d\xi)dy + C\int_{|\xi|\leq K,|y|\geq\tilde{K}}|y+\xi|^p\Gamma^+(y,t)\mu_0(d\xi)dy\\
&\leq C_p\mu_0(B_K^c)\int |y|^p\Gamma^+(y,t)dy+C_p\int_{B_K^c}|\xi|^p\mu_0(d\xi)\\
&\qquad+C_p\mu_0(B_K)\int_{B_{\tilde{K}}^c}|y|^p\Gamma^+(y,t)dy + C_p\int|\xi|^p\mu_0(d\xi)\int_{B_{\tilde{K}}^c}\Gamma^+(y,t)dy
\intertext{The first two terms get bounded by the preliminary inequalities and the fact that the Gaussian has finite $p$ moment. The last two terms get bounded by a constant $\tilde{C}_{p,\tilde{K},T}$ that goes to $0$ as $\tilde{K}$ goes to $+\infty$.}
&\leq C_{T,p}\tilde{\epsilon} + C_p\tilde{\epsilon} + C_p\tilde{C}_{p,\tilde{K},T} + C_{p,\mu_0}\tilde{C}_{p,\tilde{K},T}.
\end{align*}
If we choose $\tilde{\epsilon}$ small enough and $\tilde{K}$ big enough the final result will be smaller than $\epsilon$. We must also notice that all the estimates and the constants do not depend directly on $u_t(x)$ but on the Gaussian estimates and so they hold uniformly for the whole family of solutions.
\end{proof}
We may notice that the family of the marginals is bounded in $\mathcal{P}^p$ with the Wasserstein metric.
\begin{theorem}
Let $(\mu_i)_{i\in\mathcal{I}}$ be the family of the marginals of solutions to SDEs with the same initial datum and $\alpha$-Holderian coefficients. (Written like this is not very rigorous but for example given $(\mu_t)_{t\in[0,T]}$ the flow of marginals of a solution to an SDE as in the hypothesis we have that $\mu_t$ is an element of the family for every $t\in[0,T]$).

Then the family is bounded as a subset of $\mathcal{P}^p(\R^N)$ equipped with the Wasserstein metric.
\end{theorem}
\begin{proof}
Let $\mu_1$ and $\mu_2$ be elements of the family. Fix $\epsilon>0$. By Theorem~\ref{t1} we know that there exists $K>0$ such that
$$
\int_{B^c_K}\mu_i(dx)<\epsilon,\qquad \int_{B^c_K}|x|^p\mu_i(dx)<\epsilon,\qquad i=1,2.
$$
In particular, given a measure $\gamma$ on $\R^{2N}$ with marginals $\mu_1$ and $\mu_2$, there exists $\tilde{K}$ (uniformly in $\gamma$) such that
\begin{equation*}
\int_{B^c_{\tilde{K}}}\gamma(dy,dz)\leq \int\int_{B^c_{K}\times B^c_{K}}\gamma(dy,dz)\leq \int\int_{B^c_{K}\times \R^N}\gamma(dy,dz)=\int_{B^c_K}\mu_1(dy)<\epsilon.
\end{equation*}
This also works for the $p$-moment and we get
\begin{equation*}
\int_{B^c_{\tilde{K}}}|y|^p\gamma(dy,dz)\leq \int\int_{B^c_{K}\times B^c_{K}}|y|^p\gamma(dy,dz)\leq \int\int_{B^c_{K}\times \R^N}|y|^p\gamma(dy,dz)=\int_{B^c_K}|y|^p\mu_1(dy)<\epsilon.
\end{equation*}
This means that we can bound the Wasserstein distance between the two in the following way:
\begin{align*}
W^{(p)}(\mu_1,\mu_2)^p&=\inf_{\gamma}\int\int|y-z|^p\gamma(dy,dz)\leq\inf_\gamma \int_{B^c_{\tilde{K}}}|y-z|^p\gamma(dy,dz) + \int_{B_{\tilde{K}}}|y-z|^p\gamma(dy,dz)\\
&\leq C_p\left(\int_{B^c_{\tilde{K}}}|y|^p\gamma(dy,dz) + \int_{B^c_{\tilde{K}}}|z|^p\gamma(dy,dz)\right) + \int_{B_{\tilde{K}}}diam(B_{\tilde{K}})^p\gamma(dy,dz)\\
&\leq 2C_p\epsilon + diam(B_{\tilde{K}})^p.
\end{align*}
\end{proof}
The preceding theorem in particular proves that the $p$-moments are uniformly bounded, for this reason the following theorem is valid in our case.
\begin{theorem}
Let $(\mu_i)_{i\in\mathcal{I}}\subset\mathcal{P}^p(\R^N)$ be a family of probability measures with tightness property as of Theorem \ref{t1} and such that the $p$-moments are uniformly bounded. Then for any sequence $(\mu_n)_{n\in\N}$ there exists a subsequence $(\mu_{n_m})_{m\in\N}$ and a measure $\mu\in\mathcal{P}^p(\R^N)$ such that $$\mu_{n_m}\stackrel{Wasserstein}{\rightarrow}\mu.$$
\end{theorem}
\begin{proof}
By theorem (6.9) of \cite{Villani} we have that Wasserstein convergence in $\mathcal{P}^p$ is equivalent to weak convergence and convergence of the $p$-moment. We know that
$$
\mu_i(B_K^c)<\epsilon,\qquad \int_{B_K^c}|x|^p\mu_i(dx)<\epsilon.
$$
Thus if we define $P_i(dx)=|x|^p\mu_i(dx)$ we have that $(P_i)_{i\in\mathcal{I}}$ is a tight family of uniformly finite measures, in particular we may use a generalization of Prokhorov's theorem (Theorem 8.6.2 of \cite{Bogachev}) and get that for any sequence there exists a subsequence $P_{n_m}$ that converges weakly to a finite measure $P$. 
\begin{equation}\label{e10}
P_{n_m}(\phi)\rightarrow P(\phi),\qquad\forall \phi\in C_0^{\infty}(\R^N).
\end{equation}
By using the same theorem on $\mu_{n_m}$ we can find a new subsequence (that will still be written as $\mu_{n_m}$) such that $\mu_{n_m}$ converges weakly to $\mu$.
\begin{equation}\label{e11}
\mu_{n_m}(\phi)\rightarrow \mu(\phi),\qquad\forall \phi\in C_0^{\infty}(\R^N).
\end{equation}
Consider now $\phi\in C_0^{\infty}(\R^N)$.
\begin{align*}
&\int |x|^p\phi(x)\mu_{n_m}(dx)\stackrel{\eqref{e11}}{\rightarrow} \int |x|^p\phi(x)\mu(dx)\\
&=P_{n_m}(\phi)\stackrel{\eqref{e10}}{\rightarrow} P(\phi).
\end{align*}
By uniqueness of the limit we have for any $\phi\in C_0^{\infty}$ that $P(\phi)=\int|x|^p\phi(x)\mu(dx)$. This proves convergence of the $p$-moment and thus with weak convergence we have Wasserstein convergence.
\end{proof}
%Most probably some of these $L^p$ spaces will need to be changed, for example it could be helpful to only work with $W^{(1)}$ and $L^1$ to follow more closely Kolokoltsov.
\begin{thebibliography}{90}
\bibitem{Bogachev} Vladimir I. Bogachev - Measure Theory (2007)
\bibitem{Kolokoltsov} Vassili N. Kolokoltsov -  Nonlinear Diffusions and Stable-Like Processes
with Coefficients Depending on the Median or VaR (2013)
\bibitem{LucePagliaPascu} G. Lucertini, A. Pagliarani, A. Pascucci - Optimal regularity for degenerate Kolmogorov equations in non-divergence form with rough-in-time coefficients (2024), https://doi.org/10.1007/s00028-023-00916-9
\bibitem{YAOZHONG} Yaozhong Hu, Michael A. Kouritzin, Jiayu Zheng -  Nonlinear McKean-Vlasov diffusions under the weak Hormander condition with quantile-dependent coefficients (2021), https://arxiv.org/abs/2101.04080
\bibitem{Villani} Cédric Villani - Optimal Transport Old and New (2009)
\end{thebibliography}
\end{document}