\(
\newcommand{\A} {\mathbb{A}}
\newcommand{\rma} {\mathrm{a}}
\newcommand{\rmA} {\mathrm{A}}
\newcommand{\ba} {\mathbf{a}}
\newcommand{\bA} {\mathbf{A}}
\newcommand{\cA} {\mathcal{A}}
\newcommand{\bcA} {\boldsymbol{\cA}}
\newcommand{\tnsrA} {\underline{\bA}}
\newcommand{\wta} {\widetilde{a}}
\newcommand{\wtA} {\widetilde{A}}
\newcommand{\wtrma} {\widetilde{\rma}}
\newcommand{\wtrmA} {\widetilde{\rmA}}
\newcommand{\wtba} {\widetilde{\ba}}
\newcommand{\wtbA} {\widetilde{\bA}}
\newcommand{\wha} {\widehat{a}}
\newcommand{\whA} {\widehat{A}}
\newcommand{\whrma} {\widehat{\rma}}
\newcommand{\whrmA} {\widehat{\rmA}}
\newcommand{\whba} {\widehat{\ba}}
\newcommand{\whbA} {\widehat{\bA}}
\newcommand{\whcA} {\widehat{\cA}}
\newcommand{\whtnsrA} {\widehat{\tnsrA}}
\newcommand{\B} {\mathbb{B}}
\newcommand{\rmb} {\mathrm{b}}
\newcommand{\rmB} {\mathrm{B}}
\newcommand{\bb} {\mathbf{b}}
\newcommand{\bB} {\mathbf{B}}
\newcommand{\cB} {\mathcal{B}}
\newcommand{\bcB} {\boldsymbol{\cB}}
\newcommand{\tnsrB} {\underline{\bB}}
\newcommand{\wtb} {\widetilde{b}}
\newcommand{\wtB} {\widetilde{B}}
\newcommand{\wtrmb} {\widetilde{\rmb}}
\newcommand{\wtrmB} {\widetilde{\rmB}}
\newcommand{\wtbb} {\widetilde{\bb}}
\newcommand{\wtbB} {\widetilde{\bB}}
\newcommand{\whb} {\widehat{b}}
\newcommand{\whB} {\widehat{B}}
\newcommand{\whrmb} {\widehat{\rmb}}
\newcommand{\whrmB} {\widehat{\rmB}}
\newcommand{\whbb} {\widehat{\bb}}
\newcommand{\whbB} {\widehat{\bB}}
\newcommand{\whcB} {\widehat{\cB}}
\newcommand{\whtnsrB} {\widehat{\tnsrB}}
\newcommand{\C} {\mathbb{C}}
\newcommand{\rmc} {\mathrm{c}}
\newcommand{\rmC} {\mathrm{C}}
\newcommand{\bc} {\mathbf{c}}
\newcommand{\bC} {\mathbf{C}}
\newcommand{\cC} {\mathcal{C}}
\newcommand{\bcC} {\boldsymbol{\cC}}
\newcommand{\tnsrC} {\underline{\bC}}
\newcommand{\wtc} {\widetilde{c}}
\newcommand{\wtC} {\widetilde{C}}
\newcommand{\wtrmc} {\widetilde{\rmc}}
\newcommand{\wtrmC} {\widetilde{\rmC}}
\newcommand{\wtbc} {\widetilde{\bc}}
\newcommand{\wtbC} {\widetilde{\bC}}
\newcommand{\whc} {\widehat{c}}
\newcommand{\whC} {\widehat{C}}
\newcommand{\whrmc} {\widehat{\rmc}}
\newcommand{\whrmC} {\widehat{\rmC}}
\newcommand{\whbc} {\widehat{\bc}}
\newcommand{\whbC} {\widehat{\bC}}
\newcommand{\whcC} {\widehat{\cC}}
\newcommand{\whtnsrC} {\widehat{\tnsrC}}
\newcommand{\D} {\mathbb{D}}
\newcommand{\rmd} {\mathrm{d}}
\newcommand{\rmD} {\mathrm{D}}
\newcommand{\bd} {\mathbf{d}}
\newcommand{\bD} {\mathbf{D}}
\newcommand{\cD} {\mathcal{D}}
\newcommand{\bcD} {\boldsymbol{\cD}}
\newcommand{\tnsrD} {\underline{\bD}}
\newcommand{\wtd} {\widetilde{d}}
\newcommand{\wtD} {\widetilde{D}}
\newcommand{\wtrmd} {\widetilde{\rmd}}
\newcommand{\wtrmD} {\widetilde{\rmD}}
\newcommand{\wtbd} {\widetilde{\bd}}
\newcommand{\wtbD} {\widetilde{\bD}}
\newcommand{\whd} {\widehat{d}}
\newcommand{\whD} {\widehat{D}}
\newcommand{\whrmd} {\widehat{\rmd}}
\newcommand{\whrmD} {\widehat{\rmD}}
\newcommand{\whbd} {\widehat{\bd}}
\newcommand{\whbD} {\widehat{\bD}}
\newcommand{\whcD} {\widehat{\cD}}
\newcommand{\whtnsrD} {\widehat{\tnsrD}}
\newcommand{\E} {\mathbb{E}}
\newcommand{\rme} {\mathrm{e}}
\newcommand{\rmE} {\mathrm{E}}
\newcommand{\be} {\mathbf{e}}
\newcommand{\bE} {\mathbf{E}}
\newcommand{\cE} {\mathcal{E}}
\newcommand{\bcE} {\boldsymbol{\cE}}
\newcommand{\tnsrE} {\underline{\bE}}
\newcommand{\wte} {\widetilde{e}}
\newcommand{\wtE} {\widetilde{E}}
\newcommand{\wtrme} {\widetilde{\rme}}
\newcommand{\wtrmE} {\widetilde{\rmE}}
\newcommand{\wtbe} {\widetilde{\be}}
\newcommand{\wtbE} {\widetilde{\bE}}
\newcommand{\whe} {\widehat{e}}
\newcommand{\whE} {\widehat{E}}
\newcommand{\whrme} {\widehat{\rme}}
\newcommand{\whrmE} {\widehat{\rmE}}
\newcommand{\whbe} {\widehat{\be}}
\newcommand{\whbE} {\widehat{\bE}}
\newcommand{\whcE} {\widehat{\cE}}
\newcommand{\whtnsrE} {\widehat{\tnsrE}}
\newcommand{\F} {\mathbb{F}}
\newcommand{\rmf} {\mathrm{f}}
\newcommand{\rmF} {\mathrm{F}}
\newcommand{\bff} {\mathbf{f}}
\newcommand{\bF} {\mathbf{F}}
\newcommand{\cF} {\mathcal{F}}
\newcommand{\bcF} {\boldsymbol{\cF}}
\newcommand{\tnsrF} {\underline{\bF}}
\newcommand{\wtf} {\widetilde{f}}
\newcommand{\wtF} {\widetilde{F}}
\newcommand{\wtrmf} {\widetilde{\rmf}}
\newcommand{\wtrmF} {\widetilde{\rmF}}
\newcommand{\wtbf} {\widetilde{\bf}}
\newcommand{\wtbF} {\widetilde{\bF}}
\newcommand{\whf} {\widehat{f}}
\newcommand{\whF} {\widehat{F}}
\newcommand{\whrmf} {\widehat{\rmf}}
\newcommand{\whrmF} {\widehat{\rmF}}
\newcommand{\whbf} {\widehat{\bf}}
\newcommand{\whbF} {\widehat{\bF}}
\newcommand{\whcF} {\widehat{\cF}}
\newcommand{\whtnsrF} {\widehat{\tnsrF}}
\newcommand{\G} {\mathbb{G}}
\newcommand{\rmg} {\mathrm{g}}
\newcommand{\rmG} {\mathrm{G}}
\newcommand{\bg} {\mathbf{g}}
\newcommand{\bG} {\mathbf{G}}
\newcommand{\cG} {\mathcal{G}}
\newcommand{\bcG} {\boldsymbol{\cG}}
\newcommand{\tnsrG} {\underline{\bG}}
\newcommand{\wtg} {\widetilde{g}}
\newcommand{\wtG} {\widetilde{G}}
\newcommand{\wtrmg} {\widetilde{\rmg}}
\newcommand{\wtrmG} {\widetilde{\rmG}}
\newcommand{\wtbg} {\widetilde{\bg}}
\newcommand{\wtbG} {\widetilde{\bG}}
\newcommand{\whg} {\widehat{g}}
\newcommand{\whG} {\widehat{G}}
\newcommand{\whrmg} {\widehat{\rmg}}
\newcommand{\whrmG} {\widehat{\rmG}}
\newcommand{\whbg} {\widehat{\bg}}
\newcommand{\whbG} {\widehat{\bG}}
\newcommand{\whcG} {\widehat{\cG}}
\newcommand{\whtnsrG} {\widehat{\tnsrG}}
\newcommand{\bbH} {\mathbb{H}}
\newcommand{\rmh} {\mathrm{h}}
\newcommand{\rmH} {\mathrm{H}}
\newcommand{\bh} {\mathbf{h}}
\newcommand{\bH} {\mathbf{H}}
\newcommand{\cH} {\mathcal{H}}
\newcommand{\bcH} {\boldsymbol{\cH}}
\newcommand{\tnsrH} {\underline{\bH}}
\newcommand{\wth} {\widetilde{h}}
\newcommand{\wtH} {\widetilde{H}}
\newcommand{\wtrmh} {\widetilde{\rmh}}
\newcommand{\wtrmH} {\widetilde{\rmH}}
\newcommand{\wtbh} {\widetilde{\bh}}
\newcommand{\wtbH} {\widetilde{\bH}}
\newcommand{\whh} {\widehat{h}}
\newcommand{\whH} {\widehat{H}}
\newcommand{\whrmh} {\widehat{\rmh}}
\newcommand{\whrmH} {\widehat{\rmH}}
\newcommand{\whbh} {\widehat{\bh}}
\newcommand{\whbH} {\widehat{\bH}}
\newcommand{\whcH} {\widehat{\cH}}
\newcommand{\whtnsrH} {\widehat{\tnsrH}}
\newcommand{\I} {\mathbb{I}}
\newcommand{\rmi} {\mathrm{i}}
\newcommand{\rmI} {\mathrm{I}}
\newcommand{\bi} {\mathbf{i}}
\newcommand{\bI} {\mathbf{I}}
\newcommand{\cI} {\mathcal{I}}
\newcommand{\bcI} {\boldsymbol{\cI}}
\newcommand{\tnsrI} {\underline{\bI}}
\newcommand{\wti} {\widetilde{i}}
\newcommand{\wtI} {\widetilde{I}}
\newcommand{\wtrmi} {\widetilde{\rmi}}
\newcommand{\wtrmI} {\widetilde{\rmI}}
\newcommand{\wtbi} {\widetilde{\bi}}
\newcommand{\wtbI} {\widetilde{\bI}}
\newcommand{\whi} {\widehat{i}}
\newcommand{\whI} {\widehat{I}}
\newcommand{\whrmi} {\widehat{\rmi}}
\newcommand{\whrmI} {\widehat{\rmI}}
\newcommand{\whbi} {\widehat{\bi}}
\newcommand{\whbI} {\widehat{\bI}}
\newcommand{\whcI} {\widehat{\cI}}
\newcommand{\whtnsrI} {\widehat{\tnsrI}}
\newcommand{\J} {\mathbb{J}}
\newcommand{\rmj} {\mathrm{j}}
\newcommand{\rmJ} {\mathrm{J}}
\newcommand{\bj} {\mathbf{j}}
\newcommand{\bJ} {\mathbf{J}}
\newcommand{\cJ} {\mathcal{J}}
\newcommand{\bcJ} {\boldsymbol{\cJ}}
\newcommand{\tnsrJ} {\underline{\bJ}}
\newcommand{\wtj} {\widetilde{j}}
\newcommand{\wtJ} {\widetilde{J}}
\newcommand{\wtrmj} {\widetilde{\rmj}}
\newcommand{\wtrmJ} {\widetilde{\rmJ}}
\newcommand{\wtbj} {\widetilde{\bj}}
\newcommand{\wtbJ} {\widetilde{\bJ}}
\newcommand{\whj} {\widehat{j}}
\newcommand{\whJ} {\widehat{J}}
\newcommand{\whrmj} {\widehat{\rmj}}
\newcommand{\whrmJ} {\widehat{\rmJ}}
\newcommand{\whbj} {\widehat{\bj}}
\newcommand{\whbJ} {\widehat{\bJ}}
\newcommand{\whcJ} {\widehat{\cJ}}
\newcommand{\whtnsrJ} {\widehat{\tnsrJ}}
\newcommand{\K} {\mathbb{K}}
\newcommand{\rmk} {\mathrm{k}}
\newcommand{\rmK} {\mathrm{K}}
\newcommand{\bk} {\mathbf{k}}
\newcommand{\bK} {\mathbf{K}}
\newcommand{\cK} {\mathcal{K}}
\newcommand{\bcK} {\boldsymbol{\cK}}
\newcommand{\tnsrK} {\underline{\bK}}
\newcommand{\wtk} {\widetilde{k}}
\newcommand{\wtK} {\widetilde{K}}
\newcommand{\wtrmk} {\widetilde{\rmk}}
\newcommand{\wtrmK} {\widetilde{\rmK}}
\newcommand{\wtbk} {\widetilde{\bk}}
\newcommand{\wtbK} {\widetilde{\bK}}
\newcommand{\whk} {\widehat{k}}
\newcommand{\whK} {\widehat{K}}
\newcommand{\whrmk} {\widehat{\rmk}}
\newcommand{\whrmK} {\widehat{\rmK}}
\newcommand{\whbk} {\widehat{\bk}}
\newcommand{\whbK} {\widehat{\bK}}
\newcommand{\whcK} {\widehat{\cK}}
\newcommand{\whtnsrK} {\widehat{\tnsrK}}
\newcommand{\bbL} {\mathbb{L}}
\newcommand{\rml} {\mathrm{l}}
\newcommand{\rmL} {\mathrm{L}}
\newcommand{\bl} {\mathbf{l}}
\newcommand{\bL} {\mathbf{L}}
\newcommand{\cL} {\mathcal{L}}
\newcommand{\bcL} {\boldsymbol{\cL}}
\newcommand{\tnsrL} {\underline{\bL}}
\newcommand{\wtl} {\widetilde{l}}
\newcommand{\wtL} {\widetilde{L}}
\newcommand{\wtrml} {\widetilde{\rml}}
\newcommand{\wtrmL} {\widetilde{\rmL}}
\newcommand{\wtbl} {\widetilde{\bl}}
\newcommand{\wtbL} {\widetilde{\bL}}
\newcommand{\whl} {\widehat{l}}
\newcommand{\whL} {\widehat{L}}
\newcommand{\whrml} {\widehat{\rml}}
\newcommand{\whrmL} {\widehat{\rmL}}
\newcommand{\whbl} {\widehat{\bl}}
\newcommand{\whbL} {\widehat{\bL}}
\newcommand{\whcL} {\widehat{\cL}}
\newcommand{\whtnsrL} {\widehat{\tnsrL}}
\newcommand{\M} {\mathbb{M}}
\newcommand{\rmm} {\mathrm{m}}
\newcommand{\rmM} {\mathrm{M}}
\newcommand{\bm} {\mathbf{m}}
\newcommand{\bM} {\mathbf{M}}
\newcommand{\cM} {\mathcal{M}}
\newcommand{\bcM} {\boldsymbol{\cM}}
\newcommand{\tnsrM} {\underline{\bM}}
\newcommand{\wtm} {\widetilde{m}}
\newcommand{\wtM} {\widetilde{M}}
\newcommand{\wtrmm} {\widetilde{\rmm}}
\newcommand{\wtrmM} {\widetilde{\rmM}}
\newcommand{\wtbm} {\widetilde{\bm}}
\newcommand{\wtbM} {\widetilde{\bM}}
\newcommand{\whm} {\widehat{m}}
\newcommand{\whM} {\widehat{M}}
\newcommand{\whrmm} {\widehat{\rmm}}
\newcommand{\whrmM} {\widehat{\rmM}}
\newcommand{\whbm} {\widehat{\bm}}
\newcommand{\whbM} {\widehat{\bM}}
\newcommand{\whcM} {\widehat{\cM}}
\newcommand{\whtnsrM} {\widehat{\tnsrM}}
\newcommand{\N} {\mathbb{N}}
\newcommand{\rmn} {\mathrm{n}}
\newcommand{\rmN} {\mathrm{N}}
\newcommand{\bn} {\mathbf{n}}
\newcommand{\bN} {\mathbf{N}}
\newcommand{\cN} {\mathcal{N}}
\newcommand{\bcN} {\boldsymbol{\cN}}
\newcommand{\tnsrN} {\underline{\bN}}
\newcommand{\wtn} {\widetilde{n}}
\newcommand{\wtN} {\widetilde{N}}
\newcommand{\wtrmn} {\widetilde{\rmn}}
\newcommand{\wtrmN} {\widetilde{\rmN}}
\newcommand{\wtbn} {\widetilde{\bn}}
\newcommand{\wtbN} {\widetilde{\bN}}
\newcommand{\whn} {\widehat{n}}
\newcommand{\whN} {\widehat{N}}
\newcommand{\whrmn} {\widehat{\rmn}}
\newcommand{\whrmN} {\widehat{\rmN}}
\newcommand{\whbn} {\widehat{\bn}}
\newcommand{\whbN} {\widehat{\bN}}
\newcommand{\whcN} {\widehat{\cN}}
\newcommand{\whtnsrN} {\widehat{\tnsrN}}
\newcommand{\bbO} {\mathbb{O}}
\newcommand{\rmo} {\mathrm{o}}
\newcommand{\rmO} {\mathrm{O}}
\newcommand{\bo} {\mathbf{o}}
\newcommand{\bO} {\mathbf{O}}
\newcommand{\cO} {\mathcal{O}}
\newcommand{\bcO} {\boldsymbol{\cO}}
\newcommand{\tnsrO} {\underline{\bO}}
\newcommand{\wto} {\widetilde{o}}
\newcommand{\wtO} {\widetilde{O}}
\newcommand{\wtrmo} {\widetilde{\rmo}}
\newcommand{\wtrmO} {\widetilde{\rmO}}
\newcommand{\wtbo} {\widetilde{\bo}}
\newcommand{\wtbO} {\widetilde{\bO}}
\newcommand{\who} {\widehat{o}}
\newcommand{\whO} {\widehat{O}}
\newcommand{\whrmo} {\widehat{\rmo}}
\newcommand{\whrmO} {\widehat{\rmO}}
\newcommand{\whbo} {\widehat{\bo}}
\newcommand{\whbO} {\widehat{\bO}}
\newcommand{\whcO} {\widehat{\cO}}
\newcommand{\whtnsrO} {\widehat{\tnsrO}}
\newcommand{\bbP} {\mathbb{P}}
\newcommand{\rmp} {\mathrm{p}}
\newcommand{\rmP} {\mathrm{P}}
\newcommand{\bp} {\mathbf{p}}
\newcommand{\bP} {\mathbf{P}}
\newcommand{\cP} {\mathcal{P}}
\newcommand{\bcP} {\boldsymbol{\cP}}
\newcommand{\tnsrP} {\underline{\bP}}
\newcommand{\wtp} {\widetilde{p}}
\newcommand{\wtP} {\widetilde{P}}
\newcommand{\wtrmp} {\widetilde{\rmp}}
\newcommand{\wtrmP} {\widetilde{\rmP}}
\newcommand{\wtbp} {\widetilde{\bp}}
\newcommand{\wtbP} {\widetilde{\bP}}
\newcommand{\whp} {\widehat{p}}
\newcommand{\whP} {\widehat{P}}
\newcommand{\whrmp} {\widehat{\rmp}}
\newcommand{\whrmP} {\widehat{\rmP}}
\newcommand{\whbp} {\widehat{\bp}}
\newcommand{\whbP} {\widehat{\bP}}
\newcommand{\whcP} {\widehat{\cP}}
\newcommand{\whtnsrP} {\widehat{\tnsrP}}
\newcommand{\Q} {\mathbb{Q}}
\newcommand{\rmq} {\mathrm{q}}
\newcommand{\rmQ} {\mathrm{Q}}
\newcommand{\bq} {\mathbf{q}}
\newcommand{\bQ} {\mathbf{Q}}
\newcommand{\cQ} {\mathcal{Q}}
\newcommand{\bcQ} {\boldsymbol{\cQ}}
\newcommand{\tnsrQ} {\underline{\bQ}}
\newcommand{\wtq} {\widetilde{q}}
\newcommand{\wtQ} {\widetilde{Q}}
\newcommand{\wtrmq} {\widetilde{\rmq}}
\newcommand{\wtrmQ} {\widetilde{\rmQ}}
\newcommand{\wtbq} {\widetilde{\bq}}
\newcommand{\wtbQ} {\widetilde{\bQ}}
\newcommand{\whq} {\widehat{q}}
\newcommand{\whQ} {\widehat{Q}}
\newcommand{\whrmq} {\widehat{\rmq}}
\newcommand{\whrmQ} {\widehat{\rmQ}}
\newcommand{\whbq} {\widehat{\bq}}
\newcommand{\whbQ} {\widehat{\bQ}}
\newcommand{\whcQ} {\widehat{\cQ}}
\newcommand{\whtnsrQ} {\widehat{\tnsrQ}}
\newcommand{\R} {\mathbb{R}}
\newcommand{\rmr} {\mathrm{r}}
\newcommand{\rmR} {\mathrm{R}}
\newcommand{\br} {\mathbf{r}}
\newcommand{\bR} {\mathbf{R}}
\newcommand{\cR} {\mathcal{R}}
\newcommand{\bcR} {\boldsymbol{\cR}}
\newcommand{\tnsrR} {\underline{\bR}}
\newcommand{\wtr} {\widetilde{r}}
\newcommand{\wtR} {\widetilde{R}}
\newcommand{\wtrmr} {\widetilde{\rmr}}
\newcommand{\wtrmR} {\widetilde{\rmR}}
\newcommand{\wtbr} {\widetilde{\br}}
\newcommand{\wtbR} {\widetilde{\bR}}
\newcommand{\whr} {\widehat{r}}
\newcommand{\whR} {\widehat{R}}
\newcommand{\whrmr} {\widehat{\rmr}}
\newcommand{\whrmR} {\widehat{\rmR}}
\newcommand{\whbr} {\widehat{\br}}
\newcommand{\whbR} {\widehat{\bR}}
\newcommand{\whcR} {\widehat{\cR}}
\newcommand{\whtnsrR} {\widehat{\tnsrR}}
\newcommand{\bbS} {\mathbb{S}}
\newcommand{\rms} {\mathrm{s}}
\newcommand{\rmS} {\mathrm{S}}
\newcommand{\bs} {\mathbf{s}}
\newcommand{\bS} {\mathbf{S}}
\newcommand{\cS} {\mathcal{S}}
\newcommand{\bcS} {\boldsymbol{\cS}}
\newcommand{\tnsrS} {\underline{\bS}}
\newcommand{\wts} {\widetilde{s}}
\newcommand{\wtS} {\widetilde{S}}
\newcommand{\wtrms} {\widetilde{\rms}}
\newcommand{\wtrmS} {\widetilde{\rmS}}
\newcommand{\wtbs} {\widetilde{\bs}}
\newcommand{\wtbS} {\widetilde{\bS}}
\newcommand{\whs} {\widehat{s}}
\newcommand{\whS} {\widehat{S}}
\newcommand{\whrms} {\widehat{\rms}}
\newcommand{\whrmS} {\widehat{\rmS}}
\newcommand{\whbs} {\widehat{\bs}}
\newcommand{\whbS} {\widehat{\bS}}
\newcommand{\whcS} {\widehat{\cS}}
\newcommand{\whtnsrS} {\widehat{\tnsrS}}
\newcommand{\T} {\mathbb{T}}
\newcommand{\rmt} {\mathrm{t}}
\newcommand{\rmT} {\mathrm{T}}
\newcommand{\bt} {\mathbf{t}}
\newcommand{\bT} {\mathbf{T}}
\newcommand{\cT} {\mathcal{T}}
\newcommand{\bcT} {\boldsymbol{\cT}}
\newcommand{\tnsrT} {\underline{\bT}}
\newcommand{\wtt} {\widetilde{t}}
\newcommand{\wtT} {\widetilde{T}}
\newcommand{\wtrmt} {\widetilde{\rmt}}
\newcommand{\wtrmT} {\widetilde{\rmT}}
\newcommand{\wtbt} {\widetilde{\bt}}
\newcommand{\wtbT} {\widetilde{\bT}}
\newcommand{\wht} {\widehat{t}}
\newcommand{\whT} {\widehat{T}}
\newcommand{\whrmt} {\widehat{\rmt}}
\newcommand{\whrmT} {\widehat{\rmT}}
\newcommand{\whbt} {\widehat{\bt}}
\newcommand{\whbT} {\widehat{\bT}}
\newcommand{\whcT} {\widehat{\cT}}
\newcommand{\whtnsrT} {\widehat{\tnsrT}}
\newcommand{\U} {\mathbb{U}}
\newcommand{\rmu} {\mathrm{u}}
\newcommand{\rmU} {\mathrm{U}}
\newcommand{\bu} {\mathbf{u}}
\newcommand{\bU} {\mathbf{U}}
\newcommand{\cU} {\mathcal{U}}
\newcommand{\bcU} {\boldsymbol{\cU}}
\newcommand{\tnsrU} {\underline{\bU}}
\newcommand{\wtu} {\widetilde{u}}
\newcommand{\wtU} {\widetilde{U}}
\newcommand{\wtrmu} {\widetilde{\rmu}}
\newcommand{\wtrmU} {\widetilde{\rmU}}
\newcommand{\wtbu} {\widetilde{\bu}}
\newcommand{\wtbU} {\widetilde{\bU}}
\newcommand{\whu} {\widehat{u}}
\newcommand{\whU} {\widehat{U}}
\newcommand{\whrmu} {\widehat{\rmu}}
\newcommand{\whrmU} {\widehat{\rmU}}
\newcommand{\whbu} {\widehat{\bu}}
\newcommand{\whbU} {\widehat{\bU}}
\newcommand{\whcU} {\widehat{\cU}}
\newcommand{\whtnsrU} {\widehat{\tnsrU}}
\newcommand{\V} {\mathbb{V}}
\newcommand{\rmv} {\mathrm{v}}
\newcommand{\rmV} {\mathrm{V}}
\newcommand{\bv} {\mathbf{v}}
\newcommand{\bV} {\mathbf{V}}
\newcommand{\cV} {\mathcal{V}}
\newcommand{\bcV} {\boldsymbol{\cV}}
\newcommand{\tnsrV} {\underline{\bV}}
\newcommand{\wtv} {\widetilde{v}}
\newcommand{\wtV} {\widetilde{V}}
\newcommand{\wtrmv} {\widetilde{\rmv}}
\newcommand{\wtrmV} {\widetilde{\rmV}}
\newcommand{\wtbv} {\widetilde{\bv}}
\newcommand{\wtbV} {\widetilde{\bV}}
\newcommand{\whv} {\widehat{v}}
\newcommand{\whV} {\widehat{V}}
\newcommand{\whrmv} {\widehat{\rmv}}
\newcommand{\whrmV} {\widehat{\rmV}}
\newcommand{\whbv} {\widehat{\bv}}
\newcommand{\whbV} {\widehat{\bV}}
\newcommand{\whcV} {\widehat{\cV}}
\newcommand{\whtnsrV} {\widehat{\tnsrV}}
\newcommand{\W} {\mathbb{W}}
\newcommand{\rmw} {\mathrm{w}}
\newcommand{\rmW} {\mathrm{W}}
\newcommand{\bw} {\mathbf{w}}
\newcommand{\bW} {\mathbf{W}}
\newcommand{\cW} {\mathcal{W}}
\newcommand{\bcW} {\boldsymbol{\cW}}
\newcommand{\tnsrW} {\underline{\bW}}
\newcommand{\wtw} {\widetilde{w}}
\newcommand{\wtW} {\widetilde{W}}
\newcommand{\wtrmw} {\widetilde{\rmw}}
\newcommand{\wtrmW} {\widetilde{\rmW}}
\newcommand{\wtbw} {\widetilde{\bw}}
\newcommand{\wtbW} {\widetilde{\bW}}
\newcommand{\whw} {\widehat{w}}
\newcommand{\whW} {\widehat{W}}
\newcommand{\whrmw} {\widehat{\rmw}}
\newcommand{\whrmW} {\widehat{\rmW}}
\newcommand{\whbw} {\widehat{\bw}}
\newcommand{\whbW} {\widehat{\bW}}
\newcommand{\whcW} {\widehat{\cW}}
\newcommand{\whtnsrW} {\widehat{\tnsrW}}
\newcommand{\X} {\mathbb{X}}
\newcommand{\rmx} {\mathrm{x}}
\newcommand{\rmX} {\mathrm{X}}
\newcommand{\bx} {\mathbf{x}}
\newcommand{\bX} {\mathbf{X}}
\newcommand{\cX} {\mathcal{X}}
\newcommand{\bcX} {\boldsymbol{\cX}}
\newcommand{\tnsrX} {\underline{\bX}}
\newcommand{\wtx} {\widetilde{x}}
\newcommand{\wtX} {\widetilde{X}}
\newcommand{\wtrmx} {\widetilde{\rmx}}
\newcommand{\wtrmX} {\widetilde{\rmX}}
\newcommand{\wtbx} {\widetilde{\bx}}
\newcommand{\wtbX} {\widetilde{\bX}}
\newcommand{\whx} {\widehat{x}}
\newcommand{\whX} {\widehat{X}}
\newcommand{\whrmx} {\widehat{\rmx}}
\newcommand{\whrmX} {\widehat{\rmX}}
\newcommand{\whbx} {\widehat{\bx}}
\newcommand{\whbX} {\widehat{\bX}}
\newcommand{\whcX} {\widehat{\cX}}
\newcommand{\whtnsrX} {\widehat{\tnsrX}}
\newcommand{\Y} {\mathbb{Y}}
\newcommand{\rmy} {\mathrm{y}}
\newcommand{\rmY} {\mathrm{Y}}
\newcommand{\by} {\mathbf{y}}
\newcommand{\bY} {\mathbf{Y}}
\newcommand{\cY} {\mathcal{Y}}
\newcommand{\bcY} {\boldsymbol{\cY}}
\newcommand{\tnsrY} {\underline{\bY}}
\newcommand{\wty} {\widetilde{y}}
\newcommand{\wtY} {\widetilde{Y}}
\newcommand{\wtrmy} {\widetilde{\rmy}}
\newcommand{\wtrmY} {\widetilde{\rmY}}
\newcommand{\wtby} {\widetilde{\by}}
\newcommand{\wtbY} {\widetilde{\bY}}
\newcommand{\why} {\widehat{y}}
\newcommand{\whY} {\widehat{Y}}
\newcommand{\whrmy} {\widehat{\rmy}}
\newcommand{\whrmY} {\widehat{\rmY}}
\newcommand{\whby} {\widehat{\by}}
\newcommand{\whbY} {\widehat{\bY}}
\newcommand{\whcY} {\widehat{\cY}}
\newcommand{\whtnsrY} {\widehat{\tnsrY}}
\newcommand{\Z} {\mathbb{Z}}
\newcommand{\rmz} {\mathrm{z}}
\newcommand{\rmZ} {\mathrm{Z}}
\newcommand{\bz} {\mathbf{z}}
\newcommand{\bZ} {\mathbf{Z}}
\newcommand{\cZ} {\mathcal{Z}}
\newcommand{\bcZ} {\boldsymbol{\cZ}}
\newcommand{\tnsrZ} {\underline{\bZ}}
\newcommand{\wtz} {\widetilde{z}}
\newcommand{\wtZ} {\widetilde{Z}}
\newcommand{\wtrmz} {\widetilde{\rmz}}
\newcommand{\wtrmZ} {\widetilde{\rmZ}}
\newcommand{\wtbz} {\widetilde{\bz}}
\newcommand{\wtbZ} {\widetilde{\bZ}}
\newcommand{\whz} {\widehat{z}}
\newcommand{\whZ} {\widehat{Z}}
\newcommand{\whrmz} {\widehat{\rmz}}
\newcommand{\whrmZ} {\widehat{\rmZ}}
\newcommand{\whbz} {\widehat{\bz}}
\newcommand{\whbZ} {\widehat{\bZ}}
\newcommand{\whcZ} {\widehat{\cZ}}
\newcommand{\whtnsrZ} {\widehat{\tnsrZ}}
\newcommand{\balpha} {\boldsymbol{\alpha}}
\newcommand{\wtalpha} {\widetilde{\alpha}}
\newcommand{\whalpha} {\widehat{\alpha}}
\newcommand{\wtbalpha} {\widetilde{\balpha}}
\newcommand{\whbalpha} {\widehat{\balpha}}
\newcommand{\bbeta} {\boldsymbol{\beta}}
\newcommand{\wtbeta} {\widetilde{\beta}}
\newcommand{\whbeta} {\widehat{\beta}}
\newcommand{\wtbbeta} {\widetilde{\bbeta}}
\newcommand{\whbbeta} {\widehat{\bbeta}}
\newcommand{\bgamma} {\boldsymbol{\gamma}}
\newcommand{\wtgamma} {\widetilde{\gamma}}
\newcommand{\whgamma} {\widehat{\gamma}}
\newcommand{\wtbgamma} {\widetilde{\bgamma}}
\newcommand{\whbgamma} {\widehat{\bgamma}}
\newcommand{\bdelta} {\boldsymbol{\delta}}
\newcommand{\wtdelta} {\widetilde{\delta}}
\newcommand{\whdelta} {\widehat{\delta}}
\newcommand{\wtbdelta} {\widetilde{\bdelta}}
\newcommand{\whbdelta} {\widehat{\bdelta}}
\newcommand{\bepsilon} {\boldsymbol{\epsilon}}
\newcommand{\wtepsilon} {\widetilde{\epsilon}}
\newcommand{\whepsilon} {\widehat{\epsilon}}
\newcommand{\wtbepsilon}{\widetilde{\bepsilon}}
\newcommand{\whbepsilon}{\widehat{\bepsilon}}
\newcommand{\veps} {\varepsilon}
\newcommand{\bveps} {\boldsymbol{\veps}}
\newcommand{\wtveps} {\widetilde{\veps}}
\newcommand{\whveps} {\widehat{\veps}}
\newcommand{\wtbveps} {\widetilde{\bveps}}
\newcommand{\whbveps} {\widehat{\bveps}}
\newcommand{\bEta} {\boldsymbol{\eta}}
\newcommand{\wteta} {\widetilde{\eta}}
\newcommand{\wheta} {\widehat{\eta}}
\newcommand{\wtbEta} {\widetilde{\bEta}}
\newcommand{\whbEta} {\widehat{\bEta}}
\newcommand{\btheta} {\boldsymbol{\theta}}
\newcommand{\wttheta} {\widetilde{\theta}}
\newcommand{\whtheta} {\widehat{\theta}}
\newcommand{\wtbtheta} {\widetilde{\btheta}}
\newcommand{\whbtheta} {\widehat{\btheta}}
\newcommand{\bvtheta} {\boldsymbol{\vartheta}}
\newcommand{\wtvtheta} {\widetilde{\vartheta}}
\newcommand{\whvtheta} {\widehat{\vartheta}}
\newcommand{\wtbvtheta} {\widetilde{\bvtheta}}
\newcommand{\whbvtheta} {\widehat{\bvtheta}}
\newcommand{\biota} {\boldsymbol{\iota}}
\newcommand{\wtiota} {\widetilde{\iota}}
\newcommand{\whiota} {\widehat{\iota}}
\newcommand{\wtbiota} {\widetilde{\biota}}
\newcommand{\whbiota} {\widehat{\biota}}
\newcommand{\bkappa} {\boldsymbol{\kappa}}
\newcommand{\wtkappa} {\widetilde{\kappa}}
\newcommand{\whkappa} {\widehat{\kappa}}
\newcommand{\wtbkappa} {\widetilde{\bkappa}}
\newcommand{\whbkappa} {\widehat{\bkappa}}
\newcommand{\blambda} {\boldsymbol{\lambda}}
\newcommand{\wtlambda} {\widetilde{\lambda}}
\newcommand{\whlambda} {\widehat{\lambda}}
\newcommand{\wtblambda} {\widetilde{\blambda}}
\newcommand{\whblambda} {\widehat{\blambda}}
\newcommand{\bmu} {\boldsymbol{\mu}}
\newcommand{\wtmu} {\widetilde{\mu}}
\newcommand{\whmu} {\widehat{\mu}}
\newcommand{\wtbmu} {\widetilde{\bmu}}
\newcommand{\whbmu} {\widehat{\bmu}}
\newcommand{\bnu} {\boldsymbol{\nu}}
\newcommand{\wtnu} {\widetilde{\nu}}
\newcommand{\whnu} {\widehat{\nu}}
\newcommand{\wtbnu} {\widetilde{\bnu}}
\newcommand{\whbnu} {\widehat{\bnu}}
\newcommand{\bxi} {\boldsymbol{\xi}}
\newcommand{\wtxi} {\widetilde{\xi}}
\newcommand{\whxi} {\widehat{\xi}}
\newcommand{\wtbxi} {\widetilde{\bxi}}
\newcommand{\whbxi} {\widehat{\bxi}}
\newcommand{\bpi} {\boldsymbol{\pi}}
\newcommand{\wtpi} {\widetilde{\pi}}
\newcommand{\whpi} {\widehat{\pi}}
\newcommand{\wtbpi} {\widetilde{\bpi}}
\newcommand{\whbpi} {\widehat{\bpi}}
\newcommand{\bvpi} {\boldsymbol{\varpi}}
\newcommand{\wtvpi} {\widetilde{\varpi}}
\newcommand{\whvpi} {\widehat{\varpi}}
\newcommand{\wtbvpi} {\widetilde{\bvpi}}
\newcommand{\whbvpi} {\widehat{\bvpi}}
\newcommand{\brho} {\boldsymbol{\rho}}
\newcommand{\wtrho} {\widetilde{\rho}}
\newcommand{\whrho} {\widehat{\rho}}
\newcommand{\wtbrho} {\widetilde{\brho}}
\newcommand{\whbrho} {\widehat{\brho}}
\newcommand{\bvrho} {\boldsymbol{\varrho}}
\newcommand{\wtvrho} {\widetilde{\varrho}}
\newcommand{\whvrho} {\widehat{\varrho}}
\newcommand{\wtbvrho} {\widetilde{\bvrho}}
\newcommand{\whbvrho} {\widehat{\bvrho}}
\newcommand{\bsigma} {\boldsymbol{\sigma}}
\newcommand{\wtsigma} {\widetilde{\sigma}}
\newcommand{\whsigma} {\widehat{\sigma}}
\newcommand{\wtbsigma} {\widetilde{\bsigma}}
\newcommand{\whbsigma} {\widehat{\bsigma}}
\newcommand{\bvsigma} {\boldsymbol{\varsigma}}
\newcommand{\wtvsigma} {\widetilde{\varsigma}}
\newcommand{\whvsigma} {\widehat{\varsigma}}
\newcommand{\wtbvsigma} {\widetilde{\bvsigma}}
\newcommand{\whbvsigma} {\widehat{\bvsigma}}
\newcommand{\btau} {\boldsymbol{\tau}}
\newcommand{\wttau} {\widetilde{\tau}}
\newcommand{\whtau} {\widehat{\tau}}
\newcommand{\wtbtau} {\widetilde{\btau}}
\newcommand{\whbtau} {\widehat{\btau}}
\newcommand{\bupsilon} {\boldsymbol{\upsilon}}
\newcommand{\wtupsilon} {\widetilde{\upsilon}}
\newcommand{\whupsilon} {\widehat{\upsilon}}
\newcommand{\wtbupsilon}{\widetilde{\bupsilon}}
\newcommand{\whbupsilon}{\widehat{\bupsilon}}
\newcommand{\bzeta} {\boldsymbol{\zeta}}
\newcommand{\wtzeta} {\widetilde{\zeta}}
\newcommand{\whzeta} {\widehat{\zeta}}
\newcommand{\wtbzeta}{\widetilde{\bzeta}}
\newcommand{\whbzeta}{\widehat{\bzeta}}
\newcommand{\bphi} {\boldsymbol{\phi}}
\newcommand{\wtphi} {\widetilde{\phi}}
\newcommand{\whphi} {\widehat{\phi}}
\newcommand{\wtbphi} {\widetilde{\bphi}}
\newcommand{\whbphi} {\widehat{\bphi}}
\newcommand{\bvphi} {\boldsymbol{\varphi}}
\newcommand{\wtvphi} {\widetilde{\varphi}}
\newcommand{\whvphi} {\widehat{\varphi}}
\newcommand{\wtbvphi} {\widetilde{\bvphi}}
\newcommand{\whbvphi} {\widehat{\bvphi}}
\newcommand{\bchi} {\boldsymbol{\chi}}
\newcommand{\wtchi} {\widetilde{\chi}}
\newcommand{\whchi} {\widehat{\chi}}
\newcommand{\wtbchi} {\widetilde{\bchi}}
\newcommand{\whbchi} {\widehat{\bchi}}
\newcommand{\bpsi} {\boldsymbol{\psi}}
\newcommand{\wtpsi} {\widetilde{\psi}}
\newcommand{\whpsi} {\widehat{\psi}}
\newcommand{\wtbpsi} {\widetilde{\bpsi}}
\newcommand{\whbpsi} {\widehat{\bpsi}}
\newcommand{\bomega} {\boldsymbol{\omega}}
\newcommand{\wtomega} {\widetilde{\omega}}
\newcommand{\whomega} {\widehat{\omega}}
\newcommand{\wtbomega} {\widetilde{\bomega}}
\newcommand{\whbomega} {\widehat{\bomega}}
\newcommand{\bGamma} {\boldsymbol{\Gamma}}
\newcommand{\wtGamma} {\widetilde{\Gamma}}
\newcommand{\whGamma} {\widehat{\Gamma}}
\newcommand{\wtbGamma} {\widetilde{\bGamma}}
\newcommand{\whbGamma} {\widehat{\bGamma}}
\newcommand{\bDelta} {\boldsymbol{\Delta}}
\newcommand{\wtDelta} {\widetilde{\Delta}}
\newcommand{\whDelta} {\widehat{\Delta}}
\newcommand{\wtbDelta} {\widetilde{\bDelta}}
\newcommand{\whbDelta} {\widehat{\bDelta}}
\newcommand{\bLambda} {\boldsymbol{\Lambda}}
\newcommand{\wtLambda} {\widetilde{\Lambda}}
\newcommand{\whLambda} {\widehat{\Lambda}}
\newcommand{\wtbLambda} {\widetilde{\bLambda}}
\newcommand{\whbLambda} {\widehat{\bLambda}}
\newcommand{\bXi} {\boldsymbol{\Xi}}
\newcommand{\wtXi} {\widetilde{\Xi}}
\newcommand{\whXi} {\widehat{\Xi}}
\newcommand{\wtbXi} {\widetilde{\bXi}}
\newcommand{\whbXi} {\widehat{\bXi}}
\newcommand{\bPi} {\boldsymbol{\Pi}}
\newcommand{\wtPi} {\widetilde{\Pi}}
\newcommand{\whPi} {\widehat{\Pi}}
\newcommand{\wtbPi} {\widetilde{\bPi}}
\newcommand{\whbPi} {\widehat{\bPi}}
\newcommand{\bSigma} {\boldsymbol{\Sigma}}
\newcommand{\wtSigma} {\widetilde{\Sigma}}
\newcommand{\whSigma} {\widehat{\Sigma}}
\newcommand{\wtbSigma} {\widetilde{\bSigma}}
\newcommand{\whbSigma} {\widehat{\bSigma}}
\newcommand{\bUpsilon} {\boldsymbol{\Upsilon}}
\newcommand{\wtUpsilon} {\widetilde{\Upsilon}}
\newcommand{\whUpsilon} {\widehat{\Upsilon}}
\newcommand{\wtbUpsilon}{\widetilde{\bUpsilon}}
\newcommand{\whbUpsilon}{\widehat{\bUpsilon}}
\newcommand{\bPhi} {\boldsymbol{\Phi}}
\newcommand{\wtPhi} {\widetilde{\Phi}}
\newcommand{\whPhi} {\widehat{\Phi}}
\newcommand{\wtbPhi} {\widetilde{\bPhi}}
\newcommand{\whbPhi} {\widehat{\bPhi}}
\newcommand{\bPsi} {\boldsymbol{\Psi}}
\newcommand{\wtPsi} {\widetilde{\Psi}}
\newcommand{\whPsi} {\widehat{\Psi}}
\newcommand{\wtbPsi} {\widetilde{\bPsi}}
\newcommand{\whbPsi} {\widehat{\bPsi}}
\newcommand{\bOmega} {\boldsymbol{\Omega}}
\newcommand{\wtOmega} {\widetilde{\Omega}}
\newcommand{\whOmega} {\widehat{\Omega}}
\newcommand{\wtbOmega} {\widetilde{\bOmega}}
\newcommand{\whbOmega} {\widehat{\bOmega}}
\newcommand{\bzero} {\mathbf{0}}
\newcommand{\bone} {\mathbf{1}}
\newcommand{\norm}[1]{\left\lVert #1 \right\rVert}
\newcommand{\abs}[1]{\left\lvert#1\right\rvert}
\newcommand{\ceil}[1]{\left \lceil #1 \right\rceil}
\newcommand{\argmin}{\operatorname*{arg\,min}}
\newcommand{\argmax}{\operatorname*{arg\,max}}
\)
We introduce the basics of gradient descent and its python implementation with convergence for a regularized logistic loss function in our article on “UNRAVELING THE GRADIENT DESCENT ALGORITHM: A STEP-BY-STEP GUIDE” .
Gradient descent is a widely used optimization algorithm that aims to find the minimum of a given function by iteratively updating its parameters in the direction of the negative gradient. In recent years, gradient descent has become particularly popular in the context of training deep neural networks for machine learning tasks. However, many of these problems are characterized by smooth and nonconvex functions , which present unique challenges for convergence .
This article investigates the proof of convergence for batch gradient descent in these settings.
Recall that the gradient descent algorithm is given as:
Algorithm 1 - Gradient Descent (GD)
Input: An arbitrary vector $\bx_0 \in \R^n$ and a decaying step size $\gamma_t > 0$;
1. for $t=1,2,\ldots,$ do
2. $\qquad$ Evaluate the gradient mapping at $\bx_t$, i.e., evaluate $\nabla f(\bx_t)$;
3. $\qquad$ Do the following update:
$\qquad$ $$ \bx_{t+1} := \bx_t - \gamma_t \nabla f(\bx_t) $$
4. end for
Pre-requisite for convergence analysis
Before jumping to convergence for gradient descent algorithm, we first define some basic inequalities that are widely used in optimization theory.
Lipschitz smoothness: By definition, a differentiable function $f : X \to \R$ over the set $X \subseteq R^n$ is called L-smooth if the following property is satisfied, where $L>0$ is a scalar:
$$\norm{\nabla f(\bx)-\nabla f(\by)} \leq L \norm{\bx-\by} \quad \hbox{for all }\bx,\by \in X.$$
The intuitive idea behind is that the change of function is always bounded by some positive constant, and by assumption that the $L < \infty$. The key idea is that there are no abrupt changes in the function.
Imagine walking on a hilly terrain. If the terrain is Lipschitz continuous, it means that there is an upper limit to how steep any hill can be. No matter how far you walk, you will never encounter a sudden, infinitely steep cliff.
Descent Lemma: Let $f : \R^n \to R$ be $L$-smooth over the set $X \subseteq R^n$. Then, for any $\bx,\by \in X$ we have
$$ f(\bx) \leq f(\by) + \nabla f(\by)^T(\bx-\by) +\frac{L}{2} \norm{\bx-\by}^2.$$
The descent lemma comes from the strong convexity of the function where we say the convex function is bounded below by a quadratic function which in itself is convex. The descent lemma says that the function value at any point in $\R^n$ space is bounded above by three terms.
Theorem for smooth and non-convex case
We let function $f(\bx)$ to be the L-smooth and non-convex and define $f^*:= \min_{x \in \mathbb{R}^n} f(x)$. Then, when $\gamma \leq 1/L$, we have:
$$ \boxed{ \min_{t \in {0,\ldots,T-1}} \norm{\nabla f(\bx_t)}^2 \leq \frac{2 \left(f(\bx_0)- f^*\right)} {T \gamma} \qquad \hbox{for all }T \geq 1.}$$
Proof:
We now use the Lipschitz continuity and Descent lemma to prove the above result
$$ \begin{aligned} f(\bx_{t+1}) &\stackrel{(i)}{\leq} f(\bx_t) +\nabla f(\bx_t)^T(\bx_{t+1}-x_k) + \frac{L}{2}\norm{\bx_{t+1}-\bx_t}^2 \\ &\stackrel{(ii)}{=} f(\bx_t) +\nabla f(\bx_t)^T(-\gamma \nabla f(\bx_t)) + \frac{L}{2} \norm{-\gamma \nabla f(x_t)}^2 \\ &\stackrel{(iii)}{=} f(\bx_t) -\gamma \norm{\nabla f(\bx_t)}^2 +\frac{\gamma^2 L}{2} \norm{\nabla f(\bx_t)}^2 \\ &= f(\bx_t) -\gamma \left(1- \frac{\gamma L}{2}\right) \norm{\nabla f(\bx_t)}^2 \\ &\stackrel{(iv)}{\leq} f(\bx_t) -\frac{\gamma}{2} \norm{\nabla f(\bx_t)}^2. \end{aligned}$$
Where, in $(i)$ we have used Descent lemma by substituting $\bx:=\bx_{t+1}, \by:=\bx_t$; in $(ii)$ we applied gradient descent update rule $\bx_{t+1}:=\bx_t-\gamma\nabla f(\bx_t)$; in $(iii)$ the definition of Euclidian norm square, i.e., $\ba^T\ba = \norm{a}^2 \hbox{ for all } \ba\in \R^n$; and finally in $(iv)$ we assume that $\gamma L \leq 1$ which states that $1- \frac{\gamma L}{2} \geq \frac{1}{2}$. So, we have:
$$ f(\bx_{t+1}) \leq f(\bx_t) -\frac{\gamma}{2} \norm{\nabla f(\bx_t)}^2. $$
We can re-arrange the above expression by assuming the fact that $f(\bx_t)$ is nonincreasing in terms of $t$.
$$ \norm{\nabla f(\bx_t)}^2 \leq \frac{2}{\gamma} (f(\bx_t) – f(\bx_{t+1})). $$
Since we run these iterations up to $T$, we can sum both sides from $0 \to T-1$.
$$ \sum_{t=0}^{T-1} \norm{\nabla f(\bx_t)}^2 \leq \sum_{t=0}^{T-1} \frac{2}{\gamma} (f(\bx_t) – f(\bx_{t+1})). $$
Note that $\sum_{t=0}^{T-1} (f(\bx_t) – f(\bx_{t+1}))$ leads to a telescoping series, resulting in multiple terms canceling, so we left with:
By using the fact that $f^*$ is the minimum value of the function over entire set $X$, we can say that $f^* \leq f(\bx_T)$, then
$$ \sum_{t=0}^{T-1} \norm{\nabla f(\bx_t)}^2 \leq \frac{2}{\gamma} (f(\bx_0) – f^*)). $$
Finally using the fact that since $\norm{\nabla f(\bx_t)}^2 > 0$ then we have $T \min_{t \in {0,\ldots,T-1}} \norm{\nabla f(\bx_t)}^2 \leq \sum_{t=0}^{T-1} \norm{\nabla f(\bx_t)}^2$, we can write:
$$ \min_{t \in {0,\ldots,T-1}} \norm{\nabla f(\bx_t)}^2 \leq \frac{2 \left(f(\bx_0)- f^*\right)} {T \gamma} $$
This concludes the proof of convergence of the gradient descent algorithm when a function is L-smooth and nonconvex.