@InProceedings{pmlr-v202-zhang23ad,
  title =    {On Enhancing Expressive Power via Compositions of Single Fixed-Size {R}e{LU} Network},
  author =       {Zhang, Shijun and Lu, Jianfeng and Zhao, Hongkai},
  booktitle =    {Proceedings of the 40th International Conference on Machine Learning},
  pages =    {41452--41487},
  year =     {2023},
  editor =   {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan},
  volume =   {202},
  series =   {Proceedings of Machine Learning Research},
  month =    {23--29 Jul},
  publisher =    {PMLR},
  pdf =      {https://proceedings.mlr.press/v202/zhang23ad/zhang23ad.pdf},
  url =      {https://proceedings.mlr.press/v202/zhang23ad.html},
  abstract =     {This paper explores the expressive power of deep neural networks through the framework of function compositions. We demonstrate that the repeated compositions of a single fixed-size ReLU network exhibit surprising expressive power, despite the limited expressive capabilities of the individual network itself. Specifically, we prove by construction that $\mathcal{L}_2\circ \boldsymbol{g}^{\circ r}\circ \boldsymbol{\mathcal{L}}_1$ can approximate $1$-Lipschitz continuous functions on $[0,1]^d$ with an error $\mathcal{O}(r^{-1/d})$, where $\boldsymbol{g}$ is realized by a fixed-size ReLU network, $\boldsymbol{\mathcal{L}}_1$ and $\mathcal{L}_2$ are two affine linear maps matching the dimensions, and $\boldsymbol{g}^{\circ r}$ denotes the $r$-times composition of $\boldsymbol{g}$. Furthermore, we extend such a result to generic continuous functions on $[0,1]^d$ with the approximation error characterized by the modulus of continuity. Our results reveal that a continuous-depth network generated via a dynamical system has immense approximation power even if its dynamics function is time-independent and realized by a fixed-size ReLU network.}
}