@article{Lind2021.04.26.441538,
  author       = {Lind, Christine H. and Yu, Angela J.},
  title        = {Understanding Double Descent Through the Lens of Principal Component Regression},
  journal      = {bioRxiv},
  year         = {2021},
  elocation-id = {2021.04.26.441538},
  doi          = {10.1101/2021.04.26.441538},
  publisher    = {Cold Spring Harbor Laboratory},
  url          = {https://www.biorxiv.org/content/early/2021/06/06/2021.04.26.441538},
  eprint       = {https://www.biorxiv.org/content/early/2021/06/06/2021.04.26.441538.full.pdf},
  abstract     = {A number of recent papers have studied the double-descent phenomenon: as the number of parameters in a supervised learning model increasingly exceeds that of data points ({\textquotedblleft}second-descent{\textquotedblright}), the empirical risk curve has been observed to not overfit, instead decreasing monotonically, sometimes to a level even better than the best {\textquotedblleft}first-descent{\textquotedblright} model (using a subset of features not exceeding the number of data points). Understanding exactly when this happens and why it happens is an important theoretical problem. Focusing on the over-parameterized linear regression setting, a commonly chosen case study in the double-descent literature, we present two theoretical results: 1) final second-descent (regression using all of the predictor variables) and principal component (PC) regression without dimensionality reduction are equivalent; 2) the PCR risk curve can be expected to lower bound not only all linearly transformed first-descent models, but also all linearly transformed second-descent models (including the elimination of features as a special case); 3) if the smallest singular value of the design matrix is {\textquotedblleft}large enough{\textquotedblright} (we will define mathematically), final second-descent can be expected to outperform any first-descent or second-descent model. These insights have important ramifications for a type of semi-supervised learning problem, a scenario which can explain why a face representation trained on unlabeled faces from one race would be better for later supervised-learning tasks on the same race of faces than for faces from another race {\textendash} this can both provide a scientific explanation for the other-race effect seen in humans and give hints for how to mitigate similar issues in the domain of ethical AI. Competing Interest Statement: The authors have declared no competing interest.},
}