% bioRxiv preprint record (DOI 10.1101/046946).
% NOTE(review): the exported citation key embedded a LaTeX accent group
% (braces and a backslash). Those characters are not valid in a citation key,
% so the entry could not be parsed or cited reliably. The key below is the
% plain-ASCII equivalent; update any existing citations to use it.
% NOTE(review): the eprint field carries the full-text PDF link exactly as
% exported; confirm whether the target bibliography style expects an archive
% identifier there instead.
@article{Zeleny046946,
  author        = {Zelen{\'y}, David},
  title         = {Bias in community-weighted mean analysis relating species attributes to sample attributes: justification and remedy},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2016},
  elocation-id  = {046946},
  doi           = {10.1101/046946},
  url           = {https://www.biorxiv.org/content/early/2016/04/05/046946},
  eprint        = {https://www.biorxiv.org/content/early/2016/04/05/046946.full.pdf},
  abstract      = {A common way to analyse relationship between matrix of species attributes (like functional traits of indicator values) and sample attributes (e.g. environmental variables) via the matrix of species composition is by calculating community-weighted mean of species attributes (CWM) and relating it to sample attributes by correlation, regression, ANOVA or other method. This weighted-mean approach is used in number of ecological fields (e.g. functional and vegetation ecology, biogeography, hydrobiology or paleolimnology), and represents an alternative to other methods relating species and sample attributes via species composition matrix (like the fourth-corner problem and RLQ analysis). Here, I point out two important problems of weighted-mean approach: 1) in certain cases, which I discuss in detail, the method yields highly biased results in terms of both effect size and significance of the relationship between CWM and sample attributes, and 2) this bias is contingent upon beta diversity of species composition matrix. CWM values calculated from samples of communities sharing some species are not independent from each other and this lack of independence influences the number of effective degrees of freedom. This is usually lower than actual number of samples entering the analysis, and the difference further increases with decreasing compositional heterogeneity of the dataset.
Discrepancy between number of effective degrees of freedom and number of samples in analysis turns into biased effect sizes and inflated Type I error rate in case that significance of the relationship is tested by standard tests, a problem which is analogous to analysis of two spatially autocorrelated variables. Consequences of the bias is that reported results of studies using rather homogeneous (although not necessarily small) compositional datasets may be overly optimistic, and results of studies based on datasets differing by their compositional heterogeneity are not directly comparable. I describe the reason for this bias and suggest guidelines how to decide in which situations the bias is actually a problem for interpretation of results. I also introduce analytical solution accounting for the bias, test its validity on simulated data and compare it with an alternative approach based on the fourth-corner approach.},
}