% bioRxiv preprint, DOI 10.1101/046946.
% The citation key is deliberately ASCII-only: braces and backslashes are not
% safe inside a key (classic BibTeX stops reading the key at a closing brace).
% Documents that cited the exporter's accented key must cite Zeleny046946.
% The "fulltext" field is an ignored, informational field holding the PDF link.
@article{Zeleny046946,
  author        = {Zelen{\'y}, David},
  title         = {Bias in community-weighted mean analysis of plant functional traits and species indicator values},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2017},
  elocation-id  = {046946},
  doi           = {10.1101/046946},
  eprint        = {046946},
  eprinttype    = {bioRxiv},
  url           = {https://www.biorxiv.org/content/early/2017/04/09/046946},
  fulltext      = {https://www.biorxiv.org/content/early/2017/04/09/046946.full.pdf},
  abstract      = {One way to analyze the relationship between species attributes and sample attributes via the matrix of species composition is to calculate the community-weighted mean of species attributes (CWM) and relate it to sample attributes by correlation, regression or ANOVA. This weighted-mean approach is frequently used by vegetation ecologists to relate species attributes like plant functional traits or Ellenberg-like species indicator values to sample attributes like measured environmental variables, biotic properties, species richness or sample scores in ordination analysis. The problem with the weighted-mean approach is that, in certain cases, it yields biased results in terms of both effect size and P-values, and this bias is contingent upon the beta diversity of the species composition data. The reason is that CWM values calculated from samples of communities sharing some species are not independent of each other. This influences the number of effective degrees of freedom, which is usually lower than the actual number of samples, and the difference further increases with decreasing beta diversity of the data set. The discrepancy between the number of effective degrees of freedom and the number of samples in analysis turns into biased effect sizes and an inflated Type I error rate in those cases where the significance of the relationship is tested by standard tests, a problem which is analogous to analysis of two spatially autocorrelated variables. Consequently, results of studies using rather homogeneous (although not necessarily small) compositional data sets may be overly optimistic, and effect sizes of studies based on data sets differing by their beta diversity are not directly comparable. Here, I introduce guidelines on how to decide in which situation the bias is actually a problem when interpreting results, recognizing that there are several types of species and sample attributes with different properties and that ecological hypotheses commonly tested by the weighted-mean approach fall into one of three broad categories. I also compare available analytical solutions accounting for the bias (modified permutation test and sequential permutation test using the fourth-corner statistic) and suggest rules for their use. CWM {\textendash} community-weighted mean.},
}