% bioRxiv preprint (publisher: Cold Spring Harbor Laboratory), DOI 10.1101/097873.
% `eprint`/`eprinttype` carry the archive identifier; the direct full-text PDF
% link is preserved in the non-standard `pdfurl` field, which styles ignore.
@article{Mas-Herrero097873,
  author        = {Mas-Herrero, Ernest and Sescousse, Guillaume and Cools, Roshan and Marco-Pallar{\'e}s, Josep},
  title         = {The contribution of striatal pseudo-reward prediction errors to value-based decision-making},
  journal       = {bioRxiv},
  year          = {2017},
  publisher     = {Cold Spring Harbor Laboratory},
  elocation-id  = {097873},
  doi           = {10.1101/097873},
  url           = {https://www.biorxiv.org/content/early/2017/01/03/097873},
  eprint        = {097873},
  eprinttype    = {bioRxiv},
  pdfurl        = {https://www.biorxiv.org/content/early/2017/01/03/097873.full.pdf},
  abstract      = {Most studies that have investigated the brain mechanisms underlying learning have focused on the ability to learn simple stimulus-response associations.
                   However, in everyday life, outcomes are often obtained through complex behavioral patterns involving a series of actions, rather than one simple response.
                   In such scenarios, parallel learning systems are important to reduce the complexity of the learning problem, as proposed in the framework of hierarchical reinforcement learning (HRL).
                   One of the main strategies used by HRL algorithms is the computation of pseudo-reward prediction errors (PRPE) which allow the reinforcement of actions that led to a sub-goal before the final goal itself is achieved.
                   Here we wanted to test two hypotheses.
                   First, we hypothesized that, despite not carrying any rewarding value per se, pseudo-rewards might generate a bias in choice behavior when reward contingencies are not well-known or uncertain.
                   Second, we hypothesized that such a behavioral bias might be related to the strength of PRPE striatal representations.
                   In order to test these ideas, we developed a novel fMRI decision-making paradigm to assess reward prediction errors (RPE) and PRPE in a group of 23 healthy participants.
                   Our results show that participants developed a preference for the most pseudo-rewarding option throughout the task, even though it did not lead to more reward overall.
                   Analysis of fMRI data indicated that this preference was predicted by individual differences in the striatal sensitivity to PRPE vs RPE.
                   In sum, our results indicate that pseudo-rewards can bias our choices despite their lack of association with actual reward.
                   Thus, although they are critical for speeding learning in complex environments, they might potentially lead to irrational behavior in the absence of reward.},
}