%Aigaion2 BibTeX export from Idiap Publications
%Thursday 04 December 2025 04:46:09 PM
% Workshop paper. The companion Idiap research report is the entry with key
% dimitrakakis:rr05-29. It is linked through xref instead of crossref so that
% the report's type/number/institution fields are not copied into this entry.
@INPROCEEDINGS{dimitrakakis:pascal:2005,
  author        = {Dimitrakakis, Christos and Bengio, Samy},
  title         = {Gradient estimates of return distributions},
  booktitle     = {{PASCAL} Workshop on Principled Methods of Trading Exploration and Exploitation},
  year          = {2005},
  note          = {IDIAP-RR 05-29},
  xref          = {dimitrakakis:rr05-29},
  abstract      = {We present a general method for maintaining estimates of the distribution of parameters in arbitrary models. This is then applied to the estimation of probability distributions over actions in value-based reinforcement learning. While this approach is similar to other techniques that maintain a confidence measure for action-values, it nevertheless offers an insight into current techniques and hints at potential avenues of further research.},
  pdf           = {https://publications.idiap.ch/attachments/papers/2005/dimitrakakis-pascal-2005.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/papers/2005/dimitrakakis-pascal-2005.ps.gz},
  projects      = {Idiap},
  ipdmembership = {learning},
}
%Cross-referenced publications:
% Idiap research report; per its note field, the work was also published at the
% PASCAL workshop (London, UK, 2005).
@TECHREPORT{dimitrakakis:rr05-29,
  author        = {Dimitrakakis, Christos and Bengio, Samy},
  title         = {Gradient estimates of return},
  type          = {Idiap-RR},
  number        = {Idiap-RR-29-2005},
  institution   = {IDIAP},
  year          = {2005},
  note          = {Published in PASCAL Workshop on Principled Methods of Trading Exploration and Exploitation, London, UK, 2005},
  abstract      = {The exploration-exploitation trade-off that arises when one considers simple point estimates of expected returns no longer appears when full distributions are considered. This work develops a simple gradient-based approach for maintaining such distributions and investigates methods for using them to direct exploration.},
  pdf           = {https://publications.idiap.ch/attachments/reports/2005/dimitrakakis-idiap-rr-05-29.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2005/dimitrakakis-idiap-rr-05-29.ps.gz},
  projects      = {Idiap},
  ipdmembership = {learning},
}