%Aigaion2 BibTeX export from Idiap Publications
%Tuesday 10 December 2024 11:59:09 PM

% Workshop paper. Its companion technical report (key dimitrakakis:rr05-29,
% defined further down) is linked with xref instead of crossref: crossref
% copies every field the child lacks from the parent, which would pull the
% report's type, number and institution into this conference entry. The note
% field still names the report number for readers.
@INPROCEEDINGS{dimitrakakis:pascal:2005,
  author        = {Dimitrakakis, Christos and Bengio, Samy},
  projects      = {Idiap},
  title         = {Gradient estimates of return distributions},
  booktitle     = {{PASCAL} Workshop on Principled Methods of Trading Exploration and Exploitation},
  year          = {2005},
  note          = {IDIAP-RR 05-29},
  xref          = {dimitrakakis:rr05-29},
  abstract      = {We present a general method for maintaining estimates of the
                   distribution of parameters in arbitrary models. This is then
                   applied to the estimation of probability distributions over
                   actions in value-based reinforcement learning. While this
                   approach is similar to other techniques that maintain a
                   confidence measure for action-values, it nevertheless offers
                   an insight into current techniques and hints at potential
                   avenues of further research.},
  pdf           = {https://publications.idiap.ch/attachments/papers/2005/dimitrakakis-pascal-2005.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/papers/2005/dimitrakakis-pascal-2005.ps.gz},
  ipdmembership = {learning},
}

% Cross-referenced publications:

% Technical-report version referenced by the workshop paper above.
% The projects, pdf, postscript and ipdmembership fields are not standard
% BibTeX fields; they are kept verbatim from the export.
@TECHREPORT{dimitrakakis:rr05-29,
  author        = {Dimitrakakis, Christos and Bengio, Samy},
  projects      = {Idiap},
  title         = {Gradient estimates of return},
  type          = {Idiap-RR},
  number        = {Idiap-RR-29-2005},
  year          = {2005},
  institution   = {IDIAP},
  note          = {Published in PASCAL Workshop on Principled Methods of Trading Exploration and Exploitation, London, UK, 2005},
  abstract      = {The exploration-exploitation trade-off that arises when one
                   considers simple point estimates of expected returns no
                   longer appears when full distributions are considered. This
                   work develops a simple gradient-based approach for
                   maintaining such distributions and investigates methods for
                   using them to direct exploration.},
  pdf           = {https://publications.idiap.ch/attachments/reports/2005/dimitrakakis-idiap-rr-05-29.pdf},
  postscript    = {ftp://ftp.idiap.ch/pub/reports/2005/dimitrakakis-idiap-rr-05-29.ps.gz},
  ipdmembership = {learning},
}