%Aigaion2 BibTeX export from Idiap Publications
%Saturday 21 December 2024 07:32:54 PM

% Workshop paper; the companion Idiap report is entry dimitrakakis:rr05-29.
% The report is linked with xref (not crossref) so that report-only fields
% (type, number, institution) are not inherited by this workshop entry; the
% report number is already recorded in the note field.
@INPROCEEDINGS{dimitrakakis:pascal:2005,
         author = {Dimitrakakis, Christos and Bengio, Samy},
       projects = {Idiap},
          title = {Gradient estimates of return distributions},
      booktitle = {{PASCAL} Workshop on Principled Methods of Trading Exploration and Exploitation},
           year = {2005},
           note = {IDIAP-RR 05-29},
           xref = {dimitrakakis:rr05-29},
       abstract = {We present a general method for maintaining estimates of the distribution of parameters in arbitrary models. This is then applied to the estimation of probability distributions over actions in value-based reinforcement learning. While this approach is similar to other techniques that maintain a confidence measure for action-values, it nevertheless offers an insight into current techniques and hints at potential avenues of further research.},
            pdf = {https://publications.idiap.ch/attachments/papers/2005/dimitrakakis-pascal-2005.pdf},
     postscript = {ftp://ftp.idiap.ch/pub/papers/2005/dimitrakakis-pascal-2005.ps.gz},
  ipdmembership = {learning},
}



%Cross-referenced publications:
% Idiap-RR technical report; per its note field it was published at the
% PASCAL workshop (entry dimitrakakis:pascal:2005).
% The type field supplies the report-series label, so number holds only the
% serial part; styles print the two together as "Idiap-RR 29-2005".
@TECHREPORT{dimitrakakis:rr05-29,
         author = {Dimitrakakis, Christos and Bengio, Samy},
       projects = {Idiap},
          title = {Gradient estimates of return},
           type = {Idiap-RR},
         number = {29-2005},
           year = {2005},
    institution = {IDIAP},
           note = {Published in PASCAL Workshop on Principled Methods of Trading Exploration and Exploitation, London, UK, 2005},
       abstract = {The exploration-exploitation trade-off that arises when one considers simple point estimates of expected returns no longer appears when full distributions are considered. This work develops a simple gradient-based approach for maintaining such distributions and investigates methods for using them to direct exploration.},
            pdf = {https://publications.idiap.ch/attachments/reports/2005/dimitrakakis-idiap-rr-05-29.pdf},
     postscript = {ftp://ftp.idiap.ch/pub/reports/2005/dimitrakakis-idiap-rr-05-29.ps.gz},
  ipdmembership = {learning},
}