% Technical report CSTR-10-007 (University of Bristol, October 2010) by Tim Kovacs.
% The month is given as the predefined macro (oct) so each style picks its own
% abbreviation and language; the author is in unambiguous "Last, First" form.
% NOTE(review): bpages, abstract-url, keyword and pubtype are not standard
% BibTeX fields and are ignored by the standard styles; they are kept verbatim
% because they look like metadata from the exporting database - confirm before
% renaming any of them (e.g. to url / keywords).
@techreport{2001266,
  author       = {Kovacs, Tim},
  title        = {Elaboration on the policy improvement theorem for soft policies in reinforcement learning},
  institution  = {University of Bristol},
  number       = {CSTR-10-007},
  month        = oct,
  year         = {2010},
  bpages       = {5},
  abstract     = {In section 5.4 of their book on reinforcement learning Sutton and Barto show that the policy improvement theorem applies to soft policies, that is, when making a soft policy greedier (but still soft) with respect to its Q-function we obtain an improved policy. I found this material difficult to follow and wrote this short document to elaborate on their proof. Familiarity with the material up until that section is assumed.},
  abstract-url = {http://www.cs.bris.ac.uk/Publications/pub_master.jsp?id=2001266},
  keyword      = {Artificial Intelligence,Machine Learning},
  pubtype      = {117},
}