all.bib

@patent{buessow2012patent,
  title = {Performing pre-aggregation and re-aggregation using the same query language},
  nationality = {US},
  number = {US20120173519 A1},
  year = {2012},
  yearfiled = {2011},
  author = {Buessow, R. and Stolle, M. and Vlasyuk, B. and Bachmann, O.},
  month = jul # {~5},
  note = {US Patent App. 13/388,487},
  url = {https://www.google.com/patents/US20120173519},
  abstract = {A method includes: obtaining, at time intervals, subsets of data from a database using code of a query language; performing a pre-aggregation on the subsets of data to produce pre-aggregated data; storing the pre-aggregated data in the database; obtaining, in response to a query, at least some of the pre-aggregated data from the database, where the at least some of the pre-aggregated data is obtained using code from the query language used to obtain the subsets of data; and performing a re-aggregation on the pre-aggregated data to produce re-aggregated data.},
  owner = {sarah},
  publisher = {Google Patents},
  timestamp = {2015.07.14}
}
@misc{rivest2001lnsc,
  title = {{LNSC} Cascade-correlation Simulator Applet},
  author = {Francois Rivest and Martin Stolle and Thomas Shultz},
  howpublished = {{WWW}},
  year = {2001},
  tags = {web},
  url = {http://www.psych.mcgill.ca/perpg/fac/shultz/cdp/lnsc_applet.htm}
}
@phdthesis{Stolle_2008_6098,
  title = {Finding and Transferring Policies Using Stored Behaviors},
  author = {Martin Stolle},
  school = {Robotics Institute, Carnegie Mellon University},
  year = {2008},
  address = {Pittsburgh, PA},
  month = {May},
  abstract = {We present several algorithms that aim to advance the state-of-the-art in reinforcement learning and planning algorithms. One key idea is to transfer knowledge across problems by representing it using local features. This idea is used to speed up a dynamic programming based generalized policy iteration. We then present a control approach that uses a library of trajectories to establish a control law or policy. This approach is an alternative to methods for finding policies based on value functions using dynamic programming and also to using plans based on a single desired trajectory. Our method has the advantages of providing reasonable policies much faster than dynamic programming and providing more robust and global policies than following a single desired trajectory. Finally we show how local features can be used to transfer libraries of trajectories between similar problems. Transfer makes it useful to store special purpose behaviors in the library for solving tricky situations in new environments. By adapting the behaviors in the library, we increase the applicability of the behaviors. Our approach can be viewed as a method that allows planning algorithms to make use of special purpose behaviors/actions which are only applicable in certain situations. Results are shown for the labyrinth / marble maze and the Little Dog quadruped robot. The marble maze is a difficult task which requires both fast control as well as planning ahead. In the Little Dog terrain, a quadruped robot has to navigate quickly across rough terrain.},
  number = {CMU-RI-TR-08-27},
  timestamp = {2015.06.16},
  url = {https://www.ri.cmu.edu/publication_view.html?pub_id=6098&menu_code=0307}
}
@mastersthesis{stolle2004thesis,
  title = {Automated Discovery of Options in Reinforcement Learning},
  author = {Martin Stolle},
  school = {McGill University},
  year = {2004},
  month = {February},
  abstract = {AI planning benefits greatly from the use of temporally-extended or macro-actions. Macro-actions allow for faster and more efficient planning as well as the reuse of knowledge from previous solutions. In recent years, a significant amount of research has been devoted to incorporating macro-actions in learned controllers, particularly in the context of Reinforcement Learning. One general approach is the use of options (temporally-extended actions) in Reinforcement Learning. While the properties of options are well understood, it is not clear how to find new options automatically. In this thesis we propose two new algorithms for discovering options and compare them to one algorithm from the literature. We also contribute a new algorithm for learning with options which improves on the performance of two widely used learning algorithms. Extensive experiments are used to demonstrate the effectiveness of the proposed algorithms.},
  file = {stolle2004thesis.pdf:pubs/stolle2004thesis.pdf:PDF},
  tags = {web},
  url = {http://martin.stolle.name/pubs/stolle2004thesis.pdf}
}
@article{1825445,
  title = {Finding and transferring policies using stored behaviors},
  author = {Stolle, Martin and Atkeson, Christopher G.},
  journal = {Autonomous Robots},
  year = {2010},
  number = {2},
  pages = {169--200},
  volume = {29},
  abstract = {We present several algorithms that aim to advance the state-of-the-art in reinforcement learning and planning algorithms. One key idea is to transfer knowledge across problems by representing it using local features. This idea is used to speed up a dynamic programming based generalized policy iteration. We then present a control approach that uses a library of trajectories to establish a control law or policy. This approach is an alternative to methods for finding policies based on value functions using dynamic programming and also to using plans based on a single desired trajectory. Our method has the advantages of providing reasonable policies much faster than dynamic programming and providing more robust and global policies than following a single desired trajectory. Finally we show how local features can be used to transfer libraries of trajectories between similar problems. Transfer makes it useful to store special purpose behaviors in the library for solving tricky situations in new environments. By adapting the behaviors in the library, we increase the applicability of the behaviors. Our approach can be viewed as a method that allows planning algorithms to make use of special purpose behaviors/actions which are only applicable in certain situations. Results are shown for the ``Labyrinth'' marble maze and the Little Dog quadruped robot. The marble maze is a difficult task which requires both fast control as well as planning ahead. In the Little Dog terrain, a quadruped robot has to navigate quickly across rough terrain.},
  address = {Hingham, MA, USA},
  doi = {10.1007/s10514-010-9191-2},
  issn = {0929-5593},
  owner = {sarah},
  publisher = {Kluwer Academic Publishers},
  timestamp = {2015.06.16},
  url = {http://martin.stolle.name/all_abstracts.shtml#1825445}
}
@inproceedings{stolle2007transfer,
  title = {Transfer of Policies Based on Trajectory Libraries},
  author = {Martin Stolle and Christopher G. Atkeson},
  booktitle = {Proceedings of the IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2007)},
  year = {2007},
  abstract = {Recently, libraries of trajectory plans have been shown to be a promising way of creating policies for difficult problems. However, often it is not desirable or even possible to create a new library for every task. We present a method for transferring libraries across tasks, which allows us to build libraries by learning from demonstration on one task and apply them to similar tasks. Representing the libraries in a feature-based space is key to supporting transfer. We also search through the library to ensure a complete path to the goal is possible. Results are shown for the Little Dog task. Little Dog is a quadruped robot that has to walk across rough terrain at reasonably fast speeds.},
  doi = {10.1109/IROS.2007.4399364},
  file = {stolle2007transfer.pdf:pubs/stolle2007transfer.pdf:PDF},
  tags = {web},
  timestamp = {2007.07.12},
  url = {http://martin.stolle.name/pubs/stolle2007transfer.pdf}
}
@inproceedings{stolle2007knowledge,
  title = {Knowledge Transfer using Local Features},
  author = {Martin Stolle and Christopher G. Atkeson},
  booktitle = {Proceedings of the IEEE Symposium on Approximate Dynamic Programming and Reinforcement Learning (ADPRL 2007)},
  year = {2007},
  abstract = {We present a method for reducing the effort required to compute policies for tasks based on solutions to previously solved tasks. The key idea is to use a learned intermediate policy based on local features to create an initial policy for the new task. In order to further improve this initial policy, we developed a form of generalized policy iteration. We achieve a substantial reduction in computation needed to find policies when previous experience is available.},
  doi = {10.1109/ADPRL.2007.368165},
  file = {stolle2007knowledge.pdf:pubs/stolle2007knowledge.pdf:PDF},
  owner = {mstoll},
  tags = {web},
  timestamp = {2007.03.29},
  url = {http://martin.stolle.name/pubs/stolle2007knowledge.pdf}
}
@inproceedings{stolle2006policies,
  title = {Policies Based on Trajectory Libraries},
  author = {Martin Stolle and Christopher G. Atkeson},
  booktitle = {Proceedings of the IEEE International Conference on Robotics and Automation (ICRA 2006)},
  year = {2006},
  abstract = {We present a control approach that uses a library of trajectories to establish a global control law or policy. This is an alternative to methods for finding global policies based on value functions using dynamic programming and also to using plans based on a single desired trajectory. Our method has the advantage of providing reasonable policies much faster than dynamic programming can provide an initial policy. It also has the advantage of providing more robust and global policies than following a single desired trajectory. Trajectory libraries can be created for robots with many more degrees of freedom than what dynamic programming can be applied to as well as for robots with dynamic model discontinuities. Results are shown for the ``Labyrinth'' marble maze, both in simulation as well as a real world version. The marble maze is a difficult task which requires both fast control as well as planning ahead.},
  doi = {10.1109/ROBOT.2006.1642212},
  file = {stolle2006policies.pdf:pubs/stolle2006policies.pdf:PDF},
  tags = {web},
  url = {http://martin.stolle.name/pubs/stolle2006policies.pdf}
}
@article{stolle2002learning,
  title = {Learning Options in Reinforcement Learning},
  author = {Stolle, Martin and Precup, Doina},
  journal = {Lecture Notes in Computer Science},
  year = {2002},
  pages = {212--223},
  volume = {2371},
  abstract = {Temporally extended actions (e.g., macro actions) have proven very useful in speeding up learning, ensuring robustness and building prior knowledge into AI systems. The options framework (Precup, 2000; Sutton, Precup & Singh, 1999) provides a natural way of incorporating such actions into reinforcement learning systems, but leaves open the issue of how good options might be identified. In this paper, we empirically explore a simple approach to creating options. The underlying assumption is that the agent will be asked to perform different goal-achievement tasks in an environment that is otherwise the same over time. Our approach is based on the intuition that ``bottleneck'' states, i.e. states that are frequently visited on system trajectories, could prove to be useful subgoals (e.g. McGovern & Barto, 2001; Iba, 1989). We present empirical studies of this approach in two gridworld navigation tasks. One of the environments we explored contains bottleneck states, and the algorithm indeed finds these states, as expected. The second environment is an empty gridworld with no obstacles. Although the environment does not contain bottleneck states, our approach still finds useful options, which essentially allow the agent to travel around the environment more quickly.},
  citeseerurl = {http://citeseer.ist.psu.edu/579862.html},
  file = {stolle2002learning.pdf:pubs/stolle2002learning.pdf:PDF},
  tags = {web},
  url = {http://martin.stolle.name/pubs/stolle2002learning.pdf}
}
@article{1936803,
  title = {Optimization and learning for rough terrain legged locomotion},
  author = {Zucker, Matt and Ratliff, Nathan and Stolle, Martin and Chestnutt, Joel and Bagnell, J. Andrew and Atkeson, Christopher G. and Kuffner, James},
  journal = {The International Journal of Robotics Research},
  year = {2011},
  number = {2},
  pages = {175--191},
  volume = {30},
  abstract = {We present a novel approach to legged locomotion over rough terrain that is thoroughly rooted in optimization. This approach relies on a hierarchy of fast, anytime algorithms to plan a set of footholds, along with the dynamic body motions required to execute them. Components within the planning framework coordinate to exchange plans, cost-to-go estimates, and ``certificates'' that ensure the output of an abstract high-level planner can be realized by lower layers of the hierarchy. The burden of careful engineering of cost functions to achieve desired performance is substantially mitigated by a simple inverse optimal control technique. Robustness is achieved by real-time re-planning of the full trajectory, augmented by reflexes and feedback control. We demonstrate the successful application of our approach in guiding the LittleDog quadruped robot over a variety of types of rough terrain. Other novel aspects of our past research efforts include a variety of pioneering inverse optimal control techniques as well as a system for planning using arbitrary pre-recorded robot behavior.},
  address = {Thousand Oaks, CA, USA},
  doi = {10.1177/0278364910392608},
  issn = {0278-3649},
  owner = {sarah},
  publisher = {Sage Publications, Inc.},
  timestamp = {2015.06.16}
}
