From 00fca9b6e08f8863811b275cf29644f5dc56592b Mon Sep 17 00:00:00 2001 From: Itai Caspi Date: Thu, 19 Oct 2017 16:41:12 +0300 Subject: [PATCH] updated the paper links in the docs and restyled the theme --- docs/docs/algorithms/other/dfp.md | 6 ++++-- docs/docs/algorithms/policy_optimization/ac.md | 6 ++++-- docs/docs/algorithms/policy_optimization/cppo.md | 6 ++++-- docs/docs/algorithms/policy_optimization/ddpg.md | 6 ++++-- docs/docs/algorithms/policy_optimization/pg.md | 6 ++++-- docs/docs/algorithms/policy_optimization/ppo.md | 6 ++++-- docs/docs/algorithms/value_optimization/bs_dqn.md | 6 ++++-- .../algorithms/value_optimization/distributional_dqn.md | 6 ++++-- docs/docs/algorithms/value_optimization/double_dqn.md | 4 ++-- docs/docs/algorithms/value_optimization/dqn.md | 6 ++++-- docs/docs/algorithms/value_optimization/dueling_dqn.md | 6 ++++-- docs/docs/algorithms/value_optimization/mmc.md | 4 ++-- docs/docs/algorithms/value_optimization/n_step.md | 6 ++++-- docs/docs/algorithms/value_optimization/naf.md | 6 ++++-- docs/docs/algorithms/value_optimization/nec.md | 6 ++++-- docs/docs/algorithms/value_optimization/pal.md | 6 ++++-- docs/docs/extra.css | 3 +++ docs/mkdocs.yml | 1 + 18 files changed, 64 insertions(+), 32 deletions(-) create mode 100644 docs/docs/extra.css diff --git a/docs/docs/algorithms/other/dfp.md b/docs/docs/algorithms/other/dfp.md index 0b8985a..4e5c110 100644 --- a/docs/docs/algorithms/other/dfp.md +++ b/docs/docs/algorithms/other/dfp.md @@ -1,6 +1,8 @@ -> Actions space: Discrete +# Direct Future Prediction -[Paper](https://arxiv.org/abs/1611.01779) +**Actions space:** Discrete + +**References:** [Learning to Act by Predicting the Future](https://arxiv.org/abs/1611.01779) ## Network Structure diff --git a/docs/docs/algorithms/policy_optimization/ac.md b/docs/docs/algorithms/policy_optimization/ac.md index a17ea42..d394eae 100644 --- a/docs/docs/algorithms/policy_optimization/ac.md +++ b/docs/docs/algorithms/policy_optimization/ac.md @@ -1,6 
+1,8 @@ -> Action space: Discrete|Continuous +# Actor-Critic -[Paper](https://arxiv.org/abs/1602.01783) +**Actions space:** Discrete|Continuous + +**References:** [Asynchronous Methods for Deep Reinforcement Learning](https://arxiv.org/abs/1602.01783) ## Network Structure

diff --git a/docs/docs/algorithms/policy_optimization/cppo.md b/docs/docs/algorithms/policy_optimization/cppo.md index 5df29b0..150cdba 100644 --- a/docs/docs/algorithms/policy_optimization/cppo.md +++ b/docs/docs/algorithms/policy_optimization/cppo.md @@ -1,6 +1,8 @@ -> Action Space: Discrete|Continuous +# Clipped Proximal Policy Optimization -[Paper](https://arxiv.org/pdf/1707.06347.pdf) +**Actions space:** Discrete|Continuous + +**References:** [Proximal Policy Optimization Algorithms](https://arxiv.org/pdf/1707.06347.pdf) ## Network Structure diff --git a/docs/docs/algorithms/policy_optimization/ddpg.md b/docs/docs/algorithms/policy_optimization/ddpg.md index 039557f..f8ed755 100644 --- a/docs/docs/algorithms/policy_optimization/ddpg.md +++ b/docs/docs/algorithms/policy_optimization/ddpg.md @@ -1,6 +1,8 @@ -> Actions space: Continuous +# Deep Deterministic Policy Gradient -[Paper](https://arxiv.org/abs/1509.02971) +**Actions space:** Continuous + +**References:** [Continuous control with deep reinforcement learning](https://arxiv.org/abs/1509.02971) ## Network Structure diff --git a/docs/docs/algorithms/policy_optimization/pg.md b/docs/docs/algorithms/policy_optimization/pg.md index c2933ee..c890510 100644 --- a/docs/docs/algorithms/policy_optimization/pg.md +++ b/docs/docs/algorithms/policy_optimization/pg.md @@ -1,6 +1,8 @@ -> Action Space: Discrete|Continuous +# Policy Gradient -[Paper](http://www-anw.cs.umass.edu/~barto/courses/cs687/williams92simple.pdf) +**Actions space:** Discrete|Continuous + +**References:** [Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning](http://www-anw.cs.umass.edu/~barto/courses/cs687/williams92simple.pdf) ## Network Structure diff --git a/docs/docs/algorithms/policy_optimization/ppo.md b/docs/docs/algorithms/policy_optimization/ppo.md index 4ff418a..8e23b05 100644 --- a/docs/docs/algorithms/policy_optimization/ppo.md +++ b/docs/docs/algorithms/policy_optimization/ppo.md @@ -1,6 +1,8 @@ -> 
Actions space: Discrete|Continuous +# Proximal Policy Optimization -[Paper](https://arxiv.org/pdf/1707.02286.pdf) +**Actions space:** Discrete|Continuous + +**References:** [Emergence of Locomotion Behaviours in Rich Environments](https://arxiv.org/pdf/1707.02286.pdf) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/bs_dqn.md b/docs/docs/algorithms/value_optimization/bs_dqn.md index b10087c..4ee1ee1 100644 --- a/docs/docs/algorithms/value_optimization/bs_dqn.md +++ b/docs/docs/algorithms/value_optimization/bs_dqn.md @@ -1,6 +1,8 @@ -> Action space: Discrete +# Bootstrapped DQN -[Paper](https://arxiv.org/abs/1602.04621) +**Actions space:** Discrete + +**References:** [Deep Exploration via Bootstrapped DQN](https://arxiv.org/abs/1602.04621) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/distributional_dqn.md b/docs/docs/algorithms/value_optimization/distributional_dqn.md index 1dcd513..5dcc4c2 100644 --- a/docs/docs/algorithms/value_optimization/distributional_dqn.md +++ b/docs/docs/algorithms/value_optimization/distributional_dqn.md @@ -1,6 +1,8 @@ -> Action space: Discrete +# Distributional DQN -[Paper](https://arxiv.org/abs/1707.06887) +**Actions space:** Discrete + +**References:** [A Distributional Perspective on Reinforcement Learning](https://arxiv.org/abs/1707.06887) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/double_dqn.md b/docs/docs/algorithms/value_optimization/double_dqn.md index ce8513e..3ff88dc 100644 --- a/docs/docs/algorithms/value_optimization/double_dqn.md +++ b/docs/docs/algorithms/value_optimization/double_dqn.md @@ -1,8 +1,8 @@ # Double DQN -> Action space: Discrete - [Paper](https://arxiv.org/pdf/1509.06461.pdf) +**Actions space:** Discrete +**References:** [Deep Reinforcement Learning with Double Q-learning](https://arxiv.org/abs/1509.06461) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/dqn.md 
b/docs/docs/algorithms/value_optimization/dqn.md index 945854d..a21d19c 100644 --- a/docs/docs/algorithms/value_optimization/dqn.md +++ b/docs/docs/algorithms/value_optimization/dqn.md @@ -1,6 +1,8 @@ -> Action space: Discrete +# Deep Q Networks -[Paper](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf) +**Actions space:** Discrete + +**References:** [Playing Atari with Deep Reinforcement Learning](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/dueling_dqn.md b/docs/docs/algorithms/value_optimization/dueling_dqn.md index 1aec923..0b0b15d 100644 --- a/docs/docs/algorithms/value_optimization/dueling_dqn.md +++ b/docs/docs/algorithms/value_optimization/dueling_dqn.md @@ -1,6 +1,8 @@ -> Action space: Discrete +# Dueling DQN -[Paper](https://arxiv.org/abs/1511.06581) +**Actions space:** Discrete + +**References:** [Dueling Network Architectures for Deep Reinforcement Learning](https://arxiv.org/abs/1511.06581) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/mmc.md b/docs/docs/algorithms/value_optimization/mmc.md index ba629ce..412f564 100644 --- a/docs/docs/algorithms/value_optimization/mmc.md +++ b/docs/docs/algorithms/value_optimization/mmc.md @@ -1,8 +1,8 @@ # Mixed Monte Carlo -> Action space: Discrete +**Actions space:** Discrete -[Paper](https://arxiv.org/abs/1703.01310) +**References:** [Count-Based Exploration with Neural Density Models](https://arxiv.org/abs/1703.01310) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/n_step.md b/docs/docs/algorithms/value_optimization/n_step.md index 2cd7f1c..4fa7bd2 100644 --- a/docs/docs/algorithms/value_optimization/n_step.md +++ b/docs/docs/algorithms/value_optimization/n_step.md @@ -1,6 +1,8 @@ -> Action space: Discrete +# N-Step Q Learning -[Paper](https://arxiv.org/abs/1602.01783) +**Actions space:** Discrete + +**References:** [Asynchronous Methods for Deep Reinforcement 
Learning](https://arxiv.org/abs/1602.01783) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/naf.md b/docs/docs/algorithms/value_optimization/naf.md index 3a06a87..8e0bffd 100644 --- a/docs/docs/algorithms/value_optimization/naf.md +++ b/docs/docs/algorithms/value_optimization/naf.md @@ -1,6 +1,8 @@ -> Action space: Continuous +# Normalized Advantage Functions -[Paper](https://arxiv.org/abs/1603.00748.pdf) +**Actions space:** Continuous + +**References:** [Continuous Deep Q-Learning with Model-based Acceleration](https://arxiv.org/abs/1603.00748) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/nec.md b/docs/docs/algorithms/value_optimization/nec.md index cecd679..87c4946 100644 --- a/docs/docs/algorithms/value_optimization/nec.md +++ b/docs/docs/algorithms/value_optimization/nec.md @@ -1,6 +1,8 @@ -> Action space: Discrete +# Neural Episodic Control -[Paper](https://arxiv.org/abs/1703.01988) +**Actions space:** Discrete + +**References:** [Neural Episodic Control](https://arxiv.org/abs/1703.01988) ## Network Structure diff --git a/docs/docs/algorithms/value_optimization/pal.md b/docs/docs/algorithms/value_optimization/pal.md index 58a8bdb..37be118 100644 --- a/docs/docs/algorithms/value_optimization/pal.md +++ b/docs/docs/algorithms/value_optimization/pal.md @@ -1,6 +1,8 @@ -> Action space: Discrete +# Persistent Advantage Learning -[Paper](https://arxiv.org/abs/1512.04860) +**Actions space:** Discrete + +**References:** [Increasing the Action Gap: New Operators for Reinforcement Learning](https://arxiv.org/abs/1512.04860) ## Network Structure diff --git a/docs/docs/extra.css b/docs/docs/extra.css new file mode 100644 index 0000000..83fa72c --- /dev/null +++ b/docs/docs/extra.css @@ -0,0 +1,3 @@ +.wy-side-nav-search { + background-color: #79a7a5; +} \ No newline at end of file diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index dc833ac..b775463 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -6,6 +6,7 @@ 
markdown_extensions: enable_dollar_delimiter: True #for use of inline $..$ extra_javascript: ['https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML'] +extra_css: [extra.css] pages: - Home : index.md