From 8a5f0b5ed5813b228755f6766b77210bfd31ff15 Mon Sep 17 00:00:00 2001
From: KOLANICH
Date: Mon, 7 Jan 2019 16:47:28 +0300
Subject: [PATCH 1/2] Added .editorconfig according to PEP 8

---
 .editorconfig | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..1902680
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,7 @@
+root = true
+
+[*.py]
+charset = utf-8
+indent_style = space
+indent_size = 4
+insert_final_newline = false

From 95530c4dc1da202a5727e88b2099613889a1f091 Mon Sep 17 00:00:00 2001
From: KOLANICH
Date: Mon, 7 Jan 2019 17:16:37 +0300
Subject: [PATCH 2/2] Added distributions, adam and tests (surprisingly adam works worse than simple gradient descent)

---
Reviewer amendments (this section is ignored by `git am` and is not part of
the commit message):
 * strategies.Adam: the step is now lr * m_hat / (sqrt(v_hat) + eps); it used
   to divide by sqrt(sqrt(v) + eps). Bias correction uses a per-index step
   count instead of being frozen at step 1. Moments are kept in lists so an
   entry can hold an array shaped like the corresponding layer.
 * _get_population: the sample shape is passed as rvs(size=...); a second
   positional argument would be interpreted as random_state.
 * _update_weights: the decayed learning rate is propagated to the strategy,
   which otherwise keeps the value it was constructed with.
 * run: len(self.weights) replaces self.weights.shape[0] so that weights
   given as a list of arrays can be reported too.
 The blob ids on the "index" line of evolution_strategy.py predate these
 amendments.

 .gitlab-ci.yml                           | 92 ++++++++++++++++++++++++
 evostra/algorithms/evolution_strategy.py | 87 ++++++++++++++++++++--
 tests/tests.py                           | 42 +++++++++++
 3 files changed, 211 insertions(+), 10 deletions(-)
 create mode 100644 .gitlab-ci.yml
 create mode 100644 tests/tests.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..6ee89d6
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,92 @@
+#image: pypy:latest
+image: python:latest
+
+stages:
+  - dependencies
+  - build
+  - test
+  - tooling
+
+variables:
+  GIT_DEPTH: "1"
+  PYTHONUSERBASE: "${CI_PROJECT_DIR}/python_user_packages"
+
+dependencies:
+  tags:
+    - shared
+  stage: dependencies
+  before_script:
+    - export EXECUTABLE_DEPENDENCIES_DIR=${PYTHONUSERBASE}/bin
+    - export PATH="$PATH:$EXECUTABLE_DEPENDENCIES_DIR" # don't move into `variables` any of them, it is unordered
+  script:
+    - pip3 install --user --upgrade --pre setuptools setuptools_scm
+    - pip3 install --user --upgrade --pre git+https://github.com/pypa/pip.git git+https://github.com/pypa/wheel.git
+    - pip3 install --user --upgrade --pre coverage git+https://github.com/coveralls-clients/coveralls-python.git@eba54e4d19e40e3907e5fd516f68e8b4dc9e5a31 git+https://github.com/codecov/codecov-python.git@0743daa83647f12ff31b84d07113d2c24c27b924
+    - pip3 install --upgrade --user --pre scikit-learn numpy scipy
+
+  cache:
+    key: deps
+    paths:
+      - $PYTHONUSERBASE
+
+build:
+  tags:
+    - shared
+  stage: build
+
+  before_script:
+    - export EXECUTABLE_DEPENDENCIES_DIR=${PYTHONUSERBASE}/bin
+    - export PATH="$PATH:$EXECUTABLE_DEPENDENCIES_DIR" # don't move into `variables` any of them, it is unordered
+
+  script:
+    - python3 setup.py bdist_wheel
+    - mv ./dist/*.whl ./dist/evostra-0.CI-py3-none-any.whl
+    - pip3 install --user --upgrade --pre ./dist/evostra-0.CI-py3-none-any.whl
+    - coverage run --source=evostra ./tests/tests.py
+    - coverage report -m
+    - coveralls || true
+    - codecov || true
+
+  cache:
+    key: deps
+    paths:
+      - $PYTHONUSERBASE
+
+  artifacts:
+    paths:
+      - dist
+
+sast:
+  stage: tooling
+  tags:
+    - shared
+  image: docker:latest
+  variables:
+    DOCKER_DRIVER: overlay2
+  allow_failure: true
+  services:
+    - docker:dind
+  script:
+    - docker run --env SAST_CONFIDENCE_LEVEL=5 --volume "$PWD:/code" --volume /var/run/docker.sock:/var/run/docker.sock "registry.gitlab.com/gitlab-org/security-products/sast:latest" /app/bin/run /code
+  artifacts:
+    paths:
+      - gl-sast-report.json
+
+pages:
+  stage: tooling
+  tags:
+    - shared
+  image: alpine:latest
+  allow_failure: true
+  before_script:
+    - apk update
+    - apk add doxygen
+    - apk add ttf-freefont graphviz
+  script:
+    - doxygen ./Doxyfile
+    - mv ./docs/html ./public
+  artifacts:
+    paths:
+      - public
+  only:
+    - master
diff --git a/evostra/algorithms/evolution_strategy.py b/evostra/algorithms/evolution_strategy.py
index d9df813..9d3179b 100644
--- a/evostra/algorithms/evolution_strategy.py
+++ b/evostra/algorithms/evolution_strategy.py
@@ -1,6 +1,8 @@
 from __future__ import print_function
 import numpy as np
+import scipy.stats as st
 import multiprocessing as mp
+from collections.abc import Iterable
 
 np.random.seed(0)
 
@@ -9,24 +11,76 @@ def worker_process(arg):
     get_reward_func, weights = arg
     return get_reward_func(weights)
 
+class WeightUpdateStrategy:
+    """Base class for rules that turn a gradient estimate into a weight increment."""
+    __slots__ = ("learning_rate",)
+    def __init__(self, dim, learning_rate):
+        self.learning_rate = learning_rate
+
+
+class strategies:
+    """Namespace holding the available weight update strategies."""
+    class GD(WeightUpdateStrategy):
+        """Plain gradient step: the increment is learning_rate * g."""
+        def update(self, i, g):
+            return self.learning_rate * g
+
+
+    class Adam(WeightUpdateStrategy):
+        """Adam step with separate moment estimates and step count per weight index."""
+        __slots__ = ("eps", "beta1", "beta2", "m", "v", "t")
+        def __init__(self, dim, learning_rate, eps=1e-8, beta1=0.9, beta2=0.999):
+            super().__init__(dim, learning_rate)
+            self.eps = eps
+            self.beta1 = beta1
+            self.beta2 = beta2
+            # Lists rather than arrays: entry i may be a scalar or an array shaped like weights[i].
+            self.m = [0.] * dim
+            self.v = [0.] * dim
+            self.t = [0] * dim  # number of updates applied so far to each index
+
+        def update(self, i, g):
+            self.t[i] += 1
+            self.m[i] = self.beta1 * self.m[i] + (1-self.beta1) * g
+            self.v[i] = self.beta2 * self.v[i] + (1-self.beta2) * (g**2)
+            # Bias-correct both moment estimates for the current step count.
+            m_hat = self.m[i] / (1 - self.beta1 ** self.t[i])
+            v_hat = self.v[i] / (1 - self.beta2 ** self.t[i])
+            return self.learning_rate * m_hat / (np.sqrt(v_hat) + self.eps)
+
 
 class EvolutionStrategy(object):
     def __init__(self, weights, get_reward_func, population_size=50, sigma=0.1, learning_rate=0.03, decay=0.999,
-                 num_threads=1):
-
+                 num_threads=1, limits=None, printer=None, distributions=None, strategy=None):
+        if limits is None:
+            limits = (np.inf, -np.inf)  # NOTE(review): order looks inverted for (lower, upper) and limits is not used in the hunks shown - confirm
         self.weights = weights
+        self.limits = limits
         self.get_reward = get_reward_func
         self.POPULATION_SIZE = population_size
-        self.SIGMA = sigma
+        if distributions is None:
+            distributions = st.norm(loc=0., scale=sigma)
+        if isinstance(distributions, Iterable):
+            distributions = list(distributions)
+            self.SIGMA = np.array([d.std() for d in distributions])
+        else:
+            self.SIGMA = distributions.std()
+
+        self.distributions = distributions
         self.learning_rate = learning_rate
         self.decay = decay
         self.num_threads = mp.cpu_count() if num_threads == -1 else num_threads
+        if printer is None:
+            printer = print
+        self.printer = printer
+        if strategy is None:
+            strategy = strategies.GD
+        self.strategy = strategy(len(weights), self.learning_rate)
 
     def _get_weights_try(self, w, p):
         weights_try = []
         for index, i in enumerate(p):
-            jittered = self.SIGMA * i
-            weights_try.append(w[index] + jittered)
+            weights_try.append(w[index] + i)
         return weights_try
 
     def get_weights(self):
@@ -36,8 +90,13 @@ def _get_population(self):
         population = []
         for i in range(self.POPULATION_SIZE):
             x = []
-            for w in self.weights:
-                x.append(np.random.randn(*w.shape))
+            if isinstance(self.distributions, Iterable):
+                for j, w in enumerate(self.weights):
+                    x.append(self.distributions[j].rvs(size=w.shape))  # size= keyword: a 2nd positional argument would be taken as random_state
+            else:
+                for w in self.weights:
+                    x.append(self.distributions.rvs(size=w.shape))
+
             population.append(x)
         return population
 
@@ -59,10 +118,18 @@ def _update_weights(self, rewards, population):
         if std == 0:
             return
         rewards = (rewards - rewards.mean()) / std
+        grad_factor = 1. / (self.POPULATION_SIZE * (self.SIGMA ** 2))
+
         for index, w in enumerate(self.weights):
             layer_population = np.array([p[index] for p in population])
-            update_factor = self.learning_rate / (self.POPULATION_SIZE * self.SIGMA)
-            self.weights[index] = w + update_factor * np.dot(layer_population.T, rewards).T
+            corr = np.dot(layer_population.T, rewards).T
+
+            if not isinstance(grad_factor, np.ndarray):
+                g = grad_factor * corr
+            else:
+                g = grad_factor[index] * corr
+            self.weights[index] = w + self.strategy.update(index, g)
         self.learning_rate *= self.decay
+        self.strategy.learning_rate = self.learning_rate  # the strategy stores its own copy; without this the decay has no effect
 
     def run(self, iterations, print_step=10):
@@ -75,7 +142,7 @@ def run(self, iterations, print_step=10):
             self._update_weights(rewards, population)
 
             if (iteration + 1) % print_step == 0:
-                print('iter %d. reward: %f' % (iteration + 1, self.get_reward(self.weights)))
+                self.printer('iter %d. reward: %f' % (iteration + 1, self.get_reward(self.weights)), (self.weights if len(self.weights) <= 10 else None))
         if pool is not None:
             pool.close()
             pool.join()
diff --git a/tests/tests.py b/tests/tests.py
new file mode 100644
index 0000000..82eb77a
--- /dev/null
+++ b/tests/tests.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+import sys
+from pathlib import Path
+import unittest
+thisDir=Path(__file__).parent.absolute()
+sys.path.append(str(thisDir.parent))
+
+import numpy as np
+import scipy.stats as st
+from evostra import EvolutionStrategy
+
+def modRosenbrockNP(X, a=1, b=100):
+    return np.sqrt(np.power(a-X[0], 4) + b*np.power(X[1]-np.power(X[0], 2), 2))
+
+def ackleyRosenbrockNp(X, a=20, b=0.2, c=2*np.pi):
+    return np.real(a*(1-np.exp(-b*np.sqrt(modRosenbrockNP(X, a=0, b=a)/X.shape[0])))-np.exp(np.sum(np.cos(c*X), axis=0)/X.shape[0])+np.exp(1))
+
+
+bounds = np.array([[0, 10], [-10, 10]])
+initialPoint = np.array([10., 5.])
+
+def get_reward(weights):
+    weights=np.array(weights)
+    #print(weights)
+    res = -ackleyRosenbrockNp(weights)
+    #print(res)
+    return res
+
+
+class OptimizersTests(unittest.TestCase):
+    def testOptimizerSimple(self):
+        es = EvolutionStrategy(initialPoint, get_reward, population_size=50, sigma=0.5, learning_rate=0.1, decay=1., num_threads=1)
+        es.run(270, print_step=10)
+
+    @unittest.skip
+    def testOptimizerDistributions(self):
+        es = EvolutionStrategy(initialPoint, get_reward, population_size=20, learning_rate=0.03, decay=1., num_threads=1, distributions=[st.norm(loc=0., scale=0.1), st.norm(loc=0., scale=0.2)])
+        es.run(1000, print_step=1)
+
+
+if __name__ == '__main__':
+    unittest.main()