How to fix premature convergence in simple GA (Python)?
Yesterday i started exploring the genetic algorithms, and when i ended up with some basic theory, i tried to write simple GA on Python, that solves Diophantine equation. I'm new to Python and GAs, so please, don't judge my code strictly.
Problem
I cant get any result due to premature convergence (there is some no-return point (n-population), population[n] == population[n+i], where i is any integer. even the random mutatuion element cant change this, the generation is degradating very quickly)
GA is using crossover to breed, and weighted choice of parents.
- Q1: Is there any design mistakes in my code (below)?
- Q1.2: Do i need to add elitism?
- Q1.3: Do i need to change breed logic?
- Q2: Is there realy needed deep copy?
Code:
# -*- coding: utf-8 -*-
from random import randint
from copy import deepcopy
from math import floor
import random
class Organism:
#initiate
def __init__(self, alleles, fitness, likelihood):
self.alleles = alleles
self.fitness = fitness
self.likelihood = likelihood
self.result = 0
def __unicode__(self):
return '%s [%s - %s]' % (self.alleles, self.fitness, self.likelihood)
class CDiophantine:
def __init__(self, coefficients, result):
self.coefficients = coefficients
self.result = result
maxPopulation = 40
organisms = []
def GetGene (self,i):
return self.organisms[i]
def OrganismFitness (self,gene):
gene.result = 0
for i in range (0, len(self.coefficients)):
gene.result += self.coefficients[i]*gene.alleles[i]
gene.fitness = abs(gene.result - self.result)
return gene.fitness
def Fitness (self):
for organism in self.organisms:
organism.fitness = self.OrganismFitness(organism)
if organism.fitness == 0:
return organism
return None
def MultiplyFitness (self):
coefficientSum = 0
for organism in self.organisms:
coefficientSum += 1/float(organism.fitness)
return coefficientSum
def GenerateLikelihoods (self):
last = 0
multiplyFitness = self.MultiplyFitness()
for organism in self.organisms:
last = ((1/float(organism.fitness)/multiplyFitness)*100)
#print '1/%s/%s*100 - %s' % (organism.fitness, multiplyFitness, last)
organism.likelihood = last
def Breed (self, parentOne, 开发者_如何学PythonparentTwo):
crossover = randint (1,len(self.coefficients)-1)
child = deepcopy(parentOne)
initial = 0
final = len(parentOne.alleles) - 1
if randint (1,100) < 50:
father = parentOne
mother = parentTwo
else:
father = parentTwo
mother = parentOne
child.alleles = mother.alleles[:crossover] + father.alleles[crossover:]
if randint (1,100) < 5:
for i in range(initial,final):
child.alleles[i] = randint (0,self.result)
return child
def CreateNewOrganisms (self):
#generating new population
tempPopulation = []
for _ in self.organisms:
iterations = 0
father = deepcopy(self.organisms[0])
mother = deepcopy(self.organisms[1])
while father.alleles == mother.alleles:
father = self.WeightedChoice()
mother = self.WeightedChoice()
iterations+=1
if iterations > 35:
break
kid = self.Breed(father,mother)
tempPopulation.append(kid)
self.organisms = tempPopulation
def WeightedChoice (self):
list = []
for organism in self.organisms:
list.append((organism.likelihood,organism))
list = sorted((random.random() * x[0], x[1]) for x in list)
return list[-1][1]
def AverageFitness (self):
sum = 0
for organism in self.organisms:
sum += organism.fitness
return float(sum)/len(self.organisms)
def AverageLikelihoods (self):
sum = 0
for organism in self.organisms:
sum += organism.likelihood
return sum/len(self.organisms)
def Solve (self):
solution = None
for i in range(0,self.maxPopulation):
alleles = []
#
for j in range(0, len(self.coefficients)):
alleles.append(randint(0, self.result))
self.organisms.append(Organism(alleles,0,0))
solution = self.Fitness()
if solution:
return solution.alleles
iterations = 0
while not solution and iterations <3000:
self.GenerateLikelihoods()
self.CreateNewOrganisms()
solution = self.Fitness()
if solution:
print 'SOLUTION FOUND IN %s ITERATIONS' % iterations
return solution.alleles
iterations += 1
return -1
if __name__ == "__main__":
diophantine = CDiophantine ([1,2,3,4],30)
#cProfile.run('diophantine.Solve()')
print diophantine.Solve()
I tried to change breed and weighted random choice logic but with no results. This GA supposed to be work, i dont know, what's wrong. I know that there are some GA libraries on Python, i'm trying to understand them at the moment - it seems that they are quite complex to me. Sorry for mistakes, english is not my native language. Thank you for your understanding.
NECROUPDATE: Store chromosomes in Gray Code, not in integer.
Slight logic error: parentTwo is slightly more likely to be the father than the mother. Even odds would be randint (1,100) <= 50, not randint (1,100) < 50. Won't be what's causing the issue here.
- Your population size is fairly small. 40 is very few for most problems. That will cause it to converge quickly.
- Elitism will cause your population to converge faster, not slower.
- Your WeightedChoice function appears to be rather inefficient, if I'm reading it correctly. I haven't used Python recently enough to really understand what's going on there, but looking at it it certainly feels like something inefficient. If you can improve on that, it should speed up the processing so you can increase the population size (and, seeing as I'm figuring your algorithm there is probably at least O(n^2), that'll be really important).
With such a small population size, 200-300 generations is not surprising to solve the problem. If you increase the population, it should reduce the generations required.
Note: I found some old code that I wrote a few years ago for solving a similar problem. It's in C, and uses tournament selection, but perhaps it can give you a few ideas:
/*Diophantine equation solving genetic algorithm
Copyright (C) 2009- by Joel Rein
Licensed under the terms of the MIT License*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define POP 100
//number of variables to solve for
#define VAR 4
//maximum value for a) result and b) variables
#define MAX 100
#define MAX_GENS 500
//probability of crossover (otherwise just one parent will be used)
#define CROSSOVER 0.7
//probability of mutation (per gene)
#define MUTATION 0.4
//print out debug information each generation (recommended: if used, keep RUNS low)
#define DEBUG
//print result of each run individually
#define PRINT_RESULT
//how many times to run the GA
#define RUNS 1
int pop[POP][VAR], scores[POP], new[POP][VAR];
int coefficients[VAR];
int result=0;
int score(int index){
int sum=0;
for(int i=0;i<VAR;i++)
sum+=coefficients[i]*pop[index][i];
return abs(sum-result);
}
int tournament(int size){
int best=rand()%POP;
for(int i=1;i<size;i++){
int comp=rand()%POP;
if(scores[comp]<scores[best])
best=comp;
}
return best;
}
void breed(int target){
int a=tournament(3), b=tournament(3);
//copy a
for(int i=0;i<VAR;i++)
new[target][i]=pop[a][i];
//crossover
if((float)rand()/RAND_MAX<CROSSOVER){
int x=rand()%VAR;
for(int i=x;i<VAR;i++)
new[target][i]=pop[b][i];
}
//mutation
for(int i=0;i<VAR;i++)
if((float)rand()/RAND_MAX<MUTATION)
new[target][i]=rand()%(result*2)-result;
}
void debug(int gen, int best){
#ifdef DEBUG
printf("Gen: %3i Score: %3i --- ", gen, scores[best]);
int sum=0;
for(int i=0;i<VAR;i++){
sum+=coefficients[i]*pop[best][i];
printf("%3i*%3i+", coefficients[i], pop[best][i]);
}
printf("0= %3i (target: %i)\n", sum, result);
#endif
}
int ga(int run){
srand(time(NULL)+run);
//calculate a result for the equation.
//this mustn't be 0, else we get division-by-zero errors while initialising & mutating.
while(!result)
result=rand()%MAX;
for(int i=0;i<VAR;i++)
coefficients[i]=rand()%result;
//initialise population
for(int i=0;i<POP;i++)
for(int j=0;j<VAR;j++)
pop[i][j]=rand()%(result*2)-result;
//main loop
int gen, best;
for(gen=0;gen<MAX_GENS;gen++){
best=0;
//evaluate population
for(int i=0;i<POP;i++){
scores[i]=score(i);
if(scores[i]<scores[best])
best=i;
}
debug(gen, best);
if(scores[best]==0)
break;
//breed and replace
for(int i=0;i<POP;i++)
breed(i);
for(int i=0;i<POP;i++)
for(int j=0;j<VAR;j++)
pop[i][j]=new[i][j];
}
#ifdef PRINT_RESULT
printf("Terminated after %4i generations with a score of %3i\n", gen, scores[best]);
#else
printf(".");
#endif
return gen;
}
int main(){
int total=0;
for(int i=0;i<RUNS;i++)
total+=ga(i);
printf("\nAverage runtime: %i generations\n", total/RUNS);
}
精彩评论