昨天我开始探索遗传算法,在掌握一些基本理论后,我尝试用 Python 编写一个简单的遗传算法来解决丢番图方程。我是 Python 和遗传算法的新手,所以请不要太严格地评价我的代码。
问题
由于过早收敛,我无法获得任何结果(存在一些无法返回的点(n-population),population[n] == population[n+i],其中 i 是任何整数)。即使是随机变异元素也无法改变这一点,种群退化的速度非常快)
遗传算法使用交叉进行繁殖,并使用加权选择父代。
- Q1: 我的代码(如下)是否存在任何设计错误?
- Q1.2: 我是否需要添加精英主义?
- Q1.3: 我是否需要更改繁殖逻辑?
- Q2: 是否真的需要深拷贝?
代码:
# -*- coding: utf-8 -*-from random import randintfrom copy import deepcopyfrom math import floorimport randomclass Organism: #initiate def __init__(self, alleles, fitness, likelihood): self.alleles = alleles self.fitness = fitness self.likelihood = likelihood self.result = 0 def __unicode__(self): return '%s [%s - %s]' % (self.alleles, self.fitness, self.likelihood)class CDiophantine: def __init__(self, coefficients, result): self.coefficients = coefficients self.result = result maxPopulation = 40 organisms = [] def GetGene (self,i): return self.organisms[i] def OrganismFitness (self,gene): gene.result = 0 for i in range (0, len(self.coefficients)): gene.result += self.coefficients[i]*gene.alleles[i] gene.fitness = abs(gene.result - self.result) return gene.fitness def Fitness (self): for organism in self.organisms: organism.fitness = self.OrganismFitness(organism) if organism.fitness == 0: return organism return None def MultiplyFitness (self): coefficientSum = 0 for organism in self.organisms: coefficientSum += 1/float(organism.fitness) return coefficientSum def GenerateLikelihoods (self): last = 0 multiplyFitness = self.MultiplyFitness() for organism in self.organisms: last = ((1/float(organism.fitness)/multiplyFitness)*100) #print '1/%s/%s*100 - %s' % (organism.fitness, multiplyFitness, last) organism.likelihood = last def Breed (self, parentOne, parentTwo): crossover = randint (1,len(self.coefficients)-1) child = deepcopy(parentOne) initial = 0 final = len(parentOne.alleles) - 1 if randint (1,100) < 50: father = parentOne mother = parentTwo else: father = parentTwo mother = parentOne child.alleles = mother.alleles[:crossover] + father.alleles[crossover:] if randint (1,100) < 5: for i in range(initial,final): child.alleles[i] = randint (0,self.result) return child def CreateNewOrganisms (self): #generating new population tempPopulation = [] for _ in self.organisms: iterations = 0 father = deepcopy(self.organisms[0]) mother = deepcopy(self.organisms[1]) while father.alleles == mother.alleles: father = self.WeightedChoice() mother = self.WeightedChoice() iterations+=1 if iterations > 35: break kid = self.Breed(father,mother) tempPopulation.append(kid) self.organisms = tempPopulation def WeightedChoice (self): list = [] for organism in self.organisms: list.append((organism.likelihood,organism)) list = sorted((random.random() * x[0], x[1]) for x in list) return list[-1][1] def AverageFitness (self): sum = 0 for organism in self.organisms: sum += organism.fitness return float(sum)/len(self.organisms) def AverageLikelihoods (self): sum = 0 for organism in self.organisms: sum += organism.likelihood return sum/len(self.organisms) def Solve (self): solution = None for i in range(0,self.maxPopulation): alleles = [] # for j in range(0, len(self.coefficients)): alleles.append(randint(0, self.result)) self.organisms.append(Organism(alleles,0,0)) solution = self.Fitness() if solution: return solution.alleles iterations = 0 while not solution and iterations <3000: self.GenerateLikelihoods() self.CreateNewOrganisms() solution = self.Fitness() if solution: print 'SOLUTION FOUND IN %s ITERATIONS' % iterations return solution.alleles iterations += 1 return -1if __name__ == "__main__": diophantine = CDiophantine ([1,2,3,4],30) #cProfile.run('diophantine.Solve()') print diophantine.Solve()
我尝试更改繁殖和加权随机选择逻辑,但没有结果。这个遗传算法应该可以工作的,我不知道哪里出错了。我知道 Python 上有一些遗传算法库,我目前正在尝试理解它们 – 它们对我来说似乎相当复杂。对不起我的错误,英语不是我的母语。感谢您的理解。
情况更新:将染色体存储在格雷码中,而不是整数中。
回答:
轻微的逻辑错误:parentTwo 作为父亲的可能性略高于母亲。相等的几率应该是 randint (1,100) <= 50,而不是 randint (1,100) < 50。这不会是导致问题的原因。
- 你的种群规模相当小。 对于大多数问题来说,40 非常少。 这会导致它快速收敛。
- 精英主义会导致你的种群更快地收敛,而不是更慢。
- 如果我理解正确,你的 WeightedChoice 函数似乎效率很低。 我最近没有使用 Python,所以不太了解那里发生了什么,但从表面上看,它确实感觉效率不高。 如果你能改进它,它应该会加快处理速度,这样你就可以增加种群规模(而且,鉴于我认为你的算法可能至少是 O(n^2),这将非常重要)。
对于如此小的种群规模,200-300 代来解决问题并不奇怪。 如果你增加种群规模,应该会减少所需的代数。
注意:我找到了一些几年前写的用于解决类似问题的旧代码。 它是用 C 编写的,并使用锦标赛选择,但也许它可以给你一些想法:
/*Diophantine equation solving genetic algorithmCopyright (C) 2009- by Joel ReinLicensed under the terms of the MIT License*/#include <stdio.h>#include <stdlib.h>#include <time.h>#define POP 100//number of variables to solve for#define VAR 4//maximum value for a) result and b) variables#define MAX 100 #define MAX_GENS 500//probability of crossover (otherwise just one parent will be used)#define CROSSOVER 0.7//probability of mutation (per gene)#define MUTATION 0.4//print out debug information each generation (recommended: if used, keep RUNS low)#define DEBUG//print result of each run individually#define PRINT_RESULT//how many times to run the GA#define RUNS 1int pop[POP][VAR], scores[POP], new[POP][VAR];int coefficients[VAR];int result=0;int score(int index){ int sum=0; for(int i=0;i<VAR;i++) sum+=coefficients[i]*pop[index][i]; return abs(sum-result);}int tournament(int size){ int best=rand()%POP; for(int i=1;i<size;i++){ int comp=rand()%POP; if(scores[comp]<scores[best]) best=comp; } return best;}void breed(int target){ int a=tournament(3), b=tournament(3); //copy a for(int i=0;i<VAR;i++) new[target][i]=pop[a][i]; //crossover if((float)rand()/RAND_MAX<CROSSOVER){ int x=rand()%VAR; for(int i=x;i<VAR;i++) new[target][i]=pop[b][i]; } //mutation for(int i=0;i<VAR;i++) if((float)rand()/RAND_MAX<MUTATION) new[target][i]=rand()%(result*2)-result;}void debug(int gen, int best){#ifdef DEBUG printf("Gen: %3i Score: %3i --- ", gen, scores[best]); int sum=0; for(int i=0;i<VAR;i++){ sum+=coefficients[i]*pop[best][i]; printf("%3i*%3i+", coefficients[i], pop[best][i]); } printf("0= %3i (target: %i)\n", sum, result);#endif}int ga(int run){ srand(time(NULL)+run); //calculate a result for the equation. //this mustn't be 0, else we get division-by-zero errors while initialising & mutating. while(!result) result=rand()%MAX; for(int i=0;i<VAR;i++) coefficients[i]=rand()%result; //initialise population for(int i=0;i<POP;i++) for(int j=0;j<VAR;j++) pop[i][j]=rand()%(result*2)-result; //main loop int gen, best; for(gen=0;gen<MAX_GENS;gen++){ best=0; //evaluate population for(int i=0;i<POP;i++){ scores[i]=score(i); if(scores[i]<scores[best]) best=i; } debug(gen, best); if(scores[best]==0) break; //breed and replace for(int i=0;i<POP;i++) breed(i); for(int i=0;i<POP;i++) for(int j=0;j<VAR;j++) pop[i][j]=new[i][j]; }#ifdef PRINT_RESULT printf("Terminated after %4i generations with a score of %3i\n", gen, scores[best]); #else printf(".");#endif return gen;}int main(){ int total=0; for(int i=0;i<RUNS;i++) total+=ga(i); printf("\nAverage runtime: %i generations\n", total/RUNS);}