The biggest open-source Python library for feature selection has been successfully completed and is now available on GitHub at https://github.com/thieu1995/mafese.
MAFESE is an improved and completed version of this module. It not only includes all meta-heuristic algorithms, but it also categorizes them into different feature selection methods. These methods include traditional techniques such as filter-based, wrapper-based, unsupervised-based, and embedded-based methods. Among them, metaheuristic-based feature selection belongs to the wrapper-based method, because each candidate feature subset is scored by training and evaluating a model on it.
- Mealpy (https://github.com/thieu1995/mealpy)
- Permetrics (https://github.com/thieu1995/permetrics)
- Scikit-learn (https://scikit-learn.org/stable/index.html)
- Pandas (https://pandas.pydata.org/)
- Matplotlib (https://matplotlib.org/)
pip install numpy pandas scikit-learn matplotlib
pip install mealpy==2.5.0
pip install permetrics==1.3.0
conda create -n ml python==3.7.5
conda activate ml
conda install -c conda-forge numpy pandas scikit-learn matplotlib
pip install mealpy==2.5.0
pip install permetrics==1.3.0
Run the file: src/models/mha_fs.py
Notes:
1. The format of solution / position
- The goal is to select the best features (columns) in the dataset.
So a solution is a 1-D vector in which each dimension corresponds to one column of the dataset.
If a dimension has value 1, the corresponding column is selected for the model.
If a dimension has value 0, the corresponding column is not selected for the model.
2. The purposes of function: amend_position
- The algorithm creates real-valued solutions.
So we need this function to convert the real values back into integer values (0 and 1).
That is why the lower bound is 0 for all dimensions (floor of 0 is 0)
and the upper bound is 1.99 for all dimensions (floor of 1.99 is 1).
Also, if all dimensions have value 0, no column is selected --> the model can't run.
In that case we select a random column for the model.
3. Fitness function
- Use the Evaluator object to build a classification model with the selected classifier (KNN, Random Forest, or SVM)
and calculate metrics such as accuracy, precision, recall and f1.
- Set the obj_weights entry to 1 for each metric you are interested in.
- In the current example, the accuracy metric is selected, so the weight for accuracy is 1 and the weights for the other metrics are 0.
Besides, this is a maximization problem (we want the accuracy to be as high as possible; its maximum value is 1).
4. Use different algorithm
- Replace the selected model in the section: ## 2. Define algorithm and trial
5. Setting
- Everything that needs to be configured is located in: src/config.py
https://mealpy.readthedocs.io/en/latest/pages/general/guide_to_use_model.html#import-all-models
from mealpy.bio_based import BBO, EOA, IWO, SBO, SMA, TPO, VCS, WHO
from mealpy.evolutionary_based import CRO, DE, EP, ES, FPA, GA, MA
from mealpy.human_based import BRO, BSO, CA, CHIO, FBIO, GSKA, ICA, LCO, QSA, SARO, SSDO, TLO
from mealpy.math_based import AOA, CGO, GBO, HC, SCA
from mealpy.music_based import HS
from mealpy.physics_based import ArchOA, ASO, EFO, EO, HGSO, MVO, NRO, SA, TWO, WDO
from mealpy.probabilistic_based import CEM
from mealpy.system_based import AEO, GCO, WCA
from mealpy.swarm_based import ABC, ACOR, ALO, AO, BA, BeesA, BES, BFO, BSA, COA, CSA, CSO, DO, EHO, FA, FFA, FOA, GOA, GWO, HGS
from mealpy.swarm_based import HHO, JA, MFO, MRFO, MSA, NMRA, PFA, PSO, SFO, SHO, SLO, SRSR, SSA, SSO, SSpiderA, SSpiderO, WOA
model = BBO.OriginalBBO(problem, epoch=10, pop_size=50)
# model = BBO.BaseBBO(problem, epoch=10, pop_size=50)
# model = EOA.BaseEOA(problem, epoch=10, pop_size=50)
# model = IWO.OriginalIWO(problem, epoch=10, pop_size=50)
# model = SBO.BaseSBO(problem, epoch=10, pop_size=50)
# model = SBO.OriginalSBO(problem, epoch=10, pop_size=50)
# model = SMA.OriginalSMA(problem, epoch=10, pop_size=50)
# model = SMA.BaseSMA(problem, epoch=10, pop_size=50)
# model = VCS.OriginalVCS(problem, epoch=100, pop_size=50)
# model = VCS.BaseVCS(problem, epoch=10, pop_size=50)
# model = WHO.BaseWHO(problem, epoch=10, pop_size=50)
# model = CRO.BaseCRO(problem, epoch=10, pop_size=50)
# model = CRO.OCRO(problem, epoch=10, pop_size=50)
# model = DE.BaseDE(problem, epoch=10, pop_size=50)
# model = DE.JADE(problem, epoch=10, pop_size=50)
# model = DE.SADE(problem, epoch=10, pop_size=50)
# model = DE.SHADE(problem, epoch=10, pop_size=50)
# model = DE.L_SHADE(problem, epoch=10, pop_size=50)
# model = DE.SAP_DE(problem, epoch=10, pop_size=50)
# model = EP.BaseEP(problem, epoch=10, pop_size=50)
# model = EP.LevyEP(problem, epoch=10, pop_size=50)
# model = ES.BaseES(problem, epoch=10, pop_size=50)
# model = ES.LevyES(problem, epoch=10, pop_size=50)
# model = FPA.BaseFPA(problem, epoch=10, pop_size=50)
# model = GA.BaseGA(problem, epoch=10, pop_size=50)
# model = MA.BaseMA(problem, epoch=10, pop_size=50)
# model = BRO.OriginalBRO(problem, epoch=10, pop_size=50)
# model = BRO.BaseBRO(problem, epoch=10, pop_size=50, pr=0.03)
# model = BSO.BaseBSO(problem, epoch=10, pop_size=50)
# model = BSO.ImprovedBSO(problem, epoch=10, pop_size=50)
# model = CA.OriginalCA(problem, epoch=10, pop_size=50)
# model = CHIO.BaseCHIO(problem, epoch=10, pop_size=50)
# model = CHIO.OriginalCHIO(problem, epoch=10, pop_size=50, pr=0.03)
# model = GSKA.BaseGSKA(problem, epoch=10, pop_size=50)
# model = GSKA.OriginalGSKA(problem, epoch=10, pop_size=50)
# model = FBIO.OriginalFBIO(problem, epoch=10, pop_size=50)
# model = FBIO.BaseFBIO(problem, epoch=10, pop_size=50)
# model = ICA.BaseICA(problem, epoch=10, pop_size=50)
# model = LCO.BaseLCO(problem, epoch=10, pop_size=50)
# model = LCO.ImprovedLCO(problem, epoch=10, pop_size=50, pr=0.03)
# model = LCO.OriginalLCO(problem, epoch=10, pop_size=50)
# model = QSA.BaseQSA(problem, epoch=10, pop_size=50)
# model = QSA.OppoQSA(problem, epoch=10, pop_size=50)
# model = QSA.LevyQSA(problem, epoch=10, pop_size=50)
# model = QSA.ImprovedQSA(problem, epoch=10, pop_size=50, pr=0.03)
# model = QSA.OriginalQSA(problem, epoch=10, pop_size=50)
# model = SARO.BaseSARO(problem, epoch=10, pop_size=50)
# model = SARO.OriginalSARO(problem, epoch=10, pop_size=50)
# model = SSDO.BaseSSDO(problem, epoch=10, pop_size=50)
# model = TLO.BaseTLO(problem, epoch=10, pop_size=50)
# model = TLO.OriginalTLO(problem, epoch=10, pop_size=50)
# model = TLO.ITLO(problem, epoch=10, pop_size=50)
# model = AOA.OriginalAOA(problem, epoch=10, pop_size=50)
# model = CGO.OriginalCGO(problem, epoch=100, pop_size=50)
# model = GBO.OriginalGBO(problem, epoch=10, pop_size=50, pr=0.03)
# model = HC.OriginalHC(problem, epoch=10, pop_size=50)
# model = HC.BaseHC(problem, epoch=10, pop_size=50)
# model = SCA.OriginalSCA(problem, epoch=10, pop_size=50)
# model = SCA.BaseSCA(problem, epoch=10, pop_size=50)
# model = HS.OriginalHS(problem, epoch=10, pop_size=50)
# model = HS.BaseHS(problem, epoch=10, pop_size=50)
# model = ArchOA.OriginalArchOA(problem, epoch=10, pop_size=50)
# model = ASO.BaseASO(problem, epoch=10, pop_size=50)
# model = EFO.OriginalEFO(problem, epoch=10, pop_size=50)
# model = EFO.BaseEFO(problem, epoch=10, pop_size=50)
# model = EO.BaseEO(problem, epoch=10, pop_size=50)
# model = EO.ModifiedEO(problem, epoch=10, pop_size=50)
# model = EO.AdaptiveEO(problem, epoch=10, pop_size=50)
# model = HGSO.BaseHGSO(problem, epoch=10, pop_size=50)
# model = MVO.OriginalMVO(problem, epoch=10, pop_size=50)
# model = MVO.BaseMVO(problem, epoch=10, pop_size=50)
# model = NRO.BaseNRO(problem, epoch=10, pop_size=50)
# model = SA.BaseSA(problem, epoch=10, pop_size=50)
# model = TWO.BaseTWO(problem, epoch=10, pop_size=50)
# model = TWO.OppoTWO(problem, epoch=10, pop_size=50)
# model = TWO.LevyTWO(problem, epoch=10, pop_size=50)
# model = TWO.EnhancedTWO(problem, epoch=10, pop_size=50)
# model = WDO.BaseWDO(problem, epoch=10, pop_size=50)
# model = CEM.BaseCEM(problem, epoch=10, pop_size=50)
# model = AEO.OriginalAEO(problem, epoch=10, pop_size=50)
# model = AEO.ModifiedAEO(problem, epoch=10, pop_size=50)
# model = AEO.AdaptiveAEO(problem, epoch=10, pop_size=50)
# model = AEO.EnhancedAEO(problem, epoch=10, pop_size=50)
# model = AEO.IAEO(problem, epoch=10, pop_size=50)
# model = GCO.OriginalGCO(problem, epoch=10, pop_size=50)
# model = GCO.BaseGCO(problem, epoch=10, pop_size=50)
# model = WCA.BaseWCA(problem, epoch=10, pop_size=50)
# model = ABC.BaseABC(problem, epoch=10, pop_size=50)
# model = ACOR.BaseACOR(problem, epoch=10, pop_size=50)
# model = ALO.OriginalALO(problem, epoch=10, pop_size=50)
# model = ALO.BaseALO(problem, epoch=10, pop_size=50)
# model = AO.OriginalAO(problem, epoch=10, pop_size=50)
# model = BA.BaseBA(problem, epoch=10, pop_size=50)
# model = BA.OriginalBA(problem, epoch=10, pop_size=50)
# model = BA.ModifiedBA(problem, epoch=10, pop_size=50)
# model = BeesA.BaseBeesA(problem, epoch=10, pop_size=50)
# model = BeesA.ProbBeesA(problem, epoch=10, pop_size=50)
# model = BES.BaseBES(problem, epoch=10, pop_size=50)
# model = BFO.OriginalBFO(problem, epoch=10, pop_size=50)
# model = BFO.ABFO(problem, epoch=10, pop_size=50)
# model = BSA.BaseBSA(problem, epoch=10, pop_size=50)
# model = COA.BaseCOA(problem, epoch=10, pop_size=50)
# model = CSA.BaseCSA(problem, epoch=10, pop_size=50)
# model = CSO.BaseCSO(problem, epoch=10, pop_size=50)
# model = DO.BaseDO(problem, epoch=10, pop_size=50)
# model = EHO.BaseEHO(problem, epoch=10, pop_size=50)
# model = FA.BaseFA(problem, epoch=10, pop_size=50)
# model = FFA.BaseFFA(problem, epoch=10, pop_size=50)
# model = FOA.OriginalFOA(problem, epoch=10, pop_size=50)
# model = FOA.BaseFOA(problem, epoch=10, pop_size=50)
# model = FOA.WhaleFOA(problem, epoch=10, pop_size=50)
# model = GOA.BaseGOA(problem, epoch=10, pop_size=50)
# model = GWO.BaseGWO(problem, epoch=10, pop_size=50)
# model = GWO.RW_GWO(problem, epoch=10, pop_size=50)
# model = HGS.OriginalHGS(problem, epoch=10, pop_size=50)
# model = HHO.BaseHHO(problem, epoch=10, pop_size=50)
# model = JA.OriginalJA(problem, epoch=10, pop_size=50)
# model = JA.BaseJA(problem, epoch=10, pop_size=50)
# model = JA.LevyJA(problem, epoch=10, pop_size=50)
# model = MFO.OriginalMFO(problem, epoch=10, pop_size=50)
# model = MFO.BaseMFO(problem, epoch=10, pop_size=50)
# model = MRFO.BaseMRFO(problem, epoch=10, pop_size=50)
# model = MSA.BaseMSA(problem, epoch=10, pop_size=50)
# model = NMRA.BaseNMRA(problem, epoch=10, pop_size=50)
# model = NMRA.ImprovedNMRA(problem, epoch=10, pop_size=50)
# model = PFA.BasePFA(problem, epoch=10, pop_size=50)
# model = PSO.BasePSO(problem, epoch=10, pop_size=50)
# model = PSO.C_PSO(problem, epoch=10, pop_size=50)
# model = PSO.CL_PSO(problem, epoch=10, pop_size=50)
# model = PSO.PPSO(problem, epoch=10, pop_size=50)
# model = PSO.HPSO_TVAC(problem, epoch=10, pop_size=50)
# model = SFO.BaseSFO(problem, epoch=10, pop_size=50)
# model = SFO.ImprovedSFO(problem, epoch=10, pop_size=50)
# model = SHO.BaseSHO(problem, epoch=10, pop_size=50)
# model = SLO.BaseSLO(problem, epoch=10, pop_size=50)
# model = SLO.ModifiedSLO(problem, epoch=10, pop_size=50)
# model = SLO.ISLO(problem, epoch=10, pop_size=50)
# model = SRSR.BaseSRSR(problem, epoch=10, pop_size=50)
# model = SSA.OriginalSSA(problem, epoch=10, pop_size=50)
# model = SSA.BaseSSA(problem, epoch=10, pop_size=50)
# model = SSO.BaseSSO(problem, epoch=10, pop_size=50)
# model = SSpiderA.BaseSSpiderA(problem, epoch=10, pop_size=50)
# model = SSpiderO.BaseSSpiderO(problem, epoch=10, pop_size=50)
# model = WOA.BaseWOA(problem, epoch=10, pop_size=50)
# model = WOA.HI_WOA(problem, epoch=10, pop_size=50)
- If this code is useful for you, please give me credit by citing or linking to my first-author papers.
@software{thieu_nguyen_2020_3711949,
author = {Nguyen Van Thieu},
title = {A collection of the state-of-the-art MEta-heuristics ALgorithms in PYthon: Mealpy},
month = march,
year = 2020,
publisher = {Zenodo},
doi = {10.5281/zenodo.3711948},
url = {https://doi.org/10.5281/zenodo.3711948}
}