% Bibliography database (classic BibTeX), one entry per paragraph, one field per line.
% Field names prefixed with OPT are not recognised by BibTeX styles and are therefore
% ignored; they are kept only where they carry private reading notes.
% Page ranges use "--"; months use the predefined three-letter macros.

@Article{Collobert,
  author       = {R. Collobert and S. Bengio and Y. Bengio},
  title        = {A Parallel Mixture of {SVMs} for Very Large Scale Problems},
  journal      = {Neural Computation},
  year         = {2002},
  volume       = {14},
  pages        = {1105--1114}
}

@InProceedings{Fan02,
  author       = {W. Fan and F. Chu and H. Wang and P.S. Yu},
  title        = {Pruning and Dynamic Scheduling of Cost-sensitive Ensembles},
  booktitle    = {AAAI-02},
  pages        = {146--151},
  year         = {2002},
  OPTannote    = {This paper discusses learners that produce probabilities. costs
                  sensitive and skew are synonymous. We might say this is suggestive
                  that ensembles can be utilized with skewed data sets. It has a good
                  idea of quitting predicting from the ensemble when you are "sure" of
                  the decision for speed. It shows that pruning can get you a better
                  ensemble and better than the utilizing diversity though it is
                  unclear what diversity is here.}
}

@InProceedings{gaselect,
  author       = {K. Sirlantzis and S. Hoque and M.C. Fairhurst},
  title        = {Trainable Multiple Classifier Schemes for Handwritten Character Recognition},
  booktitle    = {Multiple Classifier Systems, Third International Workshop},
  pages        = {169--178},
  year         = {2002},
  editor       = {F. Roli and J. Kittler},
  volume       = {2364},
  series       = {Lecture Notes in Computer Science},
  publisher    = {Springer}
}

@InProceedings{gabunke,
  author       = {S. Gunter and H. Bunke},
  title        = {Generating Classifier's Ensembles from Multiple Prototypes and its Application to Handwriting Recognition},
  booktitle    = {Multiple Classifier Systems, Third International Workshop},
  pages        = {179--188},
  year         = {2002},
  editor       = {F. Roli and J. Kittler},
  volume       = {2364},
  series       = {Lecture Notes in Computer Science},
  publisher    = {Springer}
}

@InProceedings{svmediet,
  author       = {G. Valentini and T.G. Dietterich},
  title        = {Bias-Variance Analysis and Ensembles of {SVM}},
  booktitle    = {Multiple Classifier Systems, Third International Workshop},
  pages        = {222--231},
  year         = {2002},
  editor       = {F. Roli and J. Kittler},
  volume       = {2364},
  series       = {Lecture Notes in Computer Science},
  publisher    = {Springer}
}

@InProceedings{IVM,
  author       = {J. Zhu and T. Hastie},
  title        = {Support Vector Machines, Kernel Logistic Regression and Boosting},
  booktitle    = {Multiple Classifier Systems, Third International Workshop},
  pages        = {16--26},
  year         = {2002},
  editor       = {F. Roli and J. Kittler},
  volume       = {2364},
  series       = {Lecture Notes in Computer Science},
  publisher    = {Springer}
}

@InProceedings{prov99,
  author       = {Provost, F.J. and Jensen, D. and Oates, T.},
  title        = {Efficient Progressive Sampling},
  booktitle    = {Proceedings of the Fifth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  pages        = {23--32},
  year         = {1999}
}

@Article{tec,
  author       = {Hall, L.O. and Ozyurt, I.B. and Bezdek, J.C.},
  title        = {Clustering with a Genetically Optimized Approach},
  journal      = {IEEE Transactions on Evolutionary Computation},
  year         = {1999},
  volume       = {3},
  number       = {2},
  pages        = {103--112}
}

@Article{jair02,
  author       = {Chawla, N.V. and Bowyer, K.W. and Hall, L.O. and Kegelmeyer, W.P.},
  title        = {{SMOTE}: Synthetic Minority Over-sampling {TEchnique}},
  journal      = {Journal of Artificial Intelligence Research},
  year         = {2002},
  volume       = {16},
  pages        = {321--357}
}

@InProceedings{smote1,
  author       = {Chawla, N.V. and Bowyer, K.W. and Hall, L.O. and Kegelmeyer, W.P.},
  title        = {{SMOTE}: A Synthetic Minority Oversampling Technique},
  booktitle    = {Knowledge Based Computer Systems},
  year         = {2000}
}

@Book{goldberg,
  author       = {Goldberg, D.},
  title        = {Genetic Algorithms in Search, Optimization and Machine Learning},
  publisher    = {Addison-Wesley},
  year         = {1989}
}

@Article{Nigam,
  author       = {Nigam, K. and McCallum, A. and Thrun, S. and Mitchell, T.},
  title        = {Text classification from labeled and unlabeled documents using {EM}},
  journal      = {Machine Learning},
  year         = {2000},
  volume       = {39},
  number       = {2--3},
  pages        = {103--134}
}

@Article{text1,
  author       = {Larsen, J.A. and Hansen, L.K. and Have, A.S. and Christiansen, T. and Kolenda, T.},
  title        = {Webmining: learning from the {World Wide Web}},
  journal      = {Computational Statistics \& Data Analysis},
  year         = {2002},
  volume       = {38},
  number       = {4},
  pages        = {517--532}
}

% NOTE(review): title corrected from "Decision-specific" to "Position-specific".
@Article{Jones,
  author       = {D.T. Jones},
  title        = {Protein Secondary Structure Prediction Based on Position-specific Scoring Matrices},
  journal      = {Journal of Molecular Biology},
  year         = {1999},
  volume       = {292},
  pages        = {195--202}
}

@Article{Credit,
  author       = {Prodromidis, A.L. and Stolfo, S.J.},
  title        = {Cost complexity-based pruning of ensemble classifiers},
  journal      = {Knowledge and Information Systems},
  year         = {2001},
  volume       = {3},
  number       = {4},
  pages        = {449--469}
}

@Article{SVMtext,
  author       = {E. Leopold and J. Kindermann},
  title        = {Text Categorization with Support Vector Machines. How to Represent Texts in Input Space},
  journal      = {Machine Learning},
  year         = {2002},
  volume       = {46},
  number       = {1--3},
  pages        = {423--444}
}

% NOTE(review): Heath duplicates the work recorded under key mamodb; both keys are kept
% because either may be cited.
@article{Heath,
  author       = {M. Heath and K. Bowyer and D. Kopans and P. Kegelmeyer and R. Moore and K. Chang and S. Munishkumaran},
  journal      = {International Workshop on Digital Mammography, Nijmegen, The Netherlands},
  pages        = {457--460},
  title        = {Current status of the Digital Database for Screening Mammography},
  OPTannote    = {DDSM reference},
  year         = {1998}
}

@article{TParr98-2,
  author       = {T.C. Parr and R. Zwiggelaar and S.M. Astley and C.R.M. Boggis and C.J. Taylor},
  journal      = {International Workshop on Digital Mammography, Nijmegen, The Netherlands},
  pages        = {71--78},
  title        = {Comparison methods for combining evidence for spiculated lesions},
  year         = {1998}
}

@InProceedings{kdd01,
  author       = {Chawla, N. and Moore, Jr., T.E. and K.W. Bowyer and L.O. Hall and C. Springer and W.P. Kegelmeyer},
  title        = {Bagging-Like Effects for Decision Trees and Neural Nets in Protein Secondary Structure Prediction},
  booktitle    = {BioKDD01 Workshop on DataMining in Bioinformatics},
  pages        = {50--59},
  year         = {2001},
  note         = {KDD Conference}
}

@Book{SVM,
  author       = {V. Vapnik},
  title        = {Statistical Learning Theory},
  publisher    = {Wiley},
  year         = {1998},
  address      = {N.Y., N.Y.}
}

@InCollection{SVMlight,
  author       = {T. Joachims},
  title        = {Making Large-Scale {SVM} Learning Practical},
  booktitle    = {Advances in Kernel Methods - Support Vector Learning},
  publisher    = {MIT Press},
  year         = {1999},
  editor       = {B. Sch{\"o}lkopf and C. Burges and A. Smola}
}

@TechReport{Avatar,
  author       = {W.P. Kegelmeyer},
  title        = {{AVATAR}},
  institution  = {Sandia National Labs},
  year         = {1998},
  address      = {http://www.ca.sandia.gov/avatar}
}

@Article{LogTree,
  author       = {C. Perlich and F. Provost and J. Simonoff},
  title        = {Tree Induction vs. Logistic Regression: A Learning-curve Analysis},
  journal      = {Journal of Machine Learning Research},
  year         = {2002},
  note         = {To appear},
  OPTannote    = {This paper is showing a comparison of logistic regression and tree
                  induction. It shows that tree induction does better for very large
                  training data sets or even large training data sets. It also shows
                  that classifier accuracy tends to increase even with very large
                  training data sets, as more examples are given to the classifier.
                  That is, if you have 2 million examples until the classifier with
                  one million, 1,500,000, and 2 million the classifier built on 2
                  million examples will be the most accurate. They show this with
                  learning curves. This is contrary to some previous work even by the
                  same author. }
}

@InProceedings{Sea,
  author       = {W. N. Street and Y. Kim},
  title        = {A Streaming Ensemble Algorithm ({SEA}) for Large-Scale Classification},
  booktitle    = {KDD'01},
  pages        = {377--382},
  year         = {2001},
  editor       = {F. Provost and R. Srikant},
  note         = {San Francisco, CA}
}

@InProceedings{svmens,
  author       = {T. Evgeniou and L. Perez-Breva and M. Pontil and T. Poggio},
  title        = {Bounds on the Generalization Performance of Kernel Machine Ensembles},
  booktitle    = {Proceedings of Seventeenth International Conference on Machine Learning, Stanford University},
  year         = {2000}
}

@InProceedings{mamodb,
  author       = {Heath, M. and Bowyer, K. and Kopans, D. and Kegelmeyer, P. and Moore, R. and Chang, K. and Munishkumaran, S.},
  title        = {Current status of the Digital Database for Screening Mammography},
  booktitle    = {Digital Mammography},
  series       = {Proceedings of the Fourth International Workshop on Digital Mammography},
  year         = {1998},
  annote       = {Nijmegen, The Netherlands}
}

@Article{Vis-ed,
  author       = {Berry, M.W.},
  title        = {Guest Editor's Introduction: Massive Data Visualization},
  journal      = {Computing in Science and Engineering},
  year         = {1999},
  volume       = {1},
  number       = {4},
  pages        = {16--17}
}

@Article{Vis-Asci,
  author       = {Heermann, P.D.},
  title        = {First-Generation {ASCI} Production Visualization Environment},
  journal      = {IEEE Computer Graphics and Applications},
  year         = {1999},
  pages        = {66--71},
  month        = sep # "/" # oct
}

@Article{Vis-data-handle,
  author       = {Norman, M.L. and Shalf, J. and Levy, S. and Daues, G.},
  title        = {Diving Deep: Data-Management and Visualization Strategies for Adaptive Mesh Refinement Simulations},
  journal      = {Computing in Science and Engineering},
  year         = {1999},
  volume       = {1},
  number       = {4},
  pages        = {36--47}
}

@Article{Vis-scale,
  author       = {Nakano, A. and Kalia, R.K. and Vashishta, P.},
  title        = {Scalable Molecular-Dynamics, Visualization, and Data-Management Algorithms for Materials Simulations},
  journal      = {Computing in Science and Engineering},
  year         = {1999},
  volume       = {1},
  number       = {5},
  pages        = {39--47}
}

@Manual{Blue,
  title        = {Using {ASCI} {Blue-Pacific}},
  organization = {Lawrence Livermore National Laboratory},
  address      = {http://www.llnl.gov/asci/platforms/bluepac/},
  year         = {1999}
}

@Article{Marefat,
  author       = {Marefat, M.M. and Varecka, A.F. and Yost, J.},
  title        = {An Intelligent Visualization Agent for Simulation-Based Decision Support},
  journal      = {IEEE Computational Science and Engineering},
  year         = {1997},
  volume       = {4},
  number       = {3},
  pages        = {77--82}
}

@Book{parprog,
  author       = {Ian Foster},
  title        = {Designing and Building Parallel Programs},
  publisher    = {Addison-Wesley},
  year         = {1995},
  OPTannote    = {Fairly nice on-line version available at:http://www-unix.mcs.anl.gov/dbpp/ }
}

@InProceedings{NCAR,
  author       = {Don Middleton},
  title        = {Visualizing Terabyte Earth Systems Simulation Datasets: The Challenges Ahead},
  booktitle    = {NSF/DOE Workshop on Large-Scale Visualization and Data Management},
  year         = {1999},
  address      = {Salt Lake City, Utah},
  month        = may
}

@Article{ROC,
  author       = {Woods, K.S. and Bowyer, K.W.},
  title        = {Generating {ROC} curves for artificial neural networks},
  journal      = {IEEE Transactions on Medical Imaging},
  year         = {1997},
  volume       = {16},
  number       = {3},
  pages        = {329--337},
  month        = jun
}

@TechReport{internal,
  author       = {N. Chawla and L.O. Hall},
  title        = {Modifying {MUSTAFA} to capture salient data},
  institution  = {University of South Florida},
  year         = {1999},
  number       = {ISL-99-01},
  address      = {Computer Science and Eng. Dept.}
}

@Article{Woods,
  author       = {Woods, K. and Kegelmeyer, W.P. and Bowyer, K.W.},
  title        = {Combination of multiple classifiers using local accuracy estimates},
  journal      = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume       = {19},
  number       = {4},
  pages        = {405--410},
  year         = {1997},
  month        = apr
}

@Article{Woods-comp,
  author       = {Woods, K.S. and Doss, C.C. and Bowyer, K.W. and Solka, J.L. and Priebe, C.E. and Kegelmeyer, W.P.},
  title        = {Comparative evaluation of pattern recognition techniques for detection of microcalcifications in mammography},
  journal      = {International Journal of Pattern Recognition and Artificial Intelligence},
  year         = {1993},
  volume       = {7},
  number       = {6},
  pages        = {1417--1436},
  month        = dec
}

%Parallel Programming with MPI, Peter Pacheco, Morgan Kaufmann.

@Article{Gropp:1996:HPI,
  author       = {W. Gropp and E. Lusk and N. Doss and A. Skjellum},
  title        = {A high-performance, portable implementation of the {MPI} message passing interface standard},
  journal      = {Parallel Computing},
  volume       = {22},
  number       = {6},
  pages        = {789--828},
  month        = sep,
  year         = {1996}
}

@Manual{MPICH,
  title        = {User's Guide for {\tt mpich}, a Portable Implementation of {MPI}},
  author       = {William D. Gropp and Ewing Lusk},
  note         = {ANL-96/6},
  organization = {Mathematics and Computer Science Division, Argonne National Laboratory},
  year         = 1996
}

@Manual{ASCI,
  title        = {{ASCI} {Red} Users Manual},
  organization = {Sandia National Labs},
  address      = {http://www.sandia.gov/ASCI/Red/UserGuide.htm},
  year         = {1997}
}

@InProceedings{Michalski,
  author       = {Michalski, R.S. and Mozetic, I. and Hong, J. and Lavrac, N.},
  title        = {The multi-purpose incremental learning system {AQ15} and its testing application to three medical domains},
  booktitle    = {Proceedings of the Fifth National Conference on AI},
  pages        = {1041--1045},
  year         = {1986}
}

@Book{Cart,
  author       = {L. Breiman and J.H. Friedman and R.A. Olshen and C.J. Stone},
  title        = {Classification and Regression Trees},
  publisher    = {Wadsworth International Group},
  year         = {1984},
  address      = {Belmont, CA.}
}

@InProceedings{Chan,
  author       = {P. Chan and S. Stolfo},
  title        = {Sharing learned models among remote database partitions by local meta-learning},
  booktitle    = {Proceedings Second International Conference on Knowledge Discovery and Data Mining},
  pages        = {2--7},
  year         = {1996}
}

@Article{Chan2,
  author       = {P. Chan and S. Stolfo},
  title        = {On the accuracy of Meta-Learning for Scalable Data Mining},
  journal      = {Journal of Intelligent Information Systems},
  year         = {1997},
  volume       = {8},
  pages        = {5--28}
}

% NOTE(review): editors aligned with entry Sea, which lists the same KDD'01 editors.
@InProceedings{dom01,
  author       = {Hulten, G. and Spencer, L. and Domingos, P.},
  title        = {Mining Time-Changing Data Streams},
  booktitle    = {Proceedings of the Seventh ACM SIGKDD International Conference on Knowledge Discovery And Data Mining},
  pages        = {97--106},
  year         = {2001},
  editor       = {Provost, F. and Srikant, R.}
}

@InProceedings{VFDT,
  author       = {Domingos, P. and Hulten, G.},
  title        = {Mining high-speed data streams},
  booktitle    = {Proceedings of KDD-2000},
  pages        = {71--80},
  year         = {2000}
}

@InProceedings{Domingos,
  author       = {P. Domingos},
  title        = {Knowledge Acquisition from Examples via Multiple Models},
  booktitle    = {International Conference on Machine Learning},
  year         = {1997}
}

@Article{Iris,
  author       = {R.A. Fisher},
  title        = {The use of multiple measurements in taxonomic problems},
  journal      = {Annals of Eugenics},
  year         = {1936},
  volume       = {7},
  pages        = {179--188}
}

@InProceedings{usfkdd99,
  author       = {Hall, L.O. and Chawla, N. and Bowyer, K.W. and Kegelmeyer, W.P.},
  title        = {Learning Rules from Distributed Data},
  booktitle    = {Proceedings of the Workshop on Large-Scale Parallel KDD Systems},
  publisher    = {Technical Report 99-8, Rensselaer Polytechnic Institute, CS Dept.},
  editor       = {Zaki, M.J. and Ho, C.-T.},
  year         = {1999},
  organization = {KDD99},
  pages        = {77--83}
}

@InProceedings{Utgoff,
  author       = {Piater, J.H. and Riseman, E.M. and Utgoff, P.E.},
  title        = {Interactively Training Pixel Classifiers},
  booktitle    = {Proceedings of the Eleventh International Florida Artificial Intelligence Research Symposium Conference},
  pages        = {57--61},
  year         = {1998},
  editor       = {Cook, D.C.}
}

@InProceedings{smc98,
  author       = {L.O. Hall and N. Chawla and K.W. Bowyer},
  title        = {Decision Tree Learning on Very Large Data Sets},
  booktitle    = {International Conference on Systems, Man and Cybernetics},
  month        = oct,
  pages        = {2579--2584},
  year         = {1998}
}

@InProceedings{Kufrin,
  author       = {R. Kufrin},
  title        = {Generating {C4.5} Production Rules In Parallel},
  booktitle    = {Proceedings of the Fourteenth National Conference on Artificial Intelligence (AAAI-97)},
  pages        = {565--570},
  year         = {1997},
  month        = jul
}

@Book{Mitchell,
  author       = {T.M. Mitchell},
  title        = {Machine Learning},
  publisher    = {McGraw-Hill},
  year         = {1997},
  address      = {N.Y.}
}

@Manual{Irvine,
  title        = {{UCI} Repository of Machine Learning Databases},
  author       = {C.J. Merz and P.M. Murphy},
  organization = {Univ. of CA., Dept. of CIS},
  address      = {Irvine, CA.},
  note         = {http://www.ics.uci.edu/\~{}mlearn/MLRepository.html}
}

@InProceedings{Provost1,
  author       = {F.J. Provost and D. Hennessy},
  title        = {Distributed Machine Learning: Scaling up with coarse-grained parallelism},
  booktitle    = {Proceedings of the Second International Conference on Intelligent Systems for Molecular Biology},
  year         = {1994}
}

@InProceedings{cheng90,
  author       = {Cheng, J. and Fayyad, U.M. and Irani, K.B. and Qian, Z.},
  title        = {Applications of Machine Learning Techniques in Semiconductor Manufacturing},
  booktitle    = {Proceedings of the SPIE Conference on Applications of Artificial Intelligence VIII},
  pages        = {956--965},
  year         = {1990},
  address      = {Orlando, Fl.}
}

@InProceedings{Buchanan,
  author       = {Clearwater, S. and Cheng, T. and Hirsh, H. and Buchanan, B.},
  title        = {Incremental Batch Learning},
  booktitle    = {Proceedings of the Sixth Int. Workshop on Machine Learning},
  pages        = {366--370},
  year         = {1989}
}

@InProceedings{cascade,
  author       = {Fahlman, S. E. and Lebiere, C.},
  title        = {The Cascade-Correlation Learning Architecture},
  booktitle    = {Advances in Neural Information Processing Systems 2},
  publisher    = {Morgan Kaufmann},
  year         = {1990}
}

@InProceedings{fuzz02,
  author       = {Keswani, G. and Hall, L.O.},
  title        = {Text Classification with Enhanced Semi-Supervised Fuzzy Clustering},
  booktitle    = {Proceedings of the IEEE International Conference on Fuzzy Systems},
  year         = {2002}
}

@InProceedings{Cohen,
  author       = {W.W. Cohen},
  title        = {Fast Effective Rule Induction},
  booktitle    = {Proceedings of the 12th International Conference on Machine Learning},
  year         = {1995}
}

@InProceedings{Provost,
  author       = {F.J. Provost and D.N. Hennessy},
  title        = {Scaling Up: Distributed Machine Learning with Cooperation},
  booktitle    = {Proceedings of AAAI'96},
  pages        = {74--79},
  year         = {1996}
}

@Article{Quinlan87a,
  author       = {J.R. Quinlan},
  title        = {Simplifying Decision Trees},
  journal      = {International Journal of Man-Machine Studies},
  year         = {1987},
  volume       = {27},
  pages        = {227--248}
}

@InProceedings{Quinlan87,
  author       = {J.R. Quinlan},
  title        = {Generating Production Rules from decision trees},
  booktitle    = {Proceedings of IJCAI-87},
  year         = {1987},
  pages        = {304--307}
}

@Book{C4.5,
  author       = {J.R. Quinlan},
  title        = {{C4.5:} Programs for Machine Learning},
  publisher    = {Morgan Kaufmann},
  year         = 1992,
  address      = {San Mateo, CA}
}

@Article{Quinlan,
  author       = {J.R. Quinlan},
  title        = {Improved Use of Continuous Attributes in {C4.5}},
  journal      = {Journal of Artificial Intelligence Research},
  year         = {1996},
  volume       = {4},
  pages        = {77--90}
}

@Article{Weiss,
  author       = {Weiss, S.M. and Galen, R.S. and Tadepalli, P.V.},
  title        = {Maximizing the Predictive Value of Production Rules},
  journal      = {Artificial Intelligence},
  year         = {1990},
  volume       = {45},
  pages        = {47--71}
}

@InProceedings{Clark91,
  author       = {Clark, P. and Boswell, R.},
  title        = {Rule Induction with {CN2}: Some recent improvements},
  booktitle    = {Proceedings of the {Fifth} European Working Session on Learning},
  pages        = {151--163},
  year         = {1991}
}

@InProceedings{Segal94,
  author       = {Segal, R. and Etzioni, O.},
  title        = {Learning Decision Lists using Homogeneous Rules},
  booktitle    = {Proceedings of the AAAI-94},
  pages        = {619--624},
  year         = {1994}
}

@InProceedings{Quinlan95,
  author       = {Quinlan, J.R. and Cameron-Jones, R.M.},
  title        = {Oversearching and Layered Search in Empirical Learning},
  booktitle    = {Proceedings of the IJCAI-95},
  pages        = {1019--1024},
  year         = {1995}
}

@Article{Mingersb,
  author       = {J. Mingers},
  title        = {An Empirical Comparison of pruning methods for decision tree induction},
  journal      = {Machine Learning},
  year         = {1989},
  volume       = {4},
  number       = {2},
  pages        = {227--243}
}

@InProceedings{CVPR2001,
  author       = {N.~Chawla and Moore, Jr., T.E. and K.W.~Bowyer and L.O.~Hall and C.~Springer and W.P.~Kegelmeyer},
  title        = {Bagging Is A Small-Data-Set Phenomenon},
  booktitle    = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages        = {684--689},
  year         = {2001}
}

@InProceedings{smc2000,
  author       = {Bowyer, K.W. and Chawla, N.V. and Moore, Jr., T.E. and Hall, L.O. and Kegelmeyer, W.P.},
  title        = {A Parallel Decision Tree Builder for Mining Very Large Visualization DataSets},
  booktitle    = {IEEE Systems, Man, and Cybernetics Conference},
  pages        = {1888--1893},
  year         = {2000}
}

@InProceedings{Scalparcp,
  author       = {Mahesh V. Joshi and George Karypis and Vipin Kumar},
  title        = {{ScalParC}: A new scalable and efficient parallel classification algorithm for mining large datasets},
  booktitle    = {Proceedings of the International Parallel Processing Symposium},
  pages        = {573--579},
  year         = {1998}
}

@InProceedings{Sprint,
  author       = {John Shafer and Rakesh Agrawal and Manish Mehta},
  title        = {{SPRINT}: A Scalable Parallel Classifier for Data Mining},
  booktitle    = {Proceedings of the 22nd VLDB Conference},
  year         = {1996}
}

@TechReport{Scalparc,
  author       = {M. V. Joshi and G. Karypis and V. Kumar},
  title        = {{ScalParC}: A New Scalable and Efficient Parallel Classification Algorithm for Mining Large Datasets},
  institution  = {University of Minnesota},
  year         = {1998},
  annote       = {Dept. of CS, http://www.cs.umn.edu/\~{}kumar}
}

@Article{Quinlan90,
  author       = {J.R. Quinlan},
  title        = {Learning Logical Definitions from Relations},
  journal      = {Machine Learning},
  year         = {1990},
  volume       = {5},
  number       = {3}
}

@InProceedings{QuinlanJones93,
  author       = {Quinlan, J.R. and Cameron-Jones, R.M.},
  title        = {{FOIL}: A midterm report},
  booktitle    = {Machine Learning: ECML-93},
  year         = {1993},
  editor       = {Pavel B. Brazdil},
  volume       = {667},
  series       = {Lecture Notes in Computer Science},
  publisher    = {Springer-Verlag}
}

@InProceedings{Provost90,
  author       = {Clearwater, S. and Provost, F.},
  title        = {{RL4:} A Tool for Knowledge-Based Induction},
  booktitle    = {Proceedings of the 2nd International IEEE Conference on Tools for AI},
  pages        = {24--30},
  year         = {1990}
}

% NOTE(review): the original carried 23 in "number" with no volume; 23 is the volume.
@Article{Provost-massive,
  author       = {F.J. Provost and J. Aronis},
  title        = {Scaling Up Inductive Learning with Massive Parallelism},
  journal      = {Machine Learning},
  year         = {1996},
  volume       = {23}
}

@InProceedings{SKICAT,
  author       = {U. Fayyad and others},
  title        = {{SKICAT}: A Machine Learning System for Automated Cataloging of Large Scale Sky Surveys},
  booktitle    = {Proceedings of the Tenth International Conference on Machine Learning},
  publisher    = {Morgan Kaufmann},
  year         = {1993}
}

@InProceedings{kdd97,
  author       = {S.J. Stolfo and A.L. Prodromidis and S. Tselepis and W. Lee and D. Fan and P.K. Chan},
  title        = {{JAM}: {Java} Agents for Meta-learning over Distributed Databases},
  booktitle    = {Proc. KDD-97},
  year         = {1997}
}

@InProceedings{intru99,
  author       = {W. Lee and S.J. Stolfo and K. Mok},
  title        = {A Data Mining Framework for Building Intrusion Detection Models},
  booktitle    = {Proc. 1999 IEEE Symposium on Security and Privacy},
  year         = {1999}
}

@InProceedings{Pruning,
  author       = {A. L. Prodromidis and S. J. Stolfo and P. K. Chan},
  title        = {Pruning Classifiers in a Distributed Meta-Learning System},
  booktitle    = {Proc. of First National Conference on New Information Technologies},
  pages        = {151--160},
  year         = {1998}
}

% NOTE(review): year changed from 2000 to 2001 -- the Seventh ACM SIGKDD conference is
% dated 2001 in entry dom01; confirm against the proceedings.
@InProceedings{Obr01,
  author       = {Lazarevic, A. and Obradovic, Z.},
  title        = {The distributed boosting algorithm},
  booktitle    = {Seventh ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year         = {2001}
}

@InProceedings{Scalable,
  author       = {P. K. Chan and S. J. Stolfo},
  title        = {Toward Scalable Learning with Non-uniform Class and Cost Distributions: A Case Study in Credit Card Fraud Detection},
  booktitle    = {Proc. KDD-98},
  year         = {1998}
}

@Article{OC1,
  author       = {Murthy, S.K. and Kasif, S. and Salzberg, S.},
  title        = {A system for induction of Oblique decision trees},
  journal      = {Journal of Artificial Intelligence Research},
  year         = {1994},
  volume       = {2},
  pages        = {1--33}
}

@Article{Agrawal,
  author       = {R. Agrawal and T. Imielinski and A. Swami},
  title        = {Database mining: A performance perspective},
  journal      = {IEEE Transactions on Knowledge and Data Engineering},
  year         = {1993},
  volume       = {5},
  number       = {6},
  pages        = {914--925}
}

@Article{Breiman99,
  author       = {L. Breiman},
  title        = {Pasting Bites Together for Prediction in Large Data Sets},
  journal      = {Machine Learning},
  year         = {1999},
  volume       = {36},
  number       = {1--2},
  pages        = {85--103},
  OPTannote    = {Concentrates on dealing with data sets which are large where large
                  is defined as too big for main memory on a modern workstation.
                  Looks at datasets of up to 50,000 examples. Uses CART as its
                  classification tool. Builds K trees or predictors and aggregates
                  them. The aggregation is done by taking a vote. The key focus here
                  is on how to create the training data sets for the predictors and
                  how to decide when to stop adding predictors. Essentially, a
                  sequential build of K on N examples which are a subset of the total
                  number of examples. For each K two ways of choosing N are explored.
                  Bagging where the N examples are chosen randomly from the full set
                  is one, but it does not provide as good performance as arcing.
                  Arcing puts examples in a train set for K that have not been
                  correctly classified by the current classifier plus other
                  classified examples chosen with a calculated probability. Arcing is
                  related to boosting. The arcing approach together with a
                  well-chosen error estimate leads to performance that is quite
                  comparable to training on all the data when N is "large enough". No
                  tree pruning is done. An interesting question for future work is
                  can the K trees be stored in a more compact form. I take this to
                  mean a single tree or less trees or a set of rule or a few sets of
                  rules. Also asks question about deleting unneeded trees or not
                  adding them on. So, this approach looks at the whole training data
                  set when creating each training subset. The subsets are not likely
                  to be disjoint. They also concern themselves with which examples
                  have been used before in training which may be painful to keep
                  track of for really large train datasets.}
}

@Article{Merz,
  author       = {C. Merz},
  title        = {Using Correspondence Analysis to Combine Classifiers},
  journal      = {Machine Learning},
  year         = {1999},
  volume       = {36},
  number       = {1--2},
  OPTannote    = { The paper looks at combining multiple learned models with the goal
                  of forming an improved classifier. The training data sets are not
                  disjoint. They want to make the models diverse in the sense they
                  make errors in different ways. They note that when errors are
                  uncorrelated the optimal approach is to take a majority vote. They
                  use stacking and correspondence analysis to do the combination.
                  Essentially, stacking is the idea that K classifiers are trained on
                  the same data set and then tested on a held-out data set which is
                  labeled. A vector of their predictions and the correct class is
                  created and used to train another classifier to better do the
                  classification. Results in K+1 classifiers. Correspondence analysis
                  will produce a pretty large matrix used in doing classification.
                  For K classifiers and c classes it will have (K+1)*c columns per
                  row. There will be one row for each training example. For a lot of
                  examples this approach will be very costly. For a "small" test set
                  it is O.K. The approach performs well for poorly learned models and
                  is considered "as good as" plurality voting. Nothing about really
                  large K or large example sets. Acknowledges that you get a loss of
                  interpretable (i.e. no rules or trees.) }
}

@InProceedings{Shannon97,
  author       = {W.D. Shannon and D. Banks},
  title        = {A Distance Metric for Classification Trees},
  booktitle    = {Sixth International Workshop on Artificial Intelligence and Statistics},
  year         = {1997},
  address      = {Fort Lauderdale, Fl.},
  OPTannote    = {Merz claims this paper shows how to aggregate a set of decision trees into a single tree }
}

@Article{Bauer,
  author       = {E. Bauer and R. Kohavi},
  title        = {An Empirical Comparison of Voting Classification Algorithms: Bagging, Boosting, and Variants},
  journal      = {Machine Learning},
  year         = {1999},
  volume       = {36},
  number       = {1--2},
  pages        = {105--139},
  OPTannote    = { Shows boosting increases performance the most. Bagging increases
                  performance without any degradation. The best performance came from
                  AdaBoost (arc-fs in Breiman's terminology). Largest train data set
                  was 11,000. Largest data set was 58,000 examples. Train data set
                  sizes were chosen to allow for improvement in learning performance
                  (i.e. the default learners could not get max. performance with the
                  given sizes). They used 25 sub-classifiers in the paper for all
                  experiments. Boosting was found to affected by noise. Stacking a
                  naive Bayes classifier here did not help (however perhaps it was
                  the classifier). Boosting is not comprehensible in the final model.
                  Boosting requires the estimated training set error on Trial T to
                  generate the train data weights for trial T+1. Hence, it is not
                  very parallel. Bagging CAN be used in parallel. Some Bagging
                  variants outperformed the original Bagging algorithm. }
}

@Article{DTC,
  author       = {W.D. Shannon and D. Banks},
  title        = {Combining Classification Trees Using {MLE}},
  journal      = {Statistics in Medicine},
  year         = {1999},
  OPTannote    = { This paper discusses taking n decision trees and creating a single
                  decision tree from them. The single tree is a kind of central tree.
                  The centrality of the tree is measured with a maximum likelihood
                  estimate. Each of the n trees is made a candidate for the ``central
                  tree''. Then tests are randomly added or a single prune is done
                  (200) times. The tree that is closest to all the other trees is
                  kept as the central tree. At each node only the splitting attribute
                  (not the value) is kept. They do an experiment where n = 13 and
                  each n is a clinical trial at a different hospital. Only continuous
                  attributes are in these trees. The actual split values in the final
                  chosen tree is gotten by using Gini with the chosen
                  attribute/feature and ``examples at the node''. They do not say
                  what train set is used to do this. I assume it is the full train
                  set. Performance on this example is better than with individual
                  trees or a tree grown on the full data. It is less than with
                  bagging. There is a problem if the trees are multi-modal or there
                  are multiple shapes that produces equally good classifiers. In that
                  case, one does not want to ``average'' trees. Only do the averaging
                  of trees from the same mode. Could the add/delete a split simply
                  have the effect of better pruning? This is not tested as they
                  apparently use CART's default pruning and do not discuss this. They
                  call 10-fold CV pruning... }
}

@InProceedings{MCS2002,
  author       = {Chawla, N.V. and Hall, L.O. and Bowyer, K.W. and Moore, Jr., T. E. and Kegelmeyer, W.P.},
  title        = {Distributed Pasting of Small Votes},
  booktitle    = {Multiple Classifier Systems Conference},
  year         = {2002},
  address      = {Cagliari, Italy},
  month        = jun
}

@InProceedings{Boost2,
  author       = {Freund, Y. and Schapire, R.E.},
  title        = {Experiments with a new Boosting Algorithm},
  booktitle    = {Machine Learning: Proceedings of the Thirteenth International Conference},
  year         = {1996},
  pages        = {148--156}
}

@Article{Boost1,
  author       = {Schapire, R.E.},
  title        = {The Strength of Weak Learnability},
  journal      = {Machine Learning},
  year         = {1990},
  volume       = {5},
  number       = {2},
  pages        = {197--227}
}

@Article{Bag1,
  author       = {L. Breiman},
  title        = {Bagging Predictors},
  journal      = {Machine Learning},
  year         = {1996},
  volume       = {24},
  pages        = {123--140}
}

@InProceedings{Jelonek,
  author       = {J. Jelonek},
  title        = {Generalization Capability of Homogeneous Voting Classifier Based on Partially Replicated Data},
  series       = {Integrating Multiple Learned Models for Improving and Scaling Machine Learning Algorithms Workshop},
  booktitle    = {AAAI'96},
  year         = {1996},
  note         = {Portland, OR},
  OPTannote    = {Discussing learning with the same base classifier on partially
                  disjoint data sets. The idea is to have diversity from the training
                  data. Their overlap is up to 90\%. They do show better results
                  using voting of the classifiers for C4.5 anyway. }
}

@InProceedings{boostm1w,
  author       = {Gunther Eibl and Karl-Peter Pfeiffer},
  title        = {How to Make {AdaBoost.M1} work for weak classifiers by changing only one line of the code},
  booktitle    = {Machine Learning: Proceedings of the Thirteenth European Conference},
  year         = {2002},
  pages        = {109--120}
}

@InProceedings{Brodley96, author = "C.E. Brodley and T.
Lane", title = "Creating and Exploiting Coverage and Diversity", OPTcrossref = "", OPTkey = "", OPTeditor = "", OPTvolume = "", OPTnumber = "", series = "Integrating Multiple Learned Models for Improving and Scaling Machine Learning Algorithms Workshop", booktitle = "AAAI'96", year = "1996", note = "Portland, OR", OPTannote = "Finds that a set of classifiers may be built to cover more of the full train data (for example 95% vs. 83%), but when integrated the performance is not necessarily better as coverage is increased. For example, a 93% coverage voting classifier (plurality vote) is worse in accuracy than a classifier with 83% coverage (82.8% to 83.1%)" } @InProceedings{Chan98, author = "P.K. Chan and S.J. Stolfo", title = "Toward Scalable Learning with Non-uniform Class and Cost Distributions: A Case Study in Credit Card Fraud Detection", OPTcrossref = "", OPTkey = "", OPTeditor = "", OPTvolume = "", OPTnumber = "", booktitle = "AAAI'98", year = "1998", OPTannote = "Shows a method of dealing with skewed training distributions. They set up subsets of the data with each subset a 50:50 mix in the 2 class problem. The data is naturally 20:80. So, the majority data got divided into n (4 here) groups and the complete minority data was copied to each partition. All minority data in each partition. Then the results of the classifiers was merged for pretty good performance. " } @InProceedings{Chan96, author = "P.K. Chan and S.J. Stolfo", title = "Scaling Learning by Meta-Learning over Disjoint and Partially replicated Data ", OPTcrossref = "", OPTkey = "", OPTeditor = "", OPTvolume = "", OPTnumber = "", booktitle = "Proceedings of the Florida Artificial Intelligence Society", year = "1996", OPTannote = "On 2 data sets, splice junctions and Protein coding regions, this paper examines the performance of ID3 and CART on 2, 4, 8, 16, 32 and 64 disjoint partitions as well as partitions with partial overlap. 
Looks at different ways of combining the classifiers, a combiner approach and a Bayesian approach. It shows what we have found as far as accuracy getting worse as the number of partitions is increased, maybe once better. They replicate up to 30% of the data in a funny random manner. In practice not all subsets will have all replicates. They found no improvement in performance from data replication. They conclude that replication buys you nothing. " } @TechReport{mitchelltr, author = {T.M. Mitchell}, title = {The need for biases in learning generalizations}, institution = {Rutgers Univ.}, year = {1980}, number = {CBM-TR-117}, address = {Computer Science Dept.}, OPTmonth = {}, OPTnote = {}, OPTannote = {Talks about the bias in learning algorithms.} } @article{ dietterich98approximate, author = "Thomas G. Dietterich", title = "Approximate Statistical Test For Comparing Supervised Classification Learning Algorithms", journal = "Neural Computation", volume = "10", number = "7", pages = "1895-1923", year = "1998", url = "citeseer.ist.psu.edu/dietterich98approximate.html" } @InProceedings {thinning, author = "Robert E. Banfield and Lawrence O. Hall and Kevin W. Bowyer and W. Philip Kegelmeyer", title = "A New Ensemble Diversity Measure Applied to Thinning Ensembles", booktitle = "Fifth International Workshop on Multiple Classifier Systems", pages = "306-316", year = "2003" } @article{arcingclassifiers, author = "Leo Breiman", title = "Arcing Classifiers", journal = "Annals of Statistics", volume = "26", number = "3", pages = "801-849", year = "1998" } @article{boostingdt, author = "Harris Drucker and Corinna Cortes", title = "Boosting Decision Trees", journal = "Advances in Neural Information Processing Systems 8", pages = "479-485", year = "1996" } @InProceedings{quinc45, author = "J. R. Quinlan", title = "Bagging, Boosting, and C4.5", booktitle = "Thirteenth National Conference on Artificial Intelligence", pages = "725-730", year = "1996" }