This paper proposes a novel algorithm to jointly determine the structure and the parameters of a posteriori probability model based on neural networks (NNs). It makes use of well-known ideas of pruning, splitting, and merging neural components and takes advantage of the probabilistic interpretation of these components. The algorithm, so called a posteriori probability model selection (PPMS), is applied to an NN architecture called the generalized softmax perceptron (GSP) whose outputs can be understood as probabilities although results shown can be extended to more general network architectures. Learning rules are derived from the application of the expectation-maximization algorithm to the GSP-PPMS structure. Simulation results show the advantages of the proposed algorithm with respect to other schemes. {\^A}{\textcopyright} 2005 IEEE.

}, keywords = {algorithm, Algorithms, article, artificial neural network, Automated, automated pattern recognition, Biological, biological model, Breast Neoplasms, breast tumor, classification, cluster analysis, computer analysis, Computer-Assisted, computer assisted diagnosis, Computer simulation, Computing Methodologies, decision support system, Decision Support Techniques, Diagnosis, Estimation, evaluation, Expectation-maximization, Generalized Softmax Perceptron (GSP), human, Humans, mathematical computing, Mathematical models, methodology, Models, Model selection, Neural networks, Neural Networks (Computer), Numerical Analysis, Objective function, Pattern recognition, Posterior probability, Probability, Statistical, statistical model, statistics, Stochastic Processes}, issn = {10459227}, doi = {10.1109/TNN.2005.849826}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-23044459586\&partnerID=40\&md5=f00e7d86a625cfc466373a2a938276d0}, author = {J I Arribas and Jes{\'u}s Cid-Sueiro} } @conference {413, title = {Fusing Output Information in Neural Networks: Ensemble Performs Better}, booktitle = {Annual International Conference of the IEEE Engineering in Medicine and Biology - Proceedings}, year = {2003}, address = {Cancun}, abstract = {A neural network ensemble is a learning paradigm where a finite number of component neural networks are trained for the same task. Previous research suggests that an ensemble as a whole is often more accurate than any of the single component networks. This paper focuses on the advantages of fusing different nature network architectures, and to determine the appropriate information fusion algorithm in component neural networks by several approaches within hard decision classifiers, when solving a binary pattern recognition problem. We numerically simulated and compared the different fusion approaches in terms of the mean-square error rate in testing data set, over synthetically generated binary Gaussian noisy data, and stated the advantages of fusing the hard outputs of different component networks to make a final hard decision classification. The results of the experiments indicate that neural network ensembles can indeed improve the overall accuracy for classification problems; in all fusion architectures tested, the ensemble correct classification rates are better than those achieved by the individual component networks. Finally we are nowadays comparing the above mentioned hard decision classifiers with new soft decision classifier architectures that make use of the additional continuous type intermediate network soft outputs, fulfilling probability fundamental laws (positive, and add to unity), which can be understood as the a posteriori probabilities of a given pattern to belong to a certain class.

}, keywords = {Algorithms, Backpropagation, Classification (of information), Computer simulation, Decision making, Estimation, Gaussian noise (electronic), Information fusions, Mathematical models, Medical imaging, Model selection, Multilayer neural networks, Neural network ensembles, Pattern recognition, Probability, Probability estimation, Problem solving, Regularization, Statistical methods, Statistical pattern recognition, Vectors}, doi = {https://doi.org/10.1109/IEMBS.2003.1280254}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-1542301061\&partnerID=40\&md5=32dbadb3b6ac3c6ae1ea33d89b52c75f}, author = {Y Wu and J I Arribas} } @article {409, title = {Cost functions to estimate a posteriori probabilities in multiclass problems}, journal = {IEEE Transactions on Neural Networks}, volume = {10}, year = {1999}, pages = {645-656}, abstract = {The problem of designing cost functions to estimate a posteriori probabilities in multiclass problems is addressed in this paper. We establish necessary and sufficient conditions that these costs must satisfy in one-class one-output networks whose outputs are consistent with probability laws. We focus our attention on a particular subset of the corresponding cost functions; those which verify two usually interesting properties: symmetry and separability (well-known cost functions, such as the quadratic cost or the cross entropy are particular cases in this subset). Finally, we present a universal stochastic gradient learning rule for single-layer networks, in the sense of minimizing a general version of these cost functions for a wide family of nonlinear activation functions.

}, keywords = {Cost functions, Estimation, Functions, Learning algorithms, Multiclass problems, Neural networks, Pattern recognition, Probability, Problem solving, Random processes, Stochastic gradient learning rule}, issn = {10459227}, doi = {10.1109/72.761724}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-0032643080\&partnerID=40\&md5=d528195bd6ec84531e59ddd2ececcd46}, author = {Jes{\'u}s Cid-Sueiro and J I Arribas and S Urban-Munoz and A R Figueiras-Vidal} } @conference {411, title = {Neural architectures for parametric estimation of a posteriori probabilities by constrained conditional density functions}, booktitle = {Neural Networks for Signal Processing - Proceedings of the IEEE Workshop}, year = {1999}, publisher = {IEEE, Piscataway, NJ, United States}, organization = {IEEE, Piscataway, NJ, United States}, address = {Madison, WI, USA}, abstract = {A new approach to the estimation of {\textquoteright}a posteriori{\textquoteright} class probabilities using neural networks, the Joint Network and Data Density Estimation (JNDDE), is presented in this paper. It is based on the estimation of the conditional data density functions, with some restrictions imposed by the classifier structure; the Bayes{\textquoteright} rule is used to obtain the {\textquoteright}a posteriori{\textquoteright} probabilities from these densities. The proposed method is applied to three different network structures: the logistic perceptron (for the binary case), the softmax perceptron (for multi-class problems) and a generalized softmax perceptron (that can be used to map arbitrarily complex probability functions). Gaussian mixture models are used for the conditional densities. The method has the advantage of establishing a distinction between the network parameters and the model parameters. Complexity on any of them can be fixed as desired. Maximum Likelihood gradient-based rules for the estimation of the parameters can be obtained. It is shown that JNDDE exhibits a more robust convergence characteristics than other methods of a posteriori probability estimation, such as those based on the minimization of a Strict Sense Bayesian (SSB) cost function.

}, keywords = {Asymptotic stability, Constraint theory, Data structures, Gaussian mixture models, Joint network and data density estimation, Mathematical models, Maximum likelihood estimation, Neural networks, Probability}, doi = {https://doi.org/10.1109/NNSP.1999.788145}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-0033321049\&partnerID=40\&md5=7967fa377810cc0c3e6a4d9020024b80}, author = {J I Arribas and Jes{\'u}s Cid-Sueiro and T Adali and A R Figueiras-Vidal} }