<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
	<record>
		<datafield tag="980" ind1=" " ind2=" ">
			<subfield code="a">CONF</subfield>
		</datafield>
		<datafield tag="970" ind1=" " ind2=" ">
			<subfield code="a">Thimm-94.2/IDIAP</subfield>
		</datafield>
		<datafield tag="245" ind1=" " ind2=" ">
			<subfield code="a">Weight Initialization for High Order and Multilayer Perceptrons</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Thimm, Georg</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Fiesler, Emile</subfield>
		</datafield>
		<datafield tag="700" ind1=" " ind2=" ">
			<subfield code="a">Aguilar, Marc</subfield>
			<subfield code="e">Ed.</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">activation function</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">comparison of weight initialization methods</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">connectionism</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">high(er) order neural network</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">high(er) order perceptron</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">initial weight</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">initial weight distribution</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">interconnection strength</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">learning rate</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">multilayer perceptron</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">neural computation</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">neural network</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">neurocomputing</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">optimization</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">sigma-pi connection</subfield>
		</datafield>
		<datafield tag="653" ind1="1" ind2=" ">
			<subfield code="a">weight initialization</subfield>
		</datafield>
		<datafield tag="711" ind1="2" ind2=" ">
			<subfield code="a">SI Group for Parallel Systems - Proceedings of the '94 SIPAR-Workshop on Parallel and Distributed Computing</subfield>
		</datafield>
		<datafield tag="260" ind1=" " ind2=" ">
			<subfield code="c">1994</subfield>
			<subfield code="a">Institute of Informatics University, Pérolles, Chemin du Musée 3, CH-1700 Fribourg, Switzerland</subfield>
		</datafield>
		<datafield tag="771" ind1="2" ind2=" ">
			<subfield code="d">October 1994</subfield>
		</datafield>
		<datafield tag="773" ind1=" " ind2=" ">
			<subfield code="c">87-90</subfield>
		</datafield>
		<datafield tag="520" ind1=" " ind2=" ">
			<subfield code="a">Proper weight initialization is one of the most important prerequisites for fast convergence of feed-forward neural networks like high order and multilayer perceptrons. In order to determine the optimal value of the initial weight variance (or range), which is an important parameter of random weight initialization methods for high order perceptrons, a wide range of experiments (more than 200,000 simulations) was performed, using seven different data sets, three weight distributions, three activation functions, and several network orders. The results of these experiments are compared to weight initialization techniques for multilayer perceptrons, which leads to the proposal of a suitable weight initialization method for high order perceptrons. Experiments over a large range of initial weight variances are performed (more than 20,000 simulations) for multilayer perceptrons and compared to weight initialization methods proposed by other authors. The results of this comparison are justified by sufficiently small confidence intervals.</subfield>
		</datafield>
	</record>
</collection>