Ich habe ein Entscheidungsbaum-Modell im PMML-Format, wie unten gezeigt. Wie speichere ich die Regeln jedes Blattes als Text oder in einem anderen Format? Wie extrahiert man die Regeln jedes Blattes aus dem Entscheidungsbaummodell?
Zum Beispiel: uniformitycellsize <= 3.5^clumpthickness <= 6.5^normalnucleoli <= 3.5 => B
<TreeModel modelName="DecisionTree"
           functionName="classification"
           splitCharacteristic="binarySplit"
           missingValueStrategy="lastPrediction"
           noTrueChildStrategy="returnNullPrediction">
  <!-- Fields used by the model; Class_Categorical is the target. -->
  <MiningSchema>
    <MiningField name="clumpthickness" invalidValueTreatment="asIs"/>
    <MiningField name="uniformitycellsize" invalidValueTreatment="asIs"/>
    <MiningField name="uniformitycellshape" invalidValueTreatment="asIs"/>
    <MiningField name="marginaladhesion" invalidValueTreatment="asIs"/>
    <MiningField name="epithelialcellsize" invalidValueTreatment="asIs"/>
    <MiningField name="barenuclei" invalidValueTreatment="asIs"/>
    <MiningField name="blandchromatin" invalidValueTreatment="asIs"/>
    <MiningField name="normalnucleoli" invalidValueTreatment="asIs"/>
    <MiningField name="mitoses" invalidValueTreatment="asIs"/>
    <MiningField name="partition" invalidValueTreatment="asIs"/>
    <MiningField name="Class_Categorical" invalidValueTreatment="asIs" usageType="target"/>
  </MiningSchema>
  <!-- Root node: 559 records, 365 B / 194 M. -->
  <Node id="0" score="B" recordCount="559.0">
    <True/>
    <ScoreDistribution value="B" recordCount="365.0"/>
    <ScoreDistribution value="M" recordCount="194.0"/>
    <!-- Branch: uniformitycellsize <= 3.5 -->
    <Node id="1" score="B" recordCount="384.0">
      <SimplePredicate field="uniformitycellsize" operator="lessOrEqual" value="3.5"/>
      <ScoreDistribution value="B" recordCount="356.0"/>
      <ScoreDistribution value="M" recordCount="28.0"/>
      <Node id="2" score="B" recordCount="368.0">
        <SimplePredicate field="clumpthickness" operator="lessOrEqual" value="6.5"/>
        <ScoreDistribution value="B" recordCount="354.0"/>
        <ScoreDistribution value="M" recordCount="14.0"/>
        <!-- Leaf -->
        <Node id="3" score="B" recordCount="353.0">
          <SimplePredicate field="normalnucleoli" operator="lessOrEqual" value="3.5"/>
          <ScoreDistribution value="B" recordCount="347.0"/>
          <ScoreDistribution value="M" recordCount="6.0"/>
        </Node>
        <!-- Leaf -->
        <Node id="10" score="M" recordCount="15.0">
          <SimplePredicate field="normalnucleoli" operator="greaterThan" value="3.5"/>
          <ScoreDistribution value="B" recordCount="7.0"/>
          <ScoreDistribution value="M" recordCount="8.0"/>
        </Node>
      </Node>
      <!-- Leaf -->
      <Node id="11" score="M" recordCount="16.0">
        <SimplePredicate field="clumpthickness" operator="greaterThan" value="6.5"/>
        <ScoreDistribution value="B" recordCount="2.0"/>
        <ScoreDistribution value="M" recordCount="14.0"/>
      </Node>
    </Node>
    <!-- Branch: uniformitycellsize > 3.5 -->
    <Node id="12" score="M" recordCount="175.0">
      <SimplePredicate field="uniformitycellsize" operator="greaterThan" value="3.5"/>
      <ScoreDistribution value="B" recordCount="9.0"/>
      <ScoreDistribution value="M" recordCount="166.0"/>
      <Node id="13" score="M" recordCount="33.0">
        <SimplePredicate field="uniformitycellsize" operator="lessOrEqual" value="4.5"/>
        <ScoreDistribution value="B" recordCount="7.0"/>
        <ScoreDistribution value="M" recordCount="26.0"/>
        <Node id="14" score="M" recordCount="21.0">
          <SimplePredicate field="marginaladhesion" operator="lessOrEqual" value="5.5"/>
          <ScoreDistribution value="B" recordCount="7.0"/>
          <ScoreDistribution value="M" recordCount="14.0"/>
          <!-- Leaf -->
          <Node id="15" score="B" recordCount="10.0">
            <SimplePredicate field="clumpthickness" operator="lessOrEqual" value="7.5"/>
            <ScoreDistribution value="B" recordCount="6.0"/>
            <ScoreDistribution value="M" recordCount="4.0"/>
          </Node>
          <!-- Leaf -->
          <Node id="16" score="M" recordCount="11.0">
            <SimplePredicate field="clumpthickness" operator="greaterThan" value="7.5"/>
            <ScoreDistribution value="B" recordCount="1.0"/>
            <ScoreDistribution value="M" recordCount="10.0"/>
          </Node>
        </Node>
        <!-- Leaf -->
        <Node id="17" score="M" recordCount="12.0">
          <SimplePredicate field="marginaladhesion" operator="greaterThan" value="5.5"/>
          <ScoreDistribution value="B" recordCount="0.0"/>
          <ScoreDistribution value="M" recordCount="12.0"/>
        </Node>
      </Node>
      <!-- Leaf -->
      <Node id="18" score="M" recordCount="142.0">
        <SimplePredicate field="uniformitycellsize" operator="greaterThan" value="4.5"/>
        <ScoreDistribution value="B" recordCount="2.0"/>
        <ScoreDistribution value="M" recordCount="140.0"/>
      </Node>
    </Node>
  </Node>
</TreeModel>
====================
Das XSL-Stylesheet zum Erzielen eines solchen Ergebnisses wird unten gezeigt.
<!--
  Writes one plain-text rule per leaf of a decision tree built from nested
  Node elements. A leaf is a Node that has no child Node. For each leaf the
  SimplePredicate conditions on the path from the root down to the leaf are
  joined with "^", followed by " => " and the leaf's own score attribute,
  for example:

    uniformitycellsize <= 3.5^clumpthickness <= 6.5^normalnucleoli <= 3.5 => B

  Each rule ends with a line feed.

  NOTE: the unprefixed names used in the XPath expressions match only
  elements that are in no namespace. If the input document declares a
  default namespace, bind a prefix to that namespace URI here and use it in
  every step (p:Node, p:SimplePredicate), otherwise nothing is selected.
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="text" encoding="UTF-8"/>

  <xsl:template match="/">
    <!-- Outer loop: every leaf node, in document order. -->
    <xsl:for-each select="//Node[not(Node)]">
      <!-- Inner loop: predicates from the root down to this leaf. -->
      <xsl:for-each select="ancestor-or-self::Node/SimplePredicate">
        <xsl:value-of select="@field"/>
        <!-- Literal "<" must be written as &lt; or the stylesheet is not well-formed. -->
        <xsl:choose>
          <xsl:when test="@operator = 'lessOrEqual'"><xsl:text> &lt;= </xsl:text></xsl:when>
          <xsl:when test="@operator = 'greaterThan'"><xsl:text> &gt; </xsl:text></xsl:when>
          <xsl:when test="@operator = 'lessThan'"><xsl:text> &lt; </xsl:text></xsl:when>
          <xsl:when test="@operator = 'greaterOrEqual'"><xsl:text> &gt;= </xsl:text></xsl:when>
          <xsl:when test="@operator = 'equal'"><xsl:text> = </xsl:text></xsl:when>
          <xsl:when test="@operator = 'notEqual'"><xsl:text> != </xsl:text></xsl:when>
          <!-- Unknown operator: print its name so field and value do not run together. -->
          <xsl:otherwise>
            <xsl:text> </xsl:text>
            <xsl:value-of select="@operator"/>
            <xsl:text> </xsl:text>
          </xsl:otherwise>
        </xsl:choose>
        <xsl:value-of select="@value"/>
        <xsl:if test="position() != last()">
          <xsl:text>^</xsl:text>
        </xsl:if>
      </xsl:for-each>
      <!-- Back in the leaf's context: the score is taken from the leaf itself,
           so it is printed even when the path holds no SimplePredicate. -->
      <xsl:text> =&gt; </xsl:text>
      <xsl:value-of select="@score"/>
      <!-- One rule per line. -->
      <xsl:text>&#10;</xsl:text>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
Die Ausgabe lautet:
Uniformity of Cell Size <= 2.5^Bare Nuclei <= 5.5 => B
Uniformity of Cell Size <= 2.5^Bare Nuclei > 5.5 => M
Uniformity of Cell Size > 2.5^Uniformity of Cell Shape <= 2.5^Clump Thickness <= 5.5 => B
Uniformity of Cell Size > 2.5^Uniformity of Cell Shape <= 2.5^Clump Thickness > 5.5 => M
Uniformity of Cell Size > 2.5^Uniformity of Cell Shape > 2.5 => M
Wie sollen dabei die verschiedenen ScoreDistribution-Werte jedes Knotens behandelt werden? –
@michael.hor257k Ich möchte den letzten Knoten jedes Blattes verwenden, d. h. anstatt die zweigeteilte Verteilung zu berücksichtigen, die an jedem letzten Blattknoten existiert. Ich bin wirklich neugierig, wie das in XSL erreicht werden kann. –
"* Ich möchte den letzten Knoten jedes Blattes verwenden. *" Ich fürchte, ich verstehe nicht, was das bedeutet. Warum postest du nicht genau das Ergebnis, das du erwartest? –