Kernel Principal Component Analysis Action Set

Provides actions for kernel principal component analysis.

kPca Action

Performs kernel PCA training.

CASL Syntax

kernelPca.kPca <result=results> <status=rc> /
attributes={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
bwTune={
method="RANDOMCMSE" | "SCMSE" | 64-bit-integer,
nClus=integer,
nPass=integer,
nSample=integer,
seed=integer
},
center=TRUE | FALSE,
centroids={
caslib="string",
compress=TRUE | FALSE,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
centroidsPC={
caslib="string",
compress=TRUE | FALSE,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
clusCCriterion=double,
clusMaxiter=integer,
clusMethod="FC" | "KMPP" | "RANDOM" | 64-bit-integer,
clusNrestart=integer,
clusRandSeed=integer,
display={
caseSensitive=TRUE | FALSE,
exclude=TRUE | FALSE,
excludeAll=TRUE | FALSE,
keyIsPath=TRUE | FALSE,
names={"string-1" <, "string-2", ...>},
pathType="LABEL" | "NAME",
traceNames=TRUE | FALSE
},
eigenVal={
caslib="string",
compress=TRUE | FALSE,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
eigenVec={
required parameter casout={
caslib="string",
compress=TRUE | FALSE,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
copyVars={"variable-name-1" <, "variable-name-2", ...>}
},
exactScore=TRUE | FALSE,
id={"variable-name-1" <, "variable-name-2", ...>},
inputs={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
intercept=double,
kerParam=double,
kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer,
mapCoeffs={
caslib="string",
compress=TRUE | FALSE,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
mapParam={
intercept=double,
kerParam=double,
kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer,
lambda=double
},
maxClus=integer,
method="APPROXIMATE" | "EXACT" | 64-bit-integer,
order=TRUE | FALSE,
output={
required parameter casout={
caslib="string",
compress=TRUE | FALSE,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
copyVars={"variable-name-1" <, "variable-name-2", ...>},
npc=integer
},
outputTables={
groupByVarsRaw=TRUE | FALSE,
includeAll=TRUE | FALSE,
names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},
repeated=TRUE | FALSE,
replace=TRUE | FALSE
},
preimage=TRUE | FALSE,
preimageMethod="ITERATIVE" | "MAP" | 64-bit-integer,
preimageNPC=integer,
rankThreshold=double,
saveState={
caslib="string",
compress=TRUE | FALSE,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
scale=TRUE | FALSE,
required parameter table={
caslib="string",
computedOnDemand=TRUE | FALSE,
computedVars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
computedVarsProgram="string",
dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},
groupBy={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
groupByMode="NOSORT" | "REDISTRIBUTE",
importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},
required parameter name="table-name",
orderBy={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
singlePass=TRUE | FALSE,
vars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
where="where-expression",
whereTable={
casLib="string",
dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters},
importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},
required parameter name="table-name",
vars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
where="where-expression"
}
}
;
The "required parameter" marker (shown as * in Parameter Descriptions) indicates a required parameter.

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables

Parameter

Subparameter

Description

required parameter table

specifies the settings for an input table.

Parameters for Creating Output Tables

Parameter

Subparameter

Description

 centroids

specifies the output data table in which to save the centroids matrix.

 centroidsPC

specifies the output data table in which to save the centroids matrix of the KPCA principal components.

 eigenVal

specifies the output data table in which to save the eigenVal matrix.

 eigenVec

required parameter casout

specifies the output data table in which to save the eigenVector matrix.

 mapCoeffs

specifies the output data table in which to save the mapping pre-image coefficients matrix.

 output

required parameter casout

specifies the output data table in which to save the score values of the training data.

 outputTables

names

lists the names of results tables to save as CAS tables on the server.

 saveState

specifies the output data table in which to save the state of the eigenvector matrix for future scoring.

Parameter Descriptions

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variable attributes.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias attribute

bwTune={bwTune}

specifies the parameter list for the RBF kernel bandwidth tuning.

The bwTune value can be one or more of the following:

method="RANDOMCMSE" | "SCMSE" | 64-bit-integer

specifies the method to use in bandwidth tuning: the random criterion of maximum sum of eigenvalues method (random CMSE) or the scalable method (SCMSE).

nClus=integer

specifies the number of clusters to use in bandwidth tuning when the tuning method is the scalable criterion of maximum sum of eigenvalues method (SCMSE).

Default 100
nPass=integer

specifies the number of passes to use in bandwidth tuning when the tuning method is the random criterion of maximum sum of eigenvalues method (random CMSE).

Default 10
nSample=integer

specifies the random sample size to use in bandwidth tuning when the tuning method is the random criterion of maximum sum of eigenvalues method (random CMSE).

Default 100
seed=integer

specifies the random seed to use in bandwidth tuning.

center=TRUE | FALSE

when set to True, centers the numeric variables by the mean of each column.

Alias centering
Default FALSE

centroids={casouttable}

specifies the output data table in which to save the centroids matrix.

For more information about specifying the centroids parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

centroidsPC={casouttable}

specifies the output data table in which to save the centroids matrix of the KPCA principal components.

For more information about specifying the centroidsPC parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

clusCCriterion=double

specifies the convergence criterion for the k-means clustering algorithm.

Alias cc
Default 0.02

clusMaxiter=integer

specifies the maximum number of iterations for the k-means clustering algorithm.

Default 50

clusMethod="FC" | "KMPP" | "RANDOM" | 64-bit-integer

specifies the initial centroid type in k-means clustering.

Default KMPP
FC specifies fast clustering as the initial centroid type.
KMPP specifies k-means plus plus as the initial centroid type.
RANDOM specifies randomization as the initial centroid type.

clusNrestart=integer

specifies the number of restarts when the initial centroids are selected at random.

Default 5

clusRandSeed=integer

specifies the random seed to use in initial centroid selection.

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

eigenVal={casouttable}

specifies the output data table in which to save the eigenVal matrix.

For more information about specifying the eigenVal parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

eigenVec={eigOutput}

specifies the output data table in which to save the eigenVector matrix.

The eigOutput value can be one or more of the following:

* casout={casouttable}

specifies the output eigenvector table.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars={"variable-name-1" <, "variable-name-2", ...>}

copies one or more variables from the input table to the output table.

Alias copyVar

exactScore=TRUE | FALSE

when set to False and the training method is low-rank approximation, implements the fast scoring method.

Alias ES
Default FALSE

id={"variable-name-1" <, "variable-name-2", ...>}

specifies the variable to use as the record identifier.

inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias input

intercept=double

specifies the constant term in the polynomial kernel.

Alias coefficient0
Default 1

kerParam=double

specifies the kernel parameter.

kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer

specifies the kernel type to use for kernel principal component analysis. "RBF" indicates the radial basis function type.

Aliases kernel
kernelType

mapCoeffs={casouttable}

specifies the output data table in which to save the mapping pre-image coefficients matrix.

For more information about specifying the mapCoeffs parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

mapParam={mapParm}

specifies the parameter list in the mapping pre-image method.

The mapParm value can be one or more of the following:

intercept=double

specifies the intercept parameter of the polynomial kernel for kernel ridge regression to use in the mapping pre-image method.

Default 1
kerParam=double

specifies the kernel parameter for kernel ridge regression to use in the mapping method.

kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer

specifies the kernel type for kernel ridge regression to use in the mapping method.

lambda=double

specifies the lambda parameter for the L2 regularization term for kernel ridge regression to use in the mapping pre-image method.

Alias L2
Default 1

maxClus=integer

specifies the maximum number of clusters to use in k-means clustering.

Alias maxc
Default 100

method="APPROXIMATE" | "EXACT" | 64-bit-integer

specifies the computation method to use for kernel principal component analysis.

order=TRUE | FALSE

when set to True, orders the input data set by the KPCA_ROWID variable (or by the ROWID variable if KPCA_ROWID is absent) so that results are consistent when the data are distributed across multiple grid nodes.

Alias sort
Default FALSE

output={kpcaOutputStatement}

specifies the output data table in which to save the score values of the training data.

The kpcaOutputStatement value can be one or more of the following:

* casout={casouttable}

specifies the output scoring table for training data.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars={"variable-name-1" <, "variable-name-2", ...>}

copies one or more variables from the input table to the output table.

Alias copyVar
npc=integer

specifies the number of principal components to use in scoring the training data.

Default 4

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias DISPLAYOUT

preimage=TRUE | FALSE

when set to True, implements pre-image training.

Alias pre
Default FALSE

preimageMethod="ITERATIVE" | "MAP" | 64-bit-integer

specifies the method to use in pre-image training.

Alias preMethod
Default ITERATIVE
ITERATIVE uses the iterative method to calculate the pre-image.
MAP uses the mapping method to calculate the pre-image.

preimageNPC=integer

specifies the number of principal components to use in pre-image training. For the mapping method, this is also the number of principal components to use in pre-image scoring.

Alias preNPC
Default 4

rankThreshold=double

specifies the eigenvalue threshold to use for determining the kernel matrix rank.

Alias threshold
Default 1E-08

saveState={casouttable}

specifies the output data table in which to save the state of the eigenvector matrix for future scoring.

For more information about specifying the saveState parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

scale=TRUE | FALSE

when set to True, scales the numeric variables by the standard deviation of each column.

Alias scaling
Default FALSE

* table={castable}

specifies the settings for an input table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

kPca Action

Performs kernel PCA training.

Lua Syntax

results, info = s:kernelPca_kPca{
attributes={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
bwTune={
method="RANDOMCMSE" | "SCMSE" | 64-bit-integer,
nClus=integer,
nPass=integer,
nSample=integer,
seed=integer
},
center=true | false,
centroids={
caslib="string",
compress=true | false,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=true | false,
replace=true | false,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
centroidsPC={
caslib="string",
compress=true | false,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=true | false,
replace=true | false,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
clusCCriterion=double,
clusMaxiter=integer,
clusMethod="FC" | "KMPP" | "RANDOM" | 64-bit-integer,
clusNrestart=integer,
clusRandSeed=integer,
display={
caseSensitive=true | false,
exclude=true | false,
excludeAll=true | false,
keyIsPath=true | false,
names={"string-1" <, "string-2", ...>},
pathType="LABEL" | "NAME",
traceNames=true | false
},
eigenVal={
caslib="string",
compress=true | false,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=true | false,
replace=true | false,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
eigenVec={
required parameter casout={
caslib="string",
compress=true | false,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=true | false,
replace=true | false,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
copyVars={"variable-name-1" <, "variable-name-2", ...>}
},
exactScore=true | false,
id={"variable-name-1" <, "variable-name-2", ...>},
inputs={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
intercept=double,
kerParam=double,
kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer,
mapCoeffs={
caslib="string",
compress=true | false,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=true | false,
replace=true | false,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
mapParam={
intercept=double,
kerParam=double,
kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer,
lambda=double
},
maxClus=integer,
method="APPROXIMATE" | "EXACT" | 64-bit-integer,
order=true | false,
output={
required parameter casout={
caslib="string",
compress=true | false,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=true | false,
replace=true | false,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
copyVars={"variable-name-1" <, "variable-name-2", ...>},
npc=integer
},
outputTables={
groupByVarsRaw=true | false,
includeAll=true | false,
names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},
repeated=true | false,
replace=true | false
},
preimage=true | false,
preimageMethod="ITERATIVE" | "MAP" | 64-bit-integer,
preimageNPC=integer,
rankThreshold=double,
saveState={
caslib="string",
compress=true | false,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=true | false,
replace=true | false,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
scale=true | false,
required parameter table={
caslib="string",
computedOnDemand=true | false,
computedVars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
computedVarsProgram="string",
dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},
groupBy={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
groupByMode="NOSORT" | "REDISTRIBUTE",
importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},
required parameter name="table-name",
orderBy={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
singlePass=true | false,
vars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
where="where-expression",
whereTable={
casLib="string",
dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters},
importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},
required parameter name="table-name",
vars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
where="where-expression"
}
}
}
The "required parameter" marker (shown as * in Parameter Descriptions) indicates a required parameter.

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables

Parameter

Subparameter

Description

required parameter table

specifies the settings for an input table.

Parameters for Creating Output Tables

Parameter

Subparameter

Description

 centroids

specifies the output data table in which to save the centroids matrix.

 centroidsPC

specifies the output data table in which to save the centroids matrix of the KPCA principal components.

 eigenVal

specifies the output data table in which to save the eigenVal matrix.

 eigenVec

required parameter casout

specifies the output data table in which to save the eigenVector matrix.

 mapCoeffs

specifies the output data table in which to save the mapping pre-image coefficients matrix.

 output

required parameter casout

specifies the output data table in which to save the score values of the training data.

 outputTables

names

lists the names of results tables to save as CAS tables on the server.

 saveState

specifies the output data table in which to save the state of the eigenvector matrix for future scoring.

Parameter Descriptions

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variable attributes.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias attribute

bwTune={bwTune}

specifies the parameter list for the RBF kernel bandwidth tuning.

The bwTune value can be one or more of the following:

method="RANDOMCMSE" | "SCMSE" | 64-bit-integer

specifies the method to use in bandwidth tuning: the random criterion of maximum sum of eigenvalues method (random CMSE) or the scalable method (SCMSE).

nClus=integer

specifies the number of clusters to use in bandwidth tuning when the tuning method is the scalable criterion of maximum sum of eigenvalues method (SCMSE).

Default 100
nPass=integer

specifies the number of passes to use in bandwidth tuning when the tuning method is the random criterion of maximum sum of eigenvalues method (random CMSE).

Default 10
nSample=integer

specifies the random sample size to use in bandwidth tuning when the tuning method is the random criterion of maximum sum of eigenvalues method (random CMSE).

Default 100
seed=integer

specifies the random seed to use in bandwidth tuning.

center=true | false

when set to True, centers the numeric variables by the mean of each column.

Alias centering
Default false

centroids={casouttable}

specifies the output data table in which to save the centroids matrix.

For more information about specifying the centroids parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

centroidsPC={casouttable}

specifies the output data table in which to save the centroids matrix of the KPCA principal components.

For more information about specifying the centroidsPC parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

clusCCriterion=double

specifies the convergence criterion for the k-means clustering algorithm.

Alias cc
Default 0.02

clusMaxiter=integer

specifies the maximum number of iterations for the k-means clustering algorithm.

Default 50

clusMethod="FC" | "KMPP" | "RANDOM" | 64-bit-integer

specifies the initial centroid type in k-means clustering.

Default KMPP
FC specifies fast clustering as the initial centroid type.
KMPP specifies k-means plus plus as the initial centroid type.
RANDOM specifies randomization as the initial centroid type.

clusNrestart=integer

specifies the number of restarts when the initial centroids are selected at random.

Default 5

clusRandSeed=integer

specifies the random seed to use in initial centroid selection.

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

eigenVal={casouttable}

specifies the output data table in which to save the eigenVal matrix.

For more information about specifying the eigenVal parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

eigenVec={eigOutput}

specifies the output data table in which to save the eigenVector matrix.

The eigOutput value can be one or more of the following:

* casout={casouttable}

specifies the output eigenvector table.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars={"variable-name-1" <, "variable-name-2", ...>}

copies one or more variables from the input table to the output table.

Alias copyVar

exactScore=true | false

when set to False and the training method is low-rank approximation, implements the fast scoring method.

Alias ES
Default false

id={"variable-name-1" <, "variable-name-2", ...>}

specifies the variable to use as the record identifier.

inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias input

intercept=double

specifies the constant term in the polynomial kernel.

Alias coefficient0
Default 1

kerParam=double

specifies the kernel parameter.

kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer

specifies the kernel type to use for kernel principal component analysis. "RBF" indicates the radial basis function type.

Aliases kernel
kernelType

mapCoeffs={casouttable}

specifies the output data table in which to save the mapping pre-image coefficients matrix.

For more information about specifying the mapCoeffs parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

mapParam={mapParm}

specifies the parameter list in the mapping pre-image method.

The mapParm value can be one or more of the following:

intercept=double

specifies the intercept parameter of the polynomial kernel for kernel ridge regression to use in the mapping pre-image method.

Default 1
kerParam=double

specifies the kernel parameter for kernel ridge regression to use in the mapping method.

kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer

specifies the kernel type for kernel ridge regression to use in the mapping method.

lambda=double

specifies the lambda parameter for the L2 regularization term for kernel ridge regression to use in the mapping pre-image method.

Alias L2
Default 1

maxClus=integer

specifies the maximum number of clusters to use in k-means clustering.

Alias maxc
Default 100

method="APPROXIMATE" | "EXACT" | 64-bit-integer

specifies the computation method to use for kernel principal component analysis.

order=true | false

when set to True, orders the input data set by the KPCA_ROWID variable (or by the ROWID variable if KPCA_ROWID is absent) so that results are consistent when the data are distributed across multiple grid nodes.

Alias sort
Default false

output={kpcaOutputStatement}

specifies the output data table in which to save the score values of the training data.

The kpcaOutputStatement value can be one or more of the following:

* casout={casouttable}

specifies the output scoring table for training data.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars={"variable-name-1" <, "variable-name-2", ...>}

copies one or more variables from the input table to the output table.

Alias copyVar
npc=integer

specifies the number of principal components to use in scoring the training data.

Default 4

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias DISPLAYOUT

preimage=true | false

when set to True, implements pre-image training.

Alias pre
Default false

preimageMethod="ITERATIVE" | "MAP" | 64-bit-integer

specifies the method to use in pre-image training.

Alias preMethod
Default ITERATIVE
ITERATIVE uses the iterative method to calculate the pre-image.
MAP uses the mapping method to calculate the pre-image.

preimageNPC=integer

specifies the number of principal components to use in pre-image training. For the mapping method, this is also the number of principal components to use in pre-image scoring.

Alias preNPC
Default 4

rankThreshold=double

specifies the eigenvalue threshold to use for determining the kernel matrix rank.

Alias threshold
Default 1E-08

saveState={casouttable}

specifies the output data table in which to save the state of the eigenvector matrix for future scoring.

For more information about specifying the saveState parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

scale=true | false

when set to True, scales the numeric variables by the standard deviation of each column.

Alias scaling
Default false

* table={castable}

specifies the settings for an input table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

kPca Action

Performs kernel PCA training.

Python Syntax

results=s.kernelPca.kPca(
attributes=[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
bwTune={
"method":"RANDOMCMSE" | "SCMSE" | 64-bit-integer,
"nClus":integer,
"nPass":integer,
"nSample":integer,
"seed":integer
},
center=True | False,
centroids={
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
centroidsPC={
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
clusMaxiter=integer,
clusMethod="FC" | "KMPP" | "RANDOM" | 64-bit-integer,
clusNrestart=integer,
clusRandSeed=integer,
display={
"caseSensitive":True | False,
"exclude":True | False,
"excludeAll":True | False,
"keyIsPath":True | False,
"names":["string-1" <, "string-2", ...>],
"pathType":"LABEL" | "NAME",
"traceNames":True | False
},
eigenVal={
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
eigenVec={
required parameter "casout":{
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
"copyVars":["variable-name-1" <, "variable-name-2", ...>]
},
exactScore=True | False,
id=["variable-name-1" <, "variable-name-2", ...>],
inputs=[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
intercept=double,
kerParam=double,
kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer,
mapCoeffs={
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
mapParam={
"intercept":double,
"kerParam":double,
"kerType":"LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer,
"lambda_":double
},
maxClus=integer,
method="APPROXIMATE" | "EXACT" | 64-bit-integer,
order=True | False,
output={
required parameter "casout":{
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
"copyVars":["variable-name-1" <, "variable-name-2", ...>],
"npc":integer
},
outputTables={
"groupByVarsRaw":True | False,
"includeAll":True | False,
"names":["string-1" <, "string-2", ...>] | {"key-1":{casouttable-1} <, "key-2":{casouttable-2}, ...>},
"repeated":True | False,
"replace":True | False
},
preimage=True | False,
preimageMethod="ITERATIVE" | "MAP" | 64-bit-integer,
preimageNPC=integer,
rankThreshold=double,
saveState={
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
scale=True | False,
required parameter table={
"caslib":"string",
"computedOnDemand":True | False,
"computedVars":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
"computedVarsProgram":"string",
"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},
"groupBy":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
"groupByMode":"NOSORT" | "REDISTRIBUTE",
"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},
required parameter "name":"table-name",
"orderBy":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
"singlePass":True | False,
"vars":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
"where":"where-expression",
"whereTable":{
"casLib":"string"
"dataSourceOptions":{adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}
"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}
required parameter "name":"table-name"
"vars":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>]
"where":"where-expression"
}
}
)
The "required parameter" label in the syntax (and a leading * in the parameter descriptions) indicates a required parameter.

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables

Parameter

Subparameter

Description

required parameter table

specifies the settings for an input table.

Parameters for Creating Output Tables

Parameter

Subparameter

Description

 centroids

specifies the output data table in which to save the centroids matrix.

 centroidsPC

specifies the output data table in which to save the centroids matrix of the KPCA principal components.

 eigenVal

specifies the output data table in which to save the eigenVal matrix.

 eigenVec

required parameter casout

specifies the output data table in which to save the eigenVector matrix.

 mapCoeffs

specifies the output data table in which to save the mapping pre-image coefficients matrix.

 output

required parameter casout

specifies the output data table in which to save the score values of the training data.

 outputTables

names

lists the names of results tables to save as CAS tables on the server.

 saveState

specifies the output data table in which to save the state of the eigenvector matrix for future scoring.

Parameter Descriptions

attributes=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the variable attributes.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias attribute

bwTune={bwTune}

specifies the parameter list for the RBF kernel bandwidth tuning.

The bwTune value can be one or more of the following:

"method":"RANDOMCMSE" | "SCMSE" | 64-bit-integer

specifies the method to use in bandwidth tuning: the random criterion of maximum sum of eigenvalues method (random CMSE) or the scalable criterion of maximum sum of eigenvalues method (SCMSE).

"nClus":integer

specifies the number of clusters to use in bandwidth tuning when the tuning method is the scalable criterion of maximum sum of eigenvalues method (SCMSE).

Default 100
"nPass":integer

specifies the number of passes to use in bandwidth tuning when the tuning method is the random criterion of maximum sum of eigenvalues method (random CMSE).

Default 10
"nSample":integer

specifies the random sample size to use in bandwidth tuning when the tuning method is the random criterion of maximum sum of eigenvalues method (random CMSE).

Default 100
"seed":integer

specifies the random seed to use in bandwidth tuning.

center=True | False

when set to True, centers the numeric variables by the mean of each column.

Alias centering
Default False

centroids={casouttable}

specifies the output data table in which to save the centroids matrix.

For more information about specifying the centroids parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

centroidsPC={casouttable}

specifies the output data table in which to save the centroids matrix of the KPCA principal components.

For more information about specifying the centroidsPC parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

clusCCriterion=double

specifies the convergence criterion for the k-means clustering algorithm.

Alias cc
Default 0.02

clusMaxiter=integer

specifies the maximum number of iterations for the k-means clustering algorithm.

Default 50

clusMethod="FC" | "KMPP" | "RANDOM" | 64-bit-integer

specifies the initial centroid type in k-means clustering.

Default KMPP
FC specifies fast clustering as the initial centroid type.
KMPP specifies k-means++ as the initial centroid type.
RANDOM specifies randomization as the initial centroid type.

clusNrestart=integer

specifies the number of restarts when the initial centroids are selected at random.

Default 5

clusRandSeed=integer

specifies the random seed to use in initial centroid selection.

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

eigenVal={casouttable}

specifies the output data table in which to save the eigenVal matrix.

For more information about specifying the eigenVal parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

eigenVec={eigOutput}

specifies the output data table in which to save the eigenVector matrix.

The eigOutput value can be one or more of the following:

* "casout":{casouttable}

specifies the output eigenvector table.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"copyVars":["variable-name-1" <, "variable-name-2", ...>]

copies one or more variables from the input table to the output table.

Alias copyVar

exactScore=True | False

when set to False and the training method is low-rank approximation, implements the fast scoring method.

Alias ES
Default False

id=["variable-name-1" <, "variable-name-2", ...>]

specifies the variable to use as the record identifier.

inputs=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias input

intercept=double

specifies the constant term in the polynomial kernel.

Alias coefficient0
Default 1

kerParam=double

specifies the kernel parameter.

kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer

specifies the kernel type to use for kernel principal component analysis. "RBF" indicates the radial basis function type.

Aliases kernel, kernelType

mapCoeffs={casouttable}

specifies the output data table in which to save the mapping pre-image coefficients matrix.

For more information about specifying the mapCoeffs parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

mapParam={mapParm}

specifies the parameter list in the mapping pre-image method.

The mapParm value can be one or more of the following:

"intercept":double

specifies the intercept parameter of the polynomial kernel for kernel ridge regression to use in the mapping pre-image method.

Default 1
"kerParam":double

specifies the kernel parameter for kernel ridge regression to use in the mapping method.

"kerType":"LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer

specifies the kernel type for kernel ridge regression to use in the mapping method.

"lambda_":double

specifies the lambda parameter for the L2 regularization term for kernel ridge regression to use in the mapping pre-image method.

Alias L2
Default 1

maxClus=integer

specifies the maximum number of clusters to use in k-means clustering.

Alias maxc
Default 100

method="APPROXIMATE" | "EXACT" | 64-bit-integer

specifies the computation method to use for kernel principal component analysis.

order=True | False

when set to True, orders the input data by the KPCA_ROWID variable (or by the ROWID variable if the KPCA_ROWID variable is absent) so that results are consistent when the data are distributed across multiple grid nodes.

Alias sort
Default False

output={kpcaOutputStatement}

specifies the output data table in which to save the score values of the training data.

The kpcaOutputStatement value can be one or more of the following:

* "casout":{casouttable}

specifies the output scoring table for training data.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"copyVars":["variable-name-1" <, "variable-name-2", ...>]

copies one or more variables from the input table to the output table.

Alias copyVar
"npc":integer

specifies the number of principal components to use in scoring the training data.

Default 4

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias DISPLAYOUT

preimage=True | False

when set to True, implements pre-image training.

Alias pre
Default False

preimageMethod="ITERATIVE" | "MAP" | 64-bit-integer

specifies the method to use in pre-image training.

Alias preMethod
Default ITERATIVE
ITERATIVE uses the iterative method to calculate the pre-image.
MAP uses the mapping method to calculate the pre-image.

preimageNPC=integer

specifies the number of principal components to use in pre-image training. For the mapping method, this is also the number of principal components to use in pre-image scoring.

Alias preNPC
Default 4

rankThreshold=double

specifies the eigenvalue threshold to use for determining the kernel matrix rank.

Alias threshold
Default 1E-08

saveState={casouttable}

specifies the output data table in which to save the state of the eigenvector matrix for future scoring.

For more information about specifying the saveState parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

scale=True | False

when set to True, scales the numeric variables by the standard deviation of each column.

Alias scaling
Default False

* table={castable}

specifies the settings for an input table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

kPca Action

Performs kernel PCA training.

R Syntax

results <- cas.kernelPca.kPca(s,
attributes=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
bwTune=list(
method="RANDOMCMSE" | "SCMSE" | 64-bit-integer,
nClus=integer,
nPass=integer,
nSample=integer,
seed=integer
),
center=TRUE | FALSE,
centroids=list(
caslib="string",
compress=TRUE | FALSE,
indexVars=list("variable-name-1" <, "variable-name-2", ...>),
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where=list("string-1" <, "string-2", ...>)
),
centroidsPC=list(
caslib="string",
compress=TRUE | FALSE,
indexVars=list("variable-name-1" <, "variable-name-2", ...>),
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where=list("string-1" <, "string-2", ...>)
),
clusMaxiter=integer,
clusMethod="FC" | "KMPP" | "RANDOM" | 64-bit-integer,
clusNrestart=integer,
clusRandSeed=integer,
display=list(
caseSensitive=TRUE | FALSE,
exclude=TRUE | FALSE,
excludeAll=TRUE | FALSE,
keyIsPath=TRUE | FALSE,
names=list("string-1" <, "string-2", ...>),
pathType="LABEL" | "NAME",
traceNames=TRUE | FALSE
),
eigenVal=list(
caslib="string",
compress=TRUE | FALSE,
indexVars=list("variable-name-1" <, "variable-name-2", ...>),
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where=list("string-1" <, "string-2", ...>)
),
eigenVec=list(
required parameter casout=list(
caslib="string",
compress=TRUE | FALSE,
indexVars=list("variable-name-1" <, "variable-name-2", ...>),
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where=list("string-1" <, "string-2", ...>)
),
copyVars=list("variable-name-1" <, "variable-name-2", ...>)
),
exactScore=TRUE | FALSE,
id=list("variable-name-1" <, "variable-name-2", ...>),
inputs=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
intercept=double,
kerParam=double,
kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer,
mapCoeffs=list(
caslib="string",
compress=TRUE | FALSE,
indexVars=list("variable-name-1" <, "variable-name-2", ...>),
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where=list("string-1" <, "string-2", ...>)
),
mapParam=list(
intercept=double,
kerParam=double,
kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer,
lambda=double
),
maxClus=integer,
method="APPROXIMATE" | "EXACT" | 64-bit-integer,
order=TRUE | FALSE,
output=list(
required parameter casout=list(
caslib="string",
compress=TRUE | FALSE,
indexVars=list("variable-name-1" <, "variable-name-2", ...>),
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where=list("string-1" <, "string-2", ...>)
),
copyVars=list("variable-name-1" <, "variable-name-2", ...>),
npc=integer
),
outputTables=list(
groupByVarsRaw=TRUE | FALSE,
includeAll=TRUE | FALSE,
names=list("string-1" <, "string-2", ...>) | list(key-1=list(casouttable-1) <, key-2=list(casouttable-2), ...>),
repeated=TRUE | FALSE,
replace=TRUE | FALSE
),
preimage=TRUE | FALSE,
preimageMethod="ITERATIVE" | "MAP" | 64-bit-integer,
preimageNPC=integer,
rankThreshold=double,
saveState=list(
caslib="string",
compress=TRUE | FALSE,
indexVars=list("variable-name-1" <, "variable-name-2", ...>),
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where=list("string-1" <, "string-2", ...>)
),
scale=TRUE | FALSE,
required parameter table=list(
caslib="string",
computedOnDemand=TRUE | FALSE,
computedVars=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
computedVarsProgram="string",
dataSourceOptions=list(key-1=list(any-list-or-data-type-1) <, key-2=list(any-list-or-data-type-2), ...>),
groupBy=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
groupByMode="NOSORT" | "REDISTRIBUTE",
importOptions=list(fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters),
required parameter name="table-name",
orderBy=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
singlePass=TRUE | FALSE,
vars=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
where="where-expression",
whereTable=list(
casLib="string"
dataSourceOptions=list(adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters)
importOptions=list(fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters)
required parameter name="table-name"
vars=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>)
where="where-expression"
)
)
)
The "required parameter" label in the syntax (and a leading * in the parameter descriptions) indicates a required parameter.

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables

Parameter

Subparameter

Description

required parameter table

specifies the settings for an input table.

Parameters for Creating Output Tables

Parameter

Subparameter

Description

 centroids

specifies the output data table in which to save the centroids matrix.

 centroidsPC

specifies the output data table in which to save the centroids matrix of the KPCA principal components.

 eigenVal

specifies the output data table in which to save the eigenVal matrix.

 eigenVec

required parameter casout

specifies the output data table in which to save the eigenVector matrix.

 mapCoeffs

specifies the output data table in which to save the mapping pre-image coefficients matrix.

 output

required parameter casout

specifies the output data table in which to save the score values of the training data.

 outputTables

names

lists the names of results tables to save as CAS tables on the server.

 saveState

specifies the output data table in which to save the state of the eigenvector matrix for future scoring.

Parameter Descriptions

attributes=list( list(casinvardesc-1) <, list(casinvardesc-2), ...>)

specifies the variable attributes.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias attribute

bwTune=list(bwTune)

specifies the parameter list for the RBF kernel bandwidth tuning.

The bwTune value can be one or more of the following:

method="RANDOMCMSE" | "SCMSE" | 64-bit-integer

specifies the method to use in bandwidth tuning: the random criterion of maximum sum of eigenvalues method (random CMSE) or the scalable criterion of maximum sum of eigenvalues method (SCMSE).

nClus=integer

specifies the number of clusters to use in bandwidth tuning when the tuning method is the scalable criterion of maximum sum of eigenvalues method (SCMSE).

Default 100
nPass=integer

specifies the number of passes to use in bandwidth tuning when the tuning method is the random criterion of maximum sum of eigenvalues method (random CMSE).

Default 10
nSample=integer

specifies the random sample size to use in bandwidth tuning when the tuning method is the random criterion of maximum sum of eigenvalues method (random CMSE).

Default 100
seed=integer

specifies the random seed to use in bandwidth tuning.

center=TRUE | FALSE

when set to True, centers the numeric variables by the mean of each column.

Alias centering
Default FALSE

centroids=list(casouttable)

specifies the output data table in which to save the centroids matrix.

For more information about specifying the centroids parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

centroidsPC=list(casouttable)

specifies the output data table in which to save the centroids matrix of the KPCA principal components.

For more information about specifying the centroidsPC parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

clusCCriterion=double

specifies the convergence criterion for the k-means clustering algorithm.

Alias cc
Default 0.02

clusMaxiter=integer

specifies the maximum number of iterations for the k-means clustering algorithm.

Default 50

clusMethod="FC" | "KMPP" | "RANDOM" | 64-bit-integer

specifies the initial centroid type in k-means clustering.

Default KMPP
FC specifies fast clustering as the initial centroid type.
KMPP specifies k-means++ as the initial centroid type.
RANDOM specifies randomization as the initial centroid type.

clusNrestart=integer

specifies the number of restarts when the initial centroids are selected at random.

Default 5

clusRandSeed=integer

specifies the random seed to use in initial centroid selection.

display=list(displayTables)

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

eigenVal=list(casouttable)

specifies the output data table in which to save the eigenVal matrix.

For more information about specifying the eigenVal parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

eigenVec=list(eigOutput)

specifies the output data table in which to save the eigenVector matrix.

The eigOutput value can be one or more of the following:

* casout=list(casouttable)

specifies the output eigenvector table.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars=list("variable-name-1" <, "variable-name-2", ...>)

copies one or more variables from the input table to the output table.

Alias copyVar

exactScore=TRUE | FALSE

when set to False and the training method is low-rank approximation, implements the fast scoring method.

Alias ES
Default FALSE

id=list("variable-name-1" <, "variable-name-2", ...>)

specifies the variable to use as the record identifier.

inputs=list( list(casinvardesc-1) <, list(casinvardesc-2), ...>)

specifies the variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias input

intercept=double

specifies the constant term in the polynomial kernel.

Alias coefficient0
Default 1

kerParam=double

specifies the kernel parameter.

kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer

specifies the kernel type to use for kernel principal component analysis. "RBF" indicates the radial basis function type.

Aliases kernel
kernelType

mapCoeffs=list(casouttable)

specifies the output data table in which to save the mapping pre-image coefficients matrix.

For more information about specifying the mapCoeffs parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

mapParam=list(mapParm)

specifies the parameter list in the mapping pre-image method.

The mapParm value can be one or more of the following:

intercept=double

specifies the intercept parameter of the polynomial kernel for kernel ridge regression to use in the mapping pre-image method.

Default 1

kerParam=double

specifies the kernel parameter for kernel ridge regression to use in the mapping method.

kerType="LINEAR" | "POLYNOMIAL" | "RBF" | 64-bit-integer

specifies the kernel type for kernel ridge regression to use in the mapping method.

lambda=double

specifies the lambda parameter for the L2 regularization term for kernel ridge regression to use in the mapping pre-image method.

Alias L2
Default 1

maxClus=integer

specifies the maximum number of clusters to use in k-means clustering.

Alias maxc
Default 100

method="APPROXIMATE" | "EXACT" | 64-bit-integer

specifies the computation method to use for kernel principal component analysis.

order=TRUE | FALSE

when set to True, orders the input data set by the KPCA_ROWID variable (or by the ROWID variable if the KPCA_ROWID variable is absent) to generate consistent results when the data is distributed across multiple grid nodes.

Alias sort
Default FALSE

output=list(kpcaOutputStatement)

specifies the output data table in which to save the score values of the training data.

The kpcaOutputStatement value can be one or more of the following:

* casout=list(casouttable)

specifies the output scoring table for training data.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars=list("variable-name-1" <, "variable-name-2", ...>)

copies one or more variables from the input table to the output table.

Alias copyVar

npc=integer

specifies the number of principal components to use in scoring the training data.

Default 4

outputTables=list(outputTables)

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias DISPLAYOUT

preimage=TRUE | FALSE

when set to True, implements pre-image training.

Alias pre
Default FALSE

preimageMethod="ITERATIVE" | "MAP" | 64-bit-integer

specifies the method to use in pre-image training.

Alias preMethod
Default ITERATIVE
ITERATIVE uses the iterative method to calculate the pre-image.
MAP uses the mapping method to calculate the pre-image.

preimageNPC=integer

specifies the number of principal components to use in pre-image training. For the mapping method, this is also the number of principal components to use in pre-image scoring.

Alias preNPC
Default 4

rankThreshold=double

specifies the eigenvalue threshold to use for determining the kernel matrix rank.

Alias threshold
Default 1E-08

saveState=list(casouttable)

specifies the output data table in which to save the state of the eigenvector matrix for future scoring.

For more information about specifying the saveState parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

scale=TRUE | FALSE

when set to True, scales the numeric variables by the standard deviation of each column.

Alias scaling
Default FALSE

* table=list(castable)

specifies the settings for an input table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

Last updated: November 23, 2025