| 1 | # Modeling.py - Provides methods for modeling and prediction.
|
|---|
| 2 | #
|
|---|
| 3 | # Copyright (C) 2007 Jason J. Roberts and Ben D. Best
|
|---|
| 4 | #
|
|---|
| 5 | # This program is free software; you can redistribute it and/or
|
|---|
| 6 | # modify it under the terms of the GNU General Public License
|
|---|
| 7 | # as published by the Free Software Foundation; either version 2
|
|---|
| 8 | # of the License, or (at your option) any later version.
|
|---|
| 9 | #
|
|---|
| 10 | # This program is distributed in the hope that it will be useful,
|
|---|
| 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 13 | # GNU General Public License (available in the file LICENSE.TXT)
|
|---|
| 14 | # for more details.
|
|---|
| 15 | #
|
|---|
| 16 | # You should have received a copy of the GNU General Public License
|
|---|
| 17 | # along with this program; if not, write to the Free Software
|
|---|
| 18 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|---|
| 19 |
|
|---|
| 20 | import copy
|
|---|
| 21 | import glob
|
|---|
| 22 | import os.path
|
|---|
| 23 | import random
|
|---|
| 24 | import sys
|
|---|
| 25 | import types
|
|---|
| 26 |
|
|---|
| 27 | from GeoEco.ArcGIS import GeoprocessorManager
|
|---|
| 28 | from GeoEco.DatabaseAccess.ArcGIS import ArcGIS91DatabaseConnection
|
|---|
| 29 | from GeoEco.DataManagement.ArcGISRasters import ArcGISRaster
|
|---|
| 30 | from GeoEco.DataManagement.Directories import TemporaryDirectory
|
|---|
| 31 | from GeoEco.DataManagement.Files import File
|
|---|
| 32 | from GeoEco.DynamicDocString import DynamicDocString
|
|---|
| 33 | from GeoEco.Internationalization import _
|
|---|
| 34 | from GeoEco.Logging import Logger
|
|---|
| 35 | from GeoEco.R import R, RPackageDependency
|
|---|
| 36 |
|
|---|
| 37 | # Public classes
|
|---|
| 38 |
|
|---|
| 39 | class GLM(object):
|
|---|
| 40 | __doc__ = DynamicDocString()
|
|---|
| 41 |
|
|---|
| 42 | @classmethod
|
|---|
| 43 | def FitToArcGISTable(cls, inputTable, outputModelFile, formula,
|
|---|
| 44 | family=u'gaussian', where=None, link=None, variance=None,
|
|---|
| 45 | xColumnName=None, yColumnName=None, zColumnName=None, mColumnName=None,
|
|---|
| 46 | selectionMethod=None, logSelectionDetails=True,
|
|---|
| 47 | writeSummaryFile=True, writeDiagnosticPlots=True, numDiagLabels=3, diagLabelField=None, writeTermPlots=True, residuals=False, xAxis=True, commonScale=True, plotFileFormat=u'png', res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
|
|---|
| 48 | overwriteExisting=False):
|
|---|
| 49 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 50 |
|
|---|
| 51 | # If the caller requested emf format for the plot files,
|
|---|
| 52 | # convert the width and height from thousands of an inch to
|
|---|
| 53 | # inches.
|
|---|
| 54 |
|
|---|
| 55 | if plotFileFormat == u'emf':
|
|---|
| 56 | width = width / 1000
|
|---|
| 57 | height = height / 1000
|
|---|
| 58 |
|
|---|
| 59 | # Load the table into a temporary data frame.
|
|---|
| 60 |
|
|---|
| 61 | if diagLabelField is not None:
|
|---|
| 62 | extraFieldsToLoad = [diagLabelField]
|
|---|
| 63 | else:
|
|---|
| 64 | extraFieldsToLoad = None
|
|---|
| 65 |
|
|---|
| 66 | dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, familyParam = _LoadDataFrameForModelFitting(inputTable, {_(u'formula') : formula}, {_(u'formula') : True}, family, link, variance, None, where, xColumnName, yColumnName, zColumnName, mColumnName, extraFieldsToLoad)
|
|---|
| 67 |
|
|---|
| 68 | # Fit the GLM and create the output files in the temp
|
|---|
| 69 | # directory.
|
|---|
| 70 |
|
|---|
| 71 | r = R.GetInterpreter()
|
|---|
| 72 |
|
|---|
| 73 | try:
|
|---|
| 74 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'SummarizeModel.r'), False)
|
|---|
| 75 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'FitGLMForDataframe.r'), False)
|
|---|
| 76 |
|
|---|
| 77 | tempDir = TemporaryDirectory()
|
|---|
| 78 | tempOutputFile = os.path.join(tempDir.Path, u'output')
|
|---|
| 79 |
|
|---|
| 80 | if selectionMethod is None:
|
|---|
| 81 | selectionMethodParam = u'NULL'
|
|---|
| 82 | else:
|
|---|
| 83 | selectionMethodParam = u'"' + selectionMethod + u'"'
|
|---|
| 84 |
|
|---|
| 85 | if diagLabelField is None:
|
|---|
| 86 | diagLabelField = u'NULL'
|
|---|
| 87 | else:
|
|---|
| 88 | diagLabelField = u'"' + diagLabelField + u'"'
|
|---|
| 89 |
|
|---|
| 90 | numTermPlots = r('FitGLMForDataframe(f=%s, d=%s, fam=%s, outputModelFile="%s", xVar=%s, yVar=%s, zVar=%s, mVar=%s, coordinateSystem=%s, selectionMethod=%s, logSelectionDetails=%s, writeSummaryFile=%s, writeDiagnosticPlots=%s, numDiagLabels=%i, diagLabelField=%s, writeTermPlots=%s, partial.resid=%s, xAxis=%s, commonScale=%s, plotFileFormat="%s", res=%f, width=%f, height=%f, pointSize=%f, bg="%s")' % (formula, dataFrameName, familyParam, tempOutputFile.replace('\\', '\\\\'), xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, selectionMethodParam, str(logSelectionDetails).upper(), str(writeSummaryFile).upper(), str(writeDiagnosticPlots).upper(), numDiagLabels, diagLabelField, str(writeTermPlots).upper(), str(residuals).upper(), str(xAxis).upper(), str(commonScale).upper(), plotFileFormat, res, width, height, pointSize, bg))
|
|---|
| 91 |
|
|---|
| 92 | # Move the temporary output files to the requested
|
|---|
| 93 | # location.
|
|---|
| 94 |
|
|---|
| 95 | File.MoveSilent(tempOutputFile, outputModelFile, overwriteExisting=overwriteExisting)
|
|---|
| 96 |
|
|---|
| 97 | outputFilePrefix = os.path.splitext(outputModelFile)[0]
|
|---|
| 98 |
|
|---|
| 99 | if writeSummaryFile:
|
|---|
| 100 | File.MoveSilent(os.path.join(tempDir.Path, u'output_summary.txt'), outputFilePrefix + u'_summary.txt', overwriteExisting=overwriteExisting)
|
|---|
| 101 |
|
|---|
| 102 | if writeDiagnosticPlots and os.path.isfile(os.path.join(tempDir.Path, u'output_resid_fit.%s' % plotFileFormat)):
|
|---|
| 103 | File.MoveSilent(os.path.join(tempDir.Path, u'output_resid_fit.%s' % plotFileFormat), outputFilePrefix + u'_resid_fit.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 104 | File.MoveSilent(os.path.join(tempDir.Path, u'output_qq.%s' % plotFileFormat), outputFilePrefix + u'_qq.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 105 | File.MoveSilent(os.path.join(tempDir.Path, u'output_scale_loc.%s' % plotFileFormat), outputFilePrefix + u'_scale_loc.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 106 | File.MoveSilent(os.path.join(tempDir.Path, u'output_cooks.%s' % plotFileFormat), outputFilePrefix + u'_cooks.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 107 | File.MoveSilent(os.path.join(tempDir.Path, u'output_resid_lev.%s' % plotFileFormat), outputFilePrefix + u'_resid_lev.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 108 | File.MoveSilent(os.path.join(tempDir.Path, u'output_cooks_lev.%s' % plotFileFormat), outputFilePrefix + u'_cooks_lev.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 109 |
|
|---|
| 110 | if writeTermPlots and numTermPlots > 0:
|
|---|
| 111 | for i in range(int(numTermPlots)):
|
|---|
| 112 | File.MoveSilent(os.path.join(tempDir.Path, u'output_term%02i.%s' % (i+1, plotFileFormat)), outputFilePrefix + (u'_term%02i.%s' % (i+1, plotFileFormat)), overwriteExisting=overwriteExisting)
|
|---|
| 113 |
|
|---|
| 114 | # Delete the data frame.
|
|---|
| 115 |
|
|---|
| 116 | finally:
|
|---|
| 117 | r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
|
|---|
| 118 |
|
|---|
| 119 | @classmethod
|
|---|
| 120 | def PredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, outputErrorRaster=None, outputBinaryResponseRaster=None, cutoff=None, buildPyramids=False, overwriteExisting=False):
|
|---|
| 121 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 122 | _PredictFromArcGISRasters('glm', _(u'Fit GLM'), inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, outputErrorRaster, outputBinaryResponseRaster, cutoff, buildPyramids, overwriteExisting)
|
|---|
| 123 |
|
|---|
| 124 |
|
|---|
| 125 | class GAM(object):
|
|---|
| 126 | __doc__ = DynamicDocString()
|
|---|
| 127 |
|
|---|
| 128 | @classmethod
|
|---|
| 129 | def FitToArcGISTable(cls, inputTable, outputModelFile, formula, family=u'gaussian', rPackage=u'mgcv',
|
|---|
| 130 | where=None, link=None, variance=None, theta=None, method=u'GCV.Cp', optimizer=u'outer', alternativeOptimizer=u'newton',
|
|---|
| 131 | xColumnName=None, yColumnName=None, zColumnName=None, mColumnName=None,
|
|---|
| 132 | selectionMethod=None, logSelectionDetails=True,
|
|---|
| 133 | writeSummaryFile=True, writeDiagnosticPlots=True, writeTermPlots=True, residuals=False, xAxis=True, commonScale=True, plotFileFormat=u'png', res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
|
|---|
| 134 | overwriteExisting=False):
|
|---|
| 135 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 136 |
|
|---|
| 137 | # Perform additional parameter validation.
|
|---|
| 138 |
|
|---|
| 139 | if rPackage == u'mgcv' and selectionMethod is not None:
|
|---|
| 140 | Logger.RaiseException(ValueError(_(u'Automated model selection is not currently supported when the GAM is fitted using the R mgcv package. To perform automated model selection, you must use the gam package. As an alternative, you can use the mgcv package and use splines with "shrinkage". See the documentation for the Automated Model Selection Method parameter for more information. ')))
|
|---|
| 141 |
|
|---|
| 142 | if family == u'negbin':
|
|---|
| 143 | if rPackage != u'mgcv':
|
|---|
| 144 | Logger.RaiseException(ValueError(_(u'The "negbin" (negative binomial) model family is only available when the GAM is fitted using the R mgcv package.')))
|
|---|
| 145 |
|
|---|
| 146 | if theta is None:
|
|---|
| 147 | Logger.RaiseException(ValueError(_(u'The "negbin" (negative binomial) model family was specifed but the theta parameter was omitted. You must specify a value for the theta parameter when using the negbin model family.')))
|
|---|
| 148 |
|
|---|
| 149 | # If the caller requested emf format for the plot files,
|
|---|
| 150 | # convert the width and height from thousands of an inch to
|
|---|
| 151 | # inches.
|
|---|
| 152 |
|
|---|
| 153 | if plotFileFormat == u'emf':
|
|---|
| 154 | width = width / 1000
|
|---|
| 155 | height = height / 1000
|
|---|
| 156 |
|
|---|
| 157 | # Load the R package.
|
|---|
| 158 |
|
|---|
| 159 | dependency = RPackageDependency(rPackage)
|
|---|
| 160 | dependency.Initialize()
|
|---|
| 161 |
|
|---|
| 162 | # Load the table into a temporary data frame.
|
|---|
| 163 |
|
|---|
| 164 | dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, familyParam = _LoadDataFrameForModelFitting(inputTable, {_(u'formula') : formula}, {_(u'formula') : True}, family, link, variance, theta, where, xColumnName, yColumnName, zColumnName, mColumnName)
|
|---|
| 165 |
|
|---|
| 166 | # Fit the GLM and create the output files in the temp
|
|---|
| 167 | # directory.
|
|---|
| 168 |
|
|---|
| 169 | r = R.GetInterpreter()
|
|---|
| 170 |
|
|---|
| 171 | try:
|
|---|
| 172 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'SummarizeModel.r'), False)
|
|---|
| 173 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'FitGAMForDataframe.r'), False)
|
|---|
| 174 |
|
|---|
| 175 | tempDir = TemporaryDirectory()
|
|---|
| 176 | tempOutputFile = os.path.join(tempDir.Path, u'output')
|
|---|
| 177 |
|
|---|
| 178 | if method is None:
|
|---|
| 179 | methodParam = u'NULL'
|
|---|
| 180 | else:
|
|---|
| 181 | methodParam = u'"' + method + u'"'
|
|---|
| 182 |
|
|---|
| 183 | if optimizer is None:
|
|---|
| 184 | optimizerParam = u'NULL'
|
|---|
| 185 | elif optimizer != u'outer':
|
|---|
| 186 | optimizerParam = u'"' + optimizer + u'"'
|
|---|
| 187 | else:
|
|---|
| 188 | if alternativeOptimizer is None:
|
|---|
| 189 | alternativeOptimizer = u'newton'
|
|---|
| 190 | optimizerParam = u'c("' + optimizer + u'", "' + alternativeOptimizer + u'")'
|
|---|
| 191 |
|
|---|
| 192 | if selectionMethod is None:
|
|---|
| 193 | selectionMethodParam = u'NULL'
|
|---|
| 194 | else:
|
|---|
| 195 | selectionMethodParam = u'"' + selectionMethod + u'"'
|
|---|
| 196 |
|
|---|
| 197 | if rPackage != u'mgcv':
|
|---|
| 198 | writeDiagnosticPlots = False
|
|---|
| 199 |
|
|---|
| 200 | numTermPlots = r('FitGAMForDataframe(f=%s, d=%s, fam=%s, rPackage="%s", outputModelFile="%s", method=%s, optimizer=%s, xVar=%s, yVar=%s, zVar=%s, mVar=%s, coordinateSystem=%s, selectionMethod=%s, logSelectionDetails=%s, writeSummaryFile=%s, writeDiagnosticPlots=%s, writeTermPlots=%s, partial.resid=%s, xAxis=%s, commonScale=%s, plotFileFormat="%s", res=%f, width=%f, height=%f, pointSize=%f, bg="%s")' %
|
|---|
| 201 | (formula, dataFrameName, familyParam, rPackage, tempOutputFile.replace('\\', '\\\\'),
|
|---|
| 202 | methodParam, optimizerParam,
|
|---|
| 203 | xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam,
|
|---|
| 204 | selectionMethodParam, str(logSelectionDetails).upper(),
|
|---|
| 205 | str(writeSummaryFile).upper(), str(writeDiagnosticPlots).upper(), str(writeTermPlots).upper(), str(residuals).upper(), str(xAxis).upper(), str(commonScale).upper(), plotFileFormat, res, width, height, pointSize, bg))
|
|---|
| 206 |
|
|---|
| 207 | # Move the temporary output files to the requested
|
|---|
| 208 | # location.
|
|---|
| 209 |
|
|---|
| 210 | File.MoveSilent(tempOutputFile, outputModelFile, overwriteExisting=overwriteExisting)
|
|---|
| 211 |
|
|---|
| 212 | outputFilePrefix = os.path.splitext(outputModelFile)[0]
|
|---|
| 213 |
|
|---|
| 214 | if writeSummaryFile:
|
|---|
| 215 | File.MoveSilent(os.path.join(tempDir.Path, u'output_summary.txt'), outputFilePrefix + u'_summary.txt', overwriteExisting=overwriteExisting)
|
|---|
| 216 |
|
|---|
| 217 | if writeDiagnosticPlots and os.path.isfile(os.path.join(tempDir.Path, u'output_diag.%s' % plotFileFormat)):
|
|---|
| 218 | File.MoveSilent(os.path.join(tempDir.Path, u'output_diag.%s' % plotFileFormat), outputFilePrefix + u'_diag.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 219 |
|
|---|
| 220 | if writeTermPlots and numTermPlots > 0:
|
|---|
| 221 | for i in range(int(numTermPlots)):
|
|---|
| 222 | File.MoveSilent(os.path.join(tempDir.Path, u'output_term%02i.%s' % (i+1, plotFileFormat)), outputFilePrefix + (u'_term%02i.%s' % (i+1, plotFileFormat)), overwriteExisting=overwriteExisting)
|
|---|
| 223 |
|
|---|
| 224 | # Delete the data frame.
|
|---|
| 225 |
|
|---|
| 226 | finally:
|
|---|
| 227 | r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
|
|---|
| 228 |
|
|---|
| 229 | @classmethod
|
|---|
| 230 | def PredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, outputErrorRaster=None, outputBinaryResponseRaster=None, cutoff=None, buildPyramids=False, overwriteExisting=False):
|
|---|
| 231 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 232 | _PredictFromArcGISRasters('gam', _(u'Fit GAM'), inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, outputErrorRaster, outputBinaryResponseRaster, cutoff, buildPyramids, overwriteExisting)
|
|---|
| 233 |
|
|---|
| 234 | @classmethod
|
|---|
| 235 | def BayesPredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, thresholds, outputProbabilityRasters, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, samples=1000, chunks=100, buildPyramids=False, overwriteExisting=False):
|
|---|
| 236 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 237 |
|
|---|
| 238 | # Perform additional parameter validation.
|
|---|
| 239 |
|
|---|
| 240 | inputPredictorRastersLower = map(lambda s: unicode(s).lower(), inputPredictorRasters)
|
|---|
| 241 | for i in range(len(outputProbabilityRasters)):
|
|---|
| 242 | raster = outputProbabilityRasters[i].lower()
|
|---|
| 243 | if raster in inputPredictorRastersLower:
|
|---|
| 244 | Logger.RaiseException(ValueError(_(u'The output raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output raster or remove it from the list of input predictor rasters.') % {u'out': outputProbabilityRasters[i]}))
|
|---|
| 245 |
|
|---|
| 246 | # Load the fitted model.
|
|---|
| 247 |
|
|---|
| 248 | r = R.GetInterpreter()
|
|---|
| 249 | gp = GeoprocessorManager.GetWrappedGeoprocessor()
|
|---|
| 250 |
|
|---|
| 251 | r('load("%s")' % inputModelFile.replace('\\', '\\\\'))
|
|---|
| 252 | try:
|
|---|
| 253 | if not r('exists("model")'):
|
|---|
| 254 | Logger.RaiseException(ValueError(_('The input model file %(file)s does not contain a variable called "model", indicating that it was not generated by the Fit GAM tool. Please provide a file that was generated by the Fit GAM tool.') % {u'file': inputModelFile}))
|
|---|
| 255 | if 'gam' not in r('class(model)'):
|
|---|
| 256 | Logger.RaiseException(ValueError(_('The input model file %(file)s contains a variable called "model" but it is not a GAM, indicating that it was not generated by the Fit GAM tool. Please provide a file that was generated by the Fit GAM tool.') % {u'file': inputModelFile}))
|
|---|
| 257 | if not r('exists("rPackage")') or r['rPackage'] != 'mgcv':
|
|---|
| 258 | Logger.RaiseException(ValueError(_('The input model in file %(file)s was fitted by the R %(pkg)s package, but this tool requires that the model be fitted with the mgcv package. Please refit the model using the mgcv package and try again.') % {u'file': inputModelFile, u'pkg': r['rPackage']}))
|
|---|
| 259 |
|
|---|
| 260 | Logger.Info(_(u'Loaded %(type)s model from %(file)s. The model was fitted with the R %(pkg)s package.') % {u'type': r('class(model)[1]').upper(), u'file': inputModelFile, u'pkg': r['rPackage']})
|
|---|
| 261 |
|
|---|
| 262 | # Log a summery of the model, to remind the user what it is.
|
|---|
| 263 |
|
|---|
| 264 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'SummarizeModel.r'), False)
|
|---|
| 265 |
|
|---|
| 266 | r('writeLines("")')
|
|---|
| 267 | r('writeLines("MODEL SUMMARY:")')
|
|---|
| 268 | r('writeLines("==============")')
|
|---|
| 269 | r('writeLines(SummarizeModel(model))')
|
|---|
| 270 | r('writeLines("")')
|
|---|
| 271 |
|
|---|
| 272 | if r('model$family$family') != 'binomial' or r('model$family$link') != 'logit':
|
|---|
| 273 | Logger.RaiseException(ValueError(_('The input model in the file %(file)s is of the %(fam)s family and uses the %(link)s link function. This tool requires the model to be of the binomial family and use the logit link function. Please refit the model and try again. If this is not appropriate for your modeling problem, please contact the author of this tool for assistance.') % {u'file': inputModelFile, u'pkg': r['rPackage'], u'fam': r('model$family$family'), u'link': r('model$family$link')}))
|
|---|
| 274 |
|
|---|
| 275 | # Prepare the input rasters for the prediction.
|
|---|
| 276 |
|
|---|
| 277 | tempDir = TemporaryDirectory()
|
|---|
| 278 |
|
|---|
| 279 | _PreparePredictorRasters(r, gp, tempDir, variableNames, inputPredictorRasters, templateRaster, resamplingTechniques)
|
|---|
| 280 |
|
|---|
| 281 | # Do the prediction. The prediction rasters are
|
|---|
| 282 | # returned as ArcInfo ASCII grids because rgdal cannot
|
|---|
| 283 | # write rasters (ArcInfo binary grids).
|
|---|
| 284 | #
|
|---|
| 285 | # Force the logging system to log R Info messages as
|
|---|
| 286 | # Debug so that the user is not spammed with "Closing
|
|---|
| 287 | # GDAL dataset handle" messages. There appears to be
|
|---|
| 288 | # no way to suppress these messages from within R.
|
|---|
| 289 |
|
|---|
| 290 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'BayesPredictGAMForArcGISRasters.r'), False)
|
|---|
| 291 |
|
|---|
| 292 | oldLoggingLevel = r.LogInfoAsDebug
|
|---|
| 293 | r.LogInfoAsDebug = True
|
|---|
| 294 | try:
|
|---|
| 295 | if chunks is not None:
|
|---|
| 296 | chunks = str(chunks)
|
|---|
| 297 | else:
|
|---|
| 298 | chunks = 'NULL'
|
|---|
| 299 |
|
|---|
| 300 | for i in range(len(outputProbabilityRasters)):
|
|---|
| 301 | Logger.Info(_(u'Predicting for threshold %g...') % thresholds[i])
|
|---|
| 302 | r('BayesPredictGAMForArcGISRasters(model, rastersForPredictors, %g, "%s", %s, %i, %s)' % (thresholds[i], os.path.join(tempDir.Path, u'temp_output%i.txt' % i).replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper(), samples, chunks))
|
|---|
| 303 | finally:
|
|---|
| 304 | r.LogInfoAsDebug = oldLoggingLevel
|
|---|
| 305 |
|
|---|
| 306 | # Convert the ArcInfo ASCII grids to rasters.
|
|---|
| 307 |
|
|---|
| 308 | Logger.Info(_(u'Creating outputs...'))
|
|---|
| 309 |
|
|---|
| 310 | for i in range(len(outputProbabilityRasters)):
|
|---|
| 311 | tempOutputRaster = os.path.join(tempDir.Path, u'output%i' % i)
|
|---|
| 312 | gp.ASCIIToRaster_Conversion(os.path.join(tempDir.Path, u'temp_output%i.txt' % i), tempOutputRaster, u'FLOAT')
|
|---|
| 313 | gp.DefineProjection_management(tempOutputRaster, coordinateSystem) # TODO: Fix this
|
|---|
| 314 | if buildPyramids:
|
|---|
| 315 | gp.BuildPyramids_Management(tempOutputRaster)
|
|---|
| 316 |
|
|---|
| 317 | # Copy the output rasters to the requested destinations.
|
|---|
| 318 |
|
|---|
| 319 | for i in range(len(outputProbabilityRasters)):
|
|---|
| 320 | ArcGISRaster.CopySilent(os.path.join(tempDir.Path, u'output%i' % i), outputProbabilityRasters[i], overwriteExisting=overwriteExisting)
|
|---|
| 321 |
|
|---|
| 322 | # Delete R variables assigned by this function.
|
|---|
| 323 |
|
|---|
| 324 | finally:
|
|---|
| 325 | r('if (exists("rastersForPredictors")) rm("rastersForPredictors")')
|
|---|
| 326 | r('if (exists("model")) rm("model")')
|
|---|
| 327 | r('if (exists("rPackage")) rm("rPackage")')
|
|---|
| 328 | r('if (exists("xVar")) rm("xVar")')
|
|---|
| 329 | r('if (exists("yVar")) rm("yVar")')
|
|---|
| 330 | r('if (exists("zVar")) rm("zVar")')
|
|---|
| 331 | r('if (exists("mVar")) rm("mVar")')
|
|---|
| 332 | r('if (exists("coordinateSystem")) rm("coordinateSystem")')
|
|---|
| 333 |
|
|---|
| 334 |
|
|---|
| 335 | class TreeModel(object):
|
|---|
| 336 | __doc__ = DynamicDocString()
|
|---|
| 337 |
|
|---|
| 338 | @classmethod
|
|---|
| 339 | def FitToArcGISTable(cls, inputTable, outputModelFile, formula, method,
|
|---|
| 340 | where=None, allowMissingCovariates=True, minSplit=20, minBucket=7, cp=0.01, maxCompete=4, maxSurrogate=5, useSurrogate=2, surrogateStyle=0, xval=1000, maxDepth=30, pruningMethod=None, pruningCP=None,
|
|---|
| 341 | xColumnName=None, yColumnName=None, zColumnName=None, mColumnName=None,
|
|---|
| 342 | writeSummaryFile=True, writeDiagnosticPlots=True, writeTreePlot=True, writePrunedTreePlot=True, plotFileFormat=u'png', res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
|
|---|
| 343 | treePlotType=0, extra=1, percentage=True, under=True, clipRightLabels=True, fallenLeaves=False, branchType=0, branch=0.2, uniform=True, digits=2, varlen=0, faclen=0, cex=None, tweak=1., compress=True, ycompress=True,
|
|---|
| 344 | overwriteExisting=False):
|
|---|
| 345 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 346 |
|
|---|
| 347 | # Perform additional parameter validation.
|
|---|
| 348 |
|
|---|
| 349 | if pruningMethod == u'user specified cp' and pruningCP is None:
|
|---|
| 350 | Logger.RaiseException(ValueError(_(u'The Pruning Method is \'User specified\' but no Complexity Parameter for Pruning was provided. Please provide a complexity parameter for pruning or select a different Pruning Method.')))
|
|---|
| 351 |
|
|---|
| 352 | # If the caller requested emf format for the plot files,
|
|---|
| 353 | # convert the width and height from thousands of an inch to
|
|---|
| 354 | # inches.
|
|---|
| 355 |
|
|---|
| 356 | if plotFileFormat == u'emf':
|
|---|
| 357 | width = width / 1000
|
|---|
| 358 | height = height / 1000
|
|---|
| 359 |
|
|---|
| 360 | # Load the table into a temporary data frame.
|
|---|
| 361 |
|
|---|
| 362 | dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam = _LoadDataFrameForModelFitting(inputTable, {_(u'formula') : formula}, {_(u'formula') : True}, None, None, None, None, where, xColumnName, yColumnName, zColumnName, mColumnName)[:-1] # We do not need the familyParam
|
|---|
| 363 |
|
|---|
| 364 | # Fit the tree model and create the output files in the temp
|
|---|
| 365 | # directory.
|
|---|
| 366 |
|
|---|
| 367 | r = R.GetInterpreter()
|
|---|
| 368 |
|
|---|
| 369 | try:
|
|---|
| 370 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'FitTreeModelForDataframe.r'), False)
|
|---|
| 371 |
|
|---|
| 372 | tempDir = TemporaryDirectory()
|
|---|
| 373 | tempOutputFile = os.path.join(tempDir.Path, u'output')
|
|---|
| 374 |
|
|---|
| 375 | if pruningMethod is None:
|
|---|
| 376 | pruningMethod = u'NULL'
|
|---|
| 377 | else:
|
|---|
| 378 | pruningMethod = u'"' + pruningMethod + u'"'
|
|---|
| 379 |
|
|---|
| 380 | if pruningCP is None:
|
|---|
| 381 | pruningCP = u'NULL'
|
|---|
| 382 | else:
|
|---|
| 383 | pruningCP = repr(pruningCP)
|
|---|
| 384 |
|
|---|
| 385 | if cex is None:
|
|---|
| 386 | cex = u'NULL'
|
|---|
| 387 | else:
|
|---|
| 388 | cex = repr(cex)
|
|---|
| 389 |
|
|---|
| 390 | r('FitTreeModelForDataframe(f=%s, d=%s, outputModelFile="%s", method="%s", allowMissingCovariates=%s, minSplit=%i, minBucket=%i, cp=%f, maxCompete=%i, maxSurrogate=%i, useSurrogate=%i, surrogateStyle=%i, xval=%i, maxDepth=%i, pruningMethod=%s, pruningCP=%s, xVar=%s, yVar=%s, zVar=%s, mVar=%s, coordinateSystem=%s, writeSummaryFile=%s, writeDiagnosticPlots=%s, writeTreePlot=%s, writePrunedTreePlot=%s, plotFileFormat="%s", res=%f, width=%f, height=%f, pointSize=%f, bg="%s", treePlotType=%i, extra=%i, percentage=%s, under=%s, clipRightLabels=%s, fallenLeaves=%s, branchType=%i, branch=%f, uniform=%s, digits=%i, varlen=%i, faclen=%i, cex=%s, tweak=%f, compress=%s, ycompress=%s)' %
|
|---|
| 391 | (formula, dataFrameName, tempOutputFile.replace('\\', '\\\\'), method,
|
|---|
| 392 | str(allowMissingCovariates).upper(), minSplit, minBucket, cp, maxCompete, maxSurrogate, useSurrogate, surrogateStyle, xval, maxDepth, pruningMethod, pruningCP,
|
|---|
| 393 | xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam,
|
|---|
| 394 | str(writeSummaryFile).upper(), str(writeDiagnosticPlots).upper(), str(writeTreePlot).upper(), str(writePrunedTreePlot).upper(), plotFileFormat, res, width, height, pointSize, bg,
|
|---|
| 395 | treePlotType, extra, str(percentage).upper(), str(under).upper(), str(clipRightLabels).upper(), str(fallenLeaves).upper(), branchType, branch, str(uniform).upper(), digits, varlen, faclen, cex, tweak, str(compress).upper(), str(ycompress).upper()))
|
|---|
| 396 |
|
|---|
| 397 | # Move the temporary output files to the requested
|
|---|
| 398 | # location.
|
|---|
| 399 |
|
|---|
| 400 | File.MoveSilent(tempOutputFile, outputModelFile, overwriteExisting=overwriteExisting)
|
|---|
| 401 |
|
|---|
| 402 | outputFilePrefix = os.path.splitext(outputModelFile)[0]
|
|---|
| 403 |
|
|---|
| 404 | if writeSummaryFile:
|
|---|
| 405 | File.MoveSilent(os.path.join(tempDir.Path, u'output_summary.txt'), outputFilePrefix + u'_summary.txt', overwriteExisting=overwriteExisting)
|
|---|
| 406 |
|
|---|
| 407 | if writeDiagnosticPlots:
|
|---|
| 408 | if os.path.isfile(os.path.join(tempDir.Path, u'output_cp.' + plotFileFormat)):
|
|---|
| 409 | File.MoveSilent(os.path.join(tempDir.Path, u'output_cp.' + plotFileFormat), outputFilePrefix + u'_cp.' + plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 410 |
|
|---|
| 411 | if os.path.isfile(os.path.join(tempDir.Path, u'output_rsquare.' + plotFileFormat)):
|
|---|
| 412 | File.MoveSilent(os.path.join(tempDir.Path, u'output_rsquare.' + plotFileFormat), outputFilePrefix + u'_rsquare.' + plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 413 |
|
|---|
| 414 | if os.path.isfile(os.path.join(tempDir.Path, u'output_residuals.' + plotFileFormat)):
|
|---|
| 415 | File.MoveSilent(os.path.join(tempDir.Path, u'output_residuals.' + plotFileFormat), outputFilePrefix + u'_residuals.' + plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 416 |
|
|---|
| 417 | if os.path.isfile(os.path.join(tempDir.Path, u'output_pruned_residuals.' + plotFileFormat)):
|
|---|
| 418 | File.MoveSilent(os.path.join(tempDir.Path, u'output_pruned_residuals.' + plotFileFormat), outputFilePrefix + u'_pruned_residuals.' + plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 419 |
|
|---|
| 420 | if writeTreePlot and os.path.isfile(os.path.join(tempDir.Path, u'output_unpruned_tree.' + plotFileFormat)):
|
|---|
| 421 | File.MoveSilent(os.path.join(tempDir.Path, u'output_unpruned_tree.' + plotFileFormat), outputFilePrefix + u'_unpruned_tree.' + plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 422 |
|
|---|
| 423 | if pruningMethod != u'NULL' and writePrunedTreePlot and os.path.isfile(os.path.join(tempDir.Path, u'output_pruned_tree.' + plotFileFormat)):
|
|---|
| 424 | File.MoveSilent(os.path.join(tempDir.Path, u'output_pruned_tree.' + plotFileFormat), outputFilePrefix + u'_pruned_tree.' + plotFileFormat, overwriteExisting=overwriteExisting)
|
|---|
| 425 |
|
|---|
| 426 | # Delete the data frame.
|
|---|
| 427 |
|
|---|
| 428 | finally:
|
|---|
| 429 | r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
|
|---|
| 430 |
|
|---|
| 431 | @classmethod
|
|---|
| 432 | def PredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, buildPyramids=False, overwriteExisting=False):
|
|---|
| 433 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 434 | _PredictFromArcGISRasters('rpart', _(u'Fit Tree Model'), inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, None, None, None, buildPyramids, overwriteExisting)
|
|---|
| 435 |
|
|---|
| 436 |
|
|---|
| 437 | class LinearMixedModel(object):
|
|---|
| 438 | __doc__ = DynamicDocString()
|
|---|
| 439 |
|
|---|
| 440 | @classmethod
|
|---|
| 441 | def FitToArcGISTable(cls, inputTable, outputModelFile, fixedFormula, randomFormula,
|
|---|
| 442 | where=None, method=u'REML', correlationStructure=None, correlationFormula=None, range_=None, nugget=None, metric=None, fixed=False,
|
|---|
| 443 | xColumnName=None, yColumnName=None, zColumnName=None, mColumnName=None,
|
|---|
| 444 | writeSummaryFile=True, writeDiagnosticPlots=True, plotFileFormat=u'png', res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
|
|---|
| 445 | overwriteExisting=False):
|
|---|
| 446 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 447 |
|
|---|
| 448 | # Perform additional validation.
|
|---|
| 449 |
|
|---|
| 450 | if correlationStructure is not None and correlationFormula is None or correlationStructure is None and correlationFormula is not None:
|
|---|
| 451 | Logger.RaiseException(ValueError(_('To fit a model that includes for within-group correlation, you must specify both the a correlation structure and a correlation formula. If you do not wish to include within-group correlation, you must omit both of those parameters.')))
|
|---|
| 452 |
|
|---|
| 453 | if correlationStructure is not None and range_ is None and nugget is not None:
|
|---|
| 454 | Logger.RaiseException(ValueError(_('If the Nugget parameter is specified, the Range parameter must also be specified.')))
|
|---|
| 455 |
|
|---|
| 456 | # If the caller requested emf format for the plot files,
|
|---|
| 457 | # convert the width and height from thousands of an inch to
|
|---|
| 458 | # inches.
|
|---|
| 459 |
|
|---|
| 460 | if plotFileFormat == u'emf':
|
|---|
| 461 | width = width / 1000
|
|---|
| 462 | height = height / 1000
|
|---|
| 463 |
|
|---|
| 464 | # Load the table into a temporary data frame.
|
|---|
| 465 |
|
|---|
| 466 | dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, familyParam = _LoadDataFrameForModelFitting(inputTable, {_(u'fixed effects formula') : fixedFormula, _(u'random effects formula') : randomFormula, _(u'correlation formula') : correlationFormula}, {_(u'fixed effects formula') : True, _(u'random effects formula') : False, _(u'correlation formula') : False}, None, None, None, None, where, xColumnName, yColumnName, zColumnName, mColumnName, None)
|
|---|
| 467 |
|
|---|
| 468 | # Fit the model with the nlme package and create the output
|
|---|
| 469 | # files in the temp directory.
|
|---|
| 470 |
|
|---|
| 471 | r = R.GetInterpreter()
|
|---|
| 472 |
|
|---|
| 473 | try:
|
|---|
| 474 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'SummarizeModel.r'), False)
|
|---|
| 475 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'FitLMEForDataframe.r'), False)
|
|---|
| 476 |
|
|---|
| 477 | tempDir = TemporaryDirectory()
|
|---|
| 478 | tempOutputFile = os.path.join(tempDir.Path, u'output')
|
|---|
| 479 |
|
|---|
| 480 | if randomFormula is None:
|
|---|
| 481 | randomFormula = u'NULL'
|
|---|
| 482 |
|
|---|
| 483 | if correlationStructure is not None:
|
|---|
| 484 | if correlationStructure == u'exponential':
|
|---|
| 485 | correlation = u'corExp('
|
|---|
| 486 | elif correlationStructure == u'gaussian':
|
|---|
| 487 | correlation = u'corGaus('
|
|---|
| 488 | elif correlationStructure == u'linear':
|
|---|
| 489 | correlation = u'corLin('
|
|---|
| 490 | elif correlationStructure == u'rational quadratic':
|
|---|
| 491 | correlation = u'corRatio('
|
|---|
| 492 | elif correlationStructure == u'spherical':
|
|---|
| 493 | correlation = u'corSpher('
|
|---|
| 494 | else:
|
|---|
| 495 | Logger.RaiseException(RuntimeError(_('Unknown correlationStructure value \'%(cs)s\'. Please contact the author of this tool for assistance.') % {u'cs': correlationStructure}))
|
|---|
| 496 |
|
|---|
| 497 | if range_ is not None:
|
|---|
| 498 | if nugget is None:
|
|---|
| 499 | correlation += repr(range_) + ', '
|
|---|
| 500 | else:
|
|---|
| 501 | correlation += u'c(' + repr(range_) + u', ' + repr(nugget) + u'), '
|
|---|
| 502 |
|
|---|
| 503 | correlation += u'form=' + correlationFormula + u', nugget=' + repr(nugget is not None).upper()
|
|---|
| 504 |
|
|---|
| 505 | if metric is not None:
|
|---|
| 506 | correlation += u', metric="' + metric + u'"'
|
|---|
| 507 |
|
|---|
| 508 | correlation += u', fixed=' + repr(fixed).upper() + u')'
|
|---|
| 509 |
|
|---|
| 510 | else:
|
|---|
| 511 | correlation = u'NULL'
|
|---|
| 512 |
|
|---|
| 513 | r('library(nlme)') # Required here so we can instantiate correlation classes
|
|---|
| 514 |
|
|---|
| 515 | r('FitLMEForDataframe(fixed=%s, data=%s, outputModelFile="%s", random=%s, correlation=%s, method="%s", xVar=%s, yVar=%s, zVar=%s, mVar=%s, coordinateSystem=%s, writeSummaryFile=%s, writeDiagnosticPlots=%s, plotFileFormat="%s", res=%f, width=%f, height=%f, pointSize=%f, bg="%s")' %
|
|---|
| 516 | (fixedFormula, dataFrameName, tempOutputFile.replace('\\', '\\\\'), randomFormula, correlation, method,
|
|---|
| 517 | xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam,
|
|---|
| 518 | str(writeSummaryFile).upper(), str(writeDiagnosticPlots).upper(), plotFileFormat, res, width, height, pointSize, bg))
|
|---|
| 519 |
|
|---|
| 520 | # Move the temporary output files to the requested
|
|---|
| 521 | # location.
|
|---|
| 522 |
|
|---|
| 523 | File.MoveSilent(tempOutputFile, outputModelFile, overwriteExisting=overwriteExisting)
|
|---|
| 524 |
|
|---|
| 525 | outputFilePrefix = os.path.splitext(outputModelFile)[0]
|
|---|
| 526 |
|
|---|
| 527 | if writeSummaryFile:
|
|---|
| 528 | File.MoveSilent(os.path.join(tempDir.Path, u'output_summary.txt'), outputFilePrefix + u'_summary.txt', overwriteExisting=overwriteExisting)
|
|---|
| 529 |
|
|---|
| 530 | if writeDiagnosticPlots:
|
|---|
| 531 | files = glob.glob(os.path.join(tempDir.Path, u'output_*.' + plotFileFormat))
|
|---|
| 532 | for f in files:
|
|---|
| 533 | File.MoveSilent(f, outputFilePrefix + '_' + os.path.basename(f).split('_', 1)[1], overwriteExisting=overwriteExisting)
|
|---|
| 534 |
|
|---|
| 535 | # Delete the data frame.
|
|---|
| 536 |
|
|---|
| 537 | finally:
|
|---|
| 538 | r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
|
|---|
| 539 |
|
|---|
| 540 | @classmethod
|
|---|
| 541 | def PredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, buildPyramids=False, overwriteExisting=False):
|
|---|
| 542 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 543 | _PredictFromArcGISRasters('lme', _(u'Fit Linear Mixed Model'), inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, None, None, None, buildPyramids, overwriteExisting)
|
|---|
| 544 |
|
|---|
| 545 |
|
|---|
| 546 | class ModelEvaluation(object):
|
|---|
| 547 | __doc__ = DynamicDocString()
|
|---|
| 548 |
|
|---|
| 549 | @classmethod
|
|---|
| 550 | def PlotPerformanceOfBinaryClassificationModel(cls, inputModelFile, measure1, measure2=None, summaryStats=None, title=None,
|
|---|
| 551 | evaluationDataTable=None, where=None,
|
|---|
| 552 | outputPlotFile=None, res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
|
|---|
| 553 | overwriteExisting=False):
|
|---|
| 554 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 555 |
|
|---|
| 556 | # If the caller requested emf format for the plot file,
|
|---|
| 557 | # convert the width and height from thousands of an inch to
|
|---|
| 558 | # inches.
|
|---|
| 559 |
|
|---|
| 560 | if outputPlotFile is not None and outputPlotFile.lower().endswith(u'.emf'):
|
|---|
| 561 | width = width / 1000
|
|---|
| 562 | height = height / 1000
|
|---|
| 563 |
|
|---|
| 564 | # Load the fitted model.
|
|---|
| 565 |
|
|---|
| 566 | r = R.GetInterpreter()
|
|---|
| 567 | dataFrameName = None
|
|---|
| 568 | newPredictionsName = None
|
|---|
| 569 |
|
|---|
| 570 | r('load("%s")' % inputModelFile.replace('\\', '\\\\'))
|
|---|
| 571 | try:
|
|---|
| 572 | if not r('exists("model")'):
|
|---|
| 573 | Logger.RaiseException(ValueError(_('The input model file %(file)s does not contain a variable called "model", indicating that it was not generated by the one of the MGET model fitting tools, such as Fit GLM. Please provide a file that was generated by the one of the MGET model fitting tools.') % {u'file': inputModelFile}))
|
|---|
| 574 |
|
|---|
| 575 | # If the model required an R package, load it.
|
|---|
| 576 |
|
|---|
| 577 | if r('exists("rPackage")'):
|
|---|
| 578 | dependency = RPackageDependency(unicode(r['rPackage']))
|
|---|
| 579 | dependency.Initialize()
|
|---|
| 580 |
|
|---|
| 581 | # If the caller specified a evaluation data table, load it
|
|---|
| 582 | # and create new predictions.
|
|---|
| 583 |
|
|---|
| 584 | if r('exists("rPackage")') and r['rPackage'] == 'rpart':
|
|---|
| 585 | if r('model$method').lower() != 'class':
|
|---|
| 586 | Logger.RaiseException(ValueError(_('The input model file %(file)s contains a tree model that was fitted with the "%(method)s" splitting method. Models fitted with that method are not supported by this tool. This tool only supports models fitted with the "class" method.') % {u'file': inputModelFile, u'method': r('model$method')}))
|
|---|
| 587 | if r('length(attr(model, "ylevels"))') != 2:
|
|---|
| 588 | Logger.RaiseException(ValueError(_('The classification tree model in %(file)s contains %(classes)i classes. This tool only supports models with two classes.') % {u'file': inputModelFile, u'classes': r('length(attr(model, "ylevels"))')}))
|
|---|
| 589 |
|
|---|
| 590 | if evaluationDataTable is None:
|
|---|
| 591 | predictedValues = 'as.vector(predict(model, type="prob")[,2])'
|
|---|
| 592 | actualValues = 'as.vector(model$y) - 1'
|
|---|
| 593 | else:
|
|---|
| 594 | dataFrameName, newPredictionsName = cls._PredictionsForArcGISTable(inputModelFile, 'model', evaluationDataTable, where)
|
|---|
| 595 | predictedValues = newPredictionsName
|
|---|
| 596 | actualValues = 'as.vector(unclass(factor(%s$%s, levels=attr(model, "ylevels")))) - 1' % (dataFrameName, r('all.vars(attr(model$terms, "variables")[[2]])[[1]]'))
|
|---|
| 597 |
|
|---|
| 598 | else:
|
|---|
| 599 | if evaluationDataTable is None:
|
|---|
| 600 | predictedValues = 'model$fitted'
|
|---|
| 601 | actualValues = 'model$y'
|
|---|
| 602 | else:
|
|---|
| 603 | dataFrameName, newPredictionsName = cls._PredictionsForArcGISTable(inputModelFile, 'model', evaluationDataTable, where)
|
|---|
| 604 | predictedValues = newPredictionsName
|
|---|
| 605 | actualValues = 'na.omit(%s)$%s' % (dataFrameName, r('all.vars(attr(terms(model$formula), "variables")[[2]])[[1]]'))
|
|---|
| 606 |
|
|---|
| 607 | # Create the plot.
|
|---|
| 608 |
|
|---|
| 609 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'PerfPlotForBinaryModel.r'), False)
|
|---|
| 610 |
|
|---|
| 611 | if measure2 is None:
|
|---|
| 612 | measure2 = u'cutoff'
|
|---|
| 613 |
|
|---|
| 614 | if title is not None:
|
|---|
| 615 | title = '"' + title.replace('"', '\\"') + '"'
|
|---|
| 616 | else:
|
|---|
| 617 | title = 'NULL'
|
|---|
| 618 |
|
|---|
| 619 | r['summaryStats'] = summaryStats
|
|---|
| 620 |
|
|---|
| 621 | if outputPlotFile is None:
|
|---|
| 622 | summaryStatValues = r('PerfPlotForBinaryModel(predictedValues=%s, actualValues=%s, measure1="%s", measure2="%s", summaryStats=summaryStats, .title=%s)' % (predictedValues, actualValues, measure1, measure2, title))
|
|---|
| 623 | else:
|
|---|
| 624 | tempDir = TemporaryDirectory()
|
|---|
| 625 | tempOutputFile = os.path.join(tempDir.Path, u'plot' + os.path.splitext(outputPlotFile)[1])
|
|---|
| 626 | Logger.Info(_(u'Writing plot to %(file)s...') % {u'file': outputPlotFile})
|
|---|
| 627 | summaryStatValues = r('PerfPlotForBinaryModelToFile("%s", predictedValues=%s, actualValues=%s, measure1="%s", measure2="%s", summaryStats=summaryStats, .title=%s, res=%f, width=%f, height=%f, pointsize=%f, bg="%s")' % (tempOutputFile.replace('\\', '\\\\'), predictedValues, actualValues, measure1, measure2, title, res, width, height, pointSize, bg))
|
|---|
| 628 | File.MoveSilent(tempOutputFile, outputPlotFile, overwriteExisting=overwriteExisting)
|
|---|
| 629 |
|
|---|
| 630 | # Delete R variables assigned by this function.
|
|---|
| 631 |
|
|---|
| 632 | finally:
|
|---|
| 633 | r('if (exists("summaryStats")) rm("summaryStats")')
|
|---|
| 634 | r('if (exists("model")) rm("model")')
|
|---|
| 635 | r('if (exists("rPackage")) rm("rPackage")')
|
|---|
| 636 | r('if (exists("xVar")) rm("xVar")')
|
|---|
| 637 | r('if (exists("yVar")) rm("yVar")')
|
|---|
| 638 | r('if (exists("zVar")) rm("zVar")')
|
|---|
| 639 | r('if (exists("mVar")) rm("mVar")')
|
|---|
| 640 | r('if (exists("coordinateSystem")) rm("coordinateSystem")')
|
|---|
| 641 | r('if (exists("f20985982305")) rm("f20985982305", pos=globalenv())')
|
|---|
| 642 | r('if (exists("data20985982305")) rm("data20985982305", pos=globalenv())')
|
|---|
| 643 | r('if (exists("fam20985982305")) rm("fam20985982305", pos=globalenv())')
|
|---|
| 644 | if dataFrameName is not None:
|
|---|
| 645 | r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
|
|---|
| 646 | if newPredictionsName is not None:
|
|---|
| 647 | r('if (exists("%s")) rm("%s")' % (newPredictionsName, newPredictionsName))
|
|---|
| 648 |
|
|---|
| 649 | # Return the summary statistics' values.
|
|---|
| 650 |
|
|---|
| 651 | return summaryStatValues
|
|---|
| 652 |
|
|---|
| 653 | @classmethod
|
|---|
| 654 | def PlotROCOfBinaryClassificationModel(cls, inputModelFile, cutoff=u'Automatic', cutoffValue=None, colorize=True, title=None,
|
|---|
| 655 | evaluationDataTable=None, where=None,
|
|---|
| 656 | outputSummaryFile=None, outputPlotFile=None, res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
|
|---|
| 657 | overwriteExisting=False):
|
|---|
| 658 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 659 |
|
|---|
| 660 | # Perform additional parameter validation.
|
|---|
| 661 |
|
|---|
| 662 | if cutoff == u'specified' and cutoffValue is None:
|
|---|
| 663 | Logger.RaiseException(ValueError(_(u'When you provide the value "Specified" for the Cutoff parameter, you must also provide a value for the Cutoff Value parameter.')))
|
|---|
| 664 |
|
|---|
| 665 | # If the caller requested emf format for the plot file,
|
|---|
| 666 | # convert the width and height from thousands of an inch to
|
|---|
| 667 | # inches.
|
|---|
| 668 |
|
|---|
| 669 | if outputPlotFile is not None and outputPlotFile.lower().endswith(u'.emf'):
|
|---|
| 670 | width = width / 1000
|
|---|
| 671 | height = height / 1000
|
|---|
| 672 |
|
|---|
| 673 | # Load the fitted model.
|
|---|
| 674 |
|
|---|
| 675 | r = R.GetInterpreter()
|
|---|
| 676 | dataFrameName = None
|
|---|
| 677 | newPredictionsName = None
|
|---|
| 678 |
|
|---|
| 679 | r('load("%s")' % inputModelFile.replace('\\', '\\\\'))
|
|---|
| 680 | try:
|
|---|
| 681 | if not r('exists("model")'):
|
|---|
| 682 | Logger.RaiseException(ValueError(_('The input model file %(file)s does not contain a variable called "model", indicating that it was not generated by the one of the MGET model fitting tools, such as Fit GLM. Please provide a file that was generated by the one of the MGET model fitting tools.') % {u'file': inputModelFile}))
|
|---|
| 683 |
|
|---|
| 684 | # If the model required an R package, load it.
|
|---|
| 685 |
|
|---|
| 686 | if r('exists("rPackage")'):
|
|---|
| 687 | dependency = RPackageDependency(unicode(r['rPackage']))
|
|---|
| 688 | dependency.Initialize()
|
|---|
| 689 |
|
|---|
| 690 | # If the caller specified a evaluation data table, load it
|
|---|
| 691 | # and create new predictions.
|
|---|
| 692 |
|
|---|
| 693 | if r('exists("rPackage")') and r['rPackage'] == 'rpart':
|
|---|
| 694 | if r('model$method').lower() != 'class':
|
|---|
| 695 | Logger.RaiseException(ValueError(_('The input model file %(file)s contains a tree model that was fitted with the "%(method)s" splitting method. Models fitted with that method are not supported by this tool. This tool only supports models fitted with the "class" method.') % {u'file': inputModelFile, u'method': r('model$method')}))
|
|---|
| 696 | if r('length(attr(model, "ylevels"))') != 2:
|
|---|
| 697 | Logger.RaiseException(ValueError(_('The classification tree model in %(file)s contains %(classes)i classes. This tool only supports models with two classes.') % {u'file': inputModelFile, u'classes': r('length(attr(model, "ylevels"))')}))
|
|---|
| 698 |
|
|---|
| 699 | if evaluationDataTable is None:
|
|---|
| 700 | predictedValues = 'as.vector(predict(model, type="prob")[,2])'
|
|---|
| 701 | actualValues = 'as.vector(model$y) - 1'
|
|---|
| 702 | else:
|
|---|
| 703 | dataFrameName, newPredictionsName = cls._PredictionsForArcGISTable(inputModelFile, 'model', evaluationDataTable, where)
|
|---|
| 704 | predictedValues = newPredictionsName
|
|---|
| 705 | actualValues = 'as.vector(unclass(factor(%s$%s, levels=attr(model, "ylevels")))) - 1' % (dataFrameName, r('all.vars(attr(model$terms, "variables")[[2]])[[1]]'))
|
|---|
| 706 |
|
|---|
| 707 | else:
|
|---|
| 708 | if evaluationDataTable is None:
|
|---|
| 709 | predictedValues = 'model$fitted'
|
|---|
| 710 | actualValues = 'model$y'
|
|---|
| 711 | else:
|
|---|
| 712 | dataFrameName, newPredictionsName = cls._PredictionsForArcGISTable(inputModelFile, 'model', evaluationDataTable, where)
|
|---|
| 713 | predictedValues = newPredictionsName
|
|---|
| 714 | actualValues = 'na.omit(%s)$%s' % (dataFrameName, r('all.vars(attr(terms(model$formula), "variables")[[2]])[[1]]'))
|
|---|
| 715 |
|
|---|
| 716 | # Create the plot.
|
|---|
| 717 |
|
|---|
| 718 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'ROCPlotForBinaryModel.r'), False)
|
|---|
| 719 |
|
|---|
| 720 | if cutoffValue is None:
|
|---|
| 721 | cutoffValue = 0.0
|
|---|
| 722 |
|
|---|
| 723 | if title is not None:
|
|---|
| 724 | title = '"' + title.replace('"', '\\"') + '"'
|
|---|
| 725 | else:
|
|---|
| 726 | title = 'NULL'
|
|---|
| 727 |
|
|---|
| 728 | if outputSummaryFile is not None:
|
|---|
| 729 | outputSummaryFile = '"' + outputSummaryFile.replace('\\', '\\\\') + '"'
|
|---|
| 730 | else:
|
|---|
| 731 | outputSummaryFile = 'NULL'
|
|---|
| 732 |
|
|---|
| 733 | if outputPlotFile is None:
|
|---|
| 734 | outputCutoff, tp, fp, fn, tn, auc, mxe, prbe, rmse = r('ROCPlotForBinaryModel(predictedValues=%s, actualValues=%s, cutoff="%s", cutoffValue=%f, colorize=%s, .title=%s, summaryFile=%s)' % (predictedValues, actualValues, cutoff, cutoffValue, str(colorize).upper(), title, outputSummaryFile))
|
|---|
| 735 | else:
|
|---|
| 736 | tempDir = TemporaryDirectory()
|
|---|
| 737 | tempOutputFile = os.path.join(tempDir.Path, u'plot' + os.path.splitext(outputPlotFile)[1])
|
|---|
| 738 | Logger.Info(_(u'Writing plot to %(file)s...') % {u'file': outputPlotFile})
|
|---|
| 739 | outputCutoff, tp, fp, fn, tn, auc, mxe, prbe, rmse = r('ROCPlotForBinaryModelToFile("%s", predictedValues=%s, actualValues=%s, cutoff="%s", cutoffValue=%f, colorize=%s, .title=%s, summaryFile=%s, res=%f, width=%f, height=%f, pointsize=%f, bg="%s")' % (tempOutputFile.replace('\\', '\\\\'), predictedValues, actualValues, cutoff, cutoffValue, str(colorize).upper(), title, outputSummaryFile, res, width, height, pointSize, bg))
|
|---|
| 740 | File.MoveSilent(tempOutputFile, outputPlotFile, overwriteExisting=overwriteExisting)
|
|---|
| 741 |
|
|---|
| 742 | # Delete R variables assigned by this function.
|
|---|
| 743 |
|
|---|
| 744 | finally:
|
|---|
| 745 | r('if (exists("model")) rm("model")')
|
|---|
| 746 | r('if (exists("rPackage")) rm("rPackage")')
|
|---|
| 747 | r('if (exists("xVar")) rm("xVar")')
|
|---|
| 748 | r('if (exists("yVar")) rm("yVar")')
|
|---|
| 749 | r('if (exists("zVar")) rm("zVar")')
|
|---|
| 750 | r('if (exists("mVar")) rm("mVar")')
|
|---|
| 751 | r('if (exists("coordinateSystem")) rm("coordinateSystem")')
|
|---|
| 752 | r('if (exists("f20985982305")) rm("f20985982305", pos=globalenv())')
|
|---|
| 753 | r('if (exists("data20985982305")) rm("data20985982305", pos=globalenv())')
|
|---|
| 754 | r('if (exists("fam20985982305")) rm("fam20985982305", pos=globalenv())')
|
|---|
| 755 | if dataFrameName is not None:
|
|---|
| 756 | r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
|
|---|
| 757 | if newPredictionsName is not None:
|
|---|
| 758 | r('if (exists("%s")) rm("%s")' % (newPredictionsName, newPredictionsName))
|
|---|
| 759 |
|
|---|
| 760 | # Return successfully.
|
|---|
| 761 |
|
|---|
| 762 | return outputCutoff, tp, fp, fn, tn, auc, mxe, prbe, rmse
|
|---|
| 763 |
|
|---|
| 764 | @classmethod
|
|---|
| 765 | def _PredictionsForArcGISTable(cls, inputModelFile, modelName, evaluationDataTable, where):
|
|---|
| 766 |
|
|---|
| 767 | # First verify that the table contains all of the fields used
|
|---|
| 768 | # to fit the model.
|
|---|
| 769 |
|
|---|
| 770 | r = R.GetInterpreter()
|
|---|
| 771 | gp = GeoprocessorManager.GetWrappedGeoprocessor()
|
|---|
| 772 |
|
|---|
| 773 | fields = []
|
|---|
| 774 | xColumnName = None
|
|---|
| 775 | yColumnName = None
|
|---|
| 776 | zColumnName = None
|
|---|
| 777 | mColumnName = None
|
|---|
| 778 | d = gp.Describe(evaluationDataTable)
|
|---|
| 779 | gotPointFeatures = hasattr(d, u'ShapeType') and isinstance(d.ShapeType, basestring) and d.ShapeType.lower() == u'point'
|
|---|
| 780 |
|
|---|
| 781 | if r('exists("rPackage")') and r['rPackage'] == 'rpart':
|
|---|
| 782 | allVars = r('all.vars(%s$terms)' % modelName)
|
|---|
| 783 | else:
|
|---|
| 784 | allVars = r('all.vars(%s$formula)' % modelName)
|
|---|
| 785 | if isinstance(allVars, basestring):
|
|---|
| 786 | allVars = [allVars]
|
|---|
| 787 |
|
|---|
| 788 | for var in allVars:
|
|---|
| 789 | conn = ArcGIS91DatabaseConnection()
|
|---|
| 790 | if conn.FieldExists(evaluationDataTable, var):
|
|---|
| 791 | fields.append(var)
|
|---|
| 792 | else:
|
|---|
| 793 | if var == r['xVar']:
|
|---|
| 794 | if gotPointFeatures:
|
|---|
| 795 | xColumnName = var
|
|---|
| 796 | else:
|
|---|
| 797 | Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point X coordinate was used as the variable "%(var)s" in the model, but the evaluation data table %(table)s does not contain point features, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
|
|---|
| 798 | elif var == r['yVar']:
|
|---|
| 799 | if gotPointFeatures:
|
|---|
| 800 | yColumnName = var
|
|---|
| 801 | else:
|
|---|
| 802 | Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point Y coordinate was used as the variable "%(var)s" in the model, but the evaluation data table %(table)s does not contain point features, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
|
|---|
| 803 | elif var == r['zVar']:
|
|---|
| 804 | if gotPointFeatures and d.HasZ:
|
|---|
| 805 | zColumnName = var
|
|---|
| 806 | elif gotPointFeatures:
|
|---|
| 807 | Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point Z coordinate was used as the variable "%(var)s" in the model, but the point feature class %(table)s provided as the evaluation data table does not have Z coordinates, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class that has Z values or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
|
|---|
| 808 | else:
|
|---|
| 809 | Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point Z coordinate was used as the variable "%(var)s" in the model, but the evaluation data table %(table)s does not contain point features, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class that has Z values or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
|
|---|
| 810 | elif var == r['mVar']:
|
|---|
| 811 | if gotPointFeatures and d.HasM:
|
|---|
| 812 | mColumnName = var
|
|---|
| 813 | elif gotPointFeatures:
|
|---|
| 814 | Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point M value was used as the variable "%(var)s" in the model, but the point feature class %(table)s provided as the evaluation data table does not have M values, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class that has M values or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
|
|---|
| 815 | else:
|
|---|
| 816 | Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point M value was used as the variable "%(var)s" in the model, but the evaluation data table %(table)s does not contain point features, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class that has M values or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
|
|---|
| 817 | else:
|
|---|
| 818 | Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a table containing a field named "%(var)s" but the evaluation data table %(table)s does not contain this field. You must provide a table that contains this field.') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
|
|---|
| 819 |
|
|---|
| 820 | # Load the table into a temporary data frame.
|
|---|
| 821 |
|
|---|
| 822 | dataFrameName = R.GetUniqueVariableName()
|
|---|
| 823 | R.LoadDataFrameFromArcGISTable(evaluationDataTable, dataFrameName, fields=fields, where=where, xColumnName=xColumnName, yColumnName=yColumnName, zColumnName=zColumnName, mColumnName=mColumnName)
|
|---|
| 824 |
|
|---|
| 825 | if r('length(%s)' % dataFrameName) <= 0:
|
|---|
| 826 | if where is not None:
|
|---|
| 827 | Logger.RaiseException(ValueError(_(u'The where clause "%(where)s" did not select any rows from the table %(table)s.') % {u'table': evaluationDataTable, u'where': where}))
|
|---|
| 828 | else:
|
|---|
| 829 | Logger.RaiseException(ValueError(_(u'The table %(table)s is empty.') % {u'table': evaluationDataTable}))
|
|---|
| 830 |
|
|---|
| 831 | # Create predictions from the evaluation data.
|
|---|
| 832 |
|
|---|
| 833 | try:
|
|---|
| 834 | Logger.Info(_(u'Predicting the response for the evaluation data...'))
|
|---|
| 835 | r('if (exists("rPackage")) library(rPackage, character.only=TRUE)')
|
|---|
| 836 | newPredictionsName = R.GetUniqueVariableName()
|
|---|
| 837 |
|
|---|
| 838 | if r('exists("rPackage")') and r['rPackage'] == 'rpart':
|
|---|
| 839 | r('%s <- as.vector(predict(%s, newdata=%s, type="prob")[,2])' % (newPredictionsName, modelName, dataFrameName))
|
|---|
| 840 | else:
|
|---|
| 841 | r('%s <- as.vector(predict(%s, newdata=na.omit(%s), type="response"))' % (newPredictionsName, modelName, dataFrameName))
|
|---|
| 842 | except:
|
|---|
| 843 | r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
|
|---|
| 844 | r('if (exists("%s")) rm("%s")' % (newPredictionsName, newPredictionsName))
|
|---|
| 845 | raise
|
|---|
| 846 |
|
|---|
| 847 | # Return the names of the R variables we created for the data
|
|---|
| 848 | # frame and the predictions.
|
|---|
| 849 |
|
|---|
| 850 | return dataFrameName, newPredictionsName
|
|---|
| 851 |
|
|---|
| 852 | @classmethod
|
|---|
| 853 | def RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords(cls, table, percentEvaluation=33.33333, field=u'IsEvalData', where=None):
|
|---|
| 854 | cls.__doc__.Obj.ValidateMethodInvocation()
|
|---|
| 855 |
|
|---|
| 856 | # If the field already exists, validate that it is the correct
|
|---|
| 857 | # type.
|
|---|
| 858 |
|
|---|
| 859 | conn = ArcGIS91DatabaseConnection()
|
|---|
| 860 | dataType = conn.GetFieldDataType(table, field)
|
|---|
| 861 | if dataType is not None:
|
|---|
| 862 | if dataType not in [u'SHORT', u'LONG', u'FLOAT', u'DOUBLE']:
|
|---|
| 863 | Logger.RaiseException(ValueError(_(u'Existing field %(field)s of table %(table)s cannot be used because it has the data type %(dt)s. Please specify a field that has the data type SHORT, LONG, FLOAT, or DOUBLE.') % {u'field': field, u'table': table, u'dt': dataType}))
|
|---|
| 864 |
|
|---|
| 865 | # If the field does not already exist, create it.
|
|---|
| 866 |
|
|---|
| 867 | else:
|
|---|
| 868 | conn.AddField(table, field, u'SHORT')
|
|---|
| 869 | Logger.Info(_(u'Added field %(field)s to table %(table)s.') % {u'field': field, u'table': table})
|
|---|
| 870 |
|
|---|
| 871 | # If the caller provided a where clause, create a table view.
|
|---|
| 872 |
|
|---|
| 873 | gp = GeoprocessorManager.GetWrappedGeoprocessor()
|
|---|
| 874 |
|
|---|
| 875 | if where is None:
|
|---|
| 876 | tableView = table
|
|---|
| 877 | else:
|
|---|
| 878 | tableView = GeoprocessorManager.GetUniqueLayerName()
|
|---|
| 879 | gp.MakeTableView_management(table, tableView, where)
|
|---|
| 880 | try:
|
|---|
| 881 |
|
|---|
| 882 | # If there were no rows, warn the user.
|
|---|
| 883 |
|
|---|
| 884 | totalRows = conn.GetRowCount(tableView)
|
|---|
| 885 | if totalRows <= 0:
|
|---|
| 886 | if where is not None:
|
|---|
| 887 | Logger.Warning(_(u'No rows of table %(table)s were changed because the where clause "%(where)s" did not select any rows from the table.') % {u'table': table, u'where': where})
|
|---|
| 888 | else:
|
|---|
| 889 | Logger.Warning(_(u'Table %(table)s is empty.') % {u'table': table})
|
|---|
| 890 |
|
|---|
| 891 | # Otherwise, compute the number of rows that should be
|
|---|
| 892 | # designated training and evaluation, create a shuffled
|
|---|
| 893 | # list of 0 and 1 values to draw from, and designate the
|
|---|
| 894 | # rows.
|
|---|
| 895 |
|
|---|
| 896 | else:
|
|---|
| 897 | evaluationRows = int(round(totalRows * percentEvaluation / 100.0))
|
|---|
| 898 | trainingRows = totalRows - evaluationRows
|
|---|
| 899 | values = [0] * trainingRows + [1] * evaluationRows
|
|---|
| 900 | random.shuffle(values)
|
|---|
| 901 |
|
|---|
| 902 | Logger.Info(_(u'Updating table %(table)s: designating %(t)i rows as training records and %(e)i rows as evaluation records...') % {u'table': table, u't': trainingRows, u'e': evaluationRows})
|
|---|
| 903 |
|
|---|
| 904 | i = 0
|
|---|
| 905 | cur = conn.OpenUpdateCursor(tableView, rowCount=totalRows)
|
|---|
| 906 | try:
|
|---|
| 907 | while cur.NextRow():
|
|---|
| 908 | cur.SetValue(field, values[i])
|
|---|
| 909 | cur.UpdateRow()
|
|---|
| 910 | i += 1
|
|---|
| 911 | if i >= len(values): # This should never happen
|
|---|
| 912 | break
|
|---|
| 913 | finally:
|
|---|
| 914 | del cur
|
|---|
| 915 |
|
|---|
| 916 | # Delete the table view, if we created one.
|
|---|
| 917 |
|
|---|
| 918 | finally:
|
|---|
| 919 | try:
|
|---|
| 920 | if tableView != table and GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMajorVersion() == 1:
|
|---|
| 921 | gp.Delete(tableView)
|
|---|
| 922 | except:
|
|---|
| 923 | pass
|
|---|
| 924 |
|
|---|
| 925 | # Return successfully.
|
|---|
| 926 |
|
|---|
| 927 | return table
|
|---|
| 928 |
|
|---|
| 929 |
|
|---|
| 930 | # Private helper functions
|
|---|
| 931 |
|
|---|
| 932 | def _LoadDataFrameForModelFitting(inputTable, formulas, formulaIsRequired, family, link, variance, theta, where, xColumnName, yColumnName, zColumnName, mColumnName, extraFieldsToLoad=None):
|
|---|
| 933 |
|
|---|
| 934 | # Determine if the input table is a point feature class.
|
|---|
| 935 |
|
|---|
| 936 | gp = GeoprocessorManager.GetWrappedGeoprocessor()
|
|---|
| 937 | d = gp.Describe(inputTable)
|
|---|
| 938 | gotPointFeatures = hasattr(d, u'ShapeType') and isinstance(d.ShapeType, basestring) and d.ShapeType.lower() == u'point'
|
|---|
| 939 |
|
|---|
| 940 | # Create a list of all of the fields in the formula.
|
|---|
| 941 |
|
|---|
| 942 | r = R.GetInterpreter()
|
|---|
| 943 | fields = set()
|
|---|
| 944 |
|
|---|
| 945 | for name, formula in formulas.items():
|
|---|
| 946 | if formula is not None:
|
|---|
| 947 | terms = r('all.vars(%s)' % formula)
|
|---|
| 948 | if isinstance(terms, types.ListType):
|
|---|
| 949 | fields = fields.union(set(terms))
|
|---|
| 950 | elif isinstance(terms, basestring):
|
|---|
| 951 | fields = fields.union(set([terms]))
|
|---|
| 952 | elif formulaIsRequired[name]:
|
|---|
| 953 | Logger.RaiseException(ValueError(_(u'The %(name)s "%(formula)s" did not include any terms. Please provide a %(name)s that includes at least one term.') % {u'name': name, u'formula': formula}))
|
|---|
| 954 |
|
|---|
| 955 | if extraFieldsToLoad is not None:
|
|---|
| 956 | fields = fields.union(set(extraFieldsToLoad))
|
|---|
| 957 |
|
|---|
| 958 | fields = list(fields)
|
|---|
| 959 |
|
|---|
| 960 | # If the caller provided an X, Y, Z, or M column name and it
|
|---|
| 961 | # appears in a formula, report an error if the input table is not
|
|---|
| 962 | # a point feature class.
|
|---|
| 963 |
|
|---|
| 964 | if xColumnName is not None and xColumnName in fields:
|
|---|
| 965 | if gotPointFeatures:
|
|---|
| 966 | if xColumnName == u'x' or xColumnName == u'X':
|
|---|
| 967 | Logger.RaiseException(ValueError(_(u'The variable representing the X coordinates of point features cannot be named "x". We apologize for this inconvenient restriction. Please specify a different name.'))) # Imposed by rgdal
|
|---|
| 968 | fields.remove(xColumnName)
|
|---|
| 969 | else:
|
|---|
| 970 | Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the X coordinate of point features, but the input table "%(table)s" does not contain point features. Please specify a table that includes point features or remove this variable from the model.') % {u'var': xColumnName, u'table': inputTable}))
|
|---|
| 971 | else:
|
|---|
| 972 | xColumnName = None
|
|---|
| 973 |
|
|---|
| 974 | if yColumnName is not None and yColumnName in fields:
|
|---|
| 975 | if gotPointFeatures:
|
|---|
| 976 | if yColumnName == u'y' or yColumnName == u'Y':
|
|---|
| 977 | Logger.RaiseException(ValueError(_(u'The variable representing the Y coordinates of point features cannot be named "y". We apologize for this inconvenient restriction. Please specify a different name.'))) # Imposed by rgdal
|
|---|
| 978 | fields.remove(yColumnName)
|
|---|
| 979 | else:
|
|---|
| 980 | Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the Y coordinate of point features, but the input table "%(table)s" does not contain point features. Please specify a table that includes point features or remove this variable from the model.') % {u'var': yColumnName, u'table': inputTable}))
|
|---|
| 981 | else:
|
|---|
| 982 | yColumnName = None
|
|---|
| 983 |
|
|---|
| 984 | if zColumnName is not None and zColumnName in fields:
|
|---|
| 985 | if gotPointFeatures:
|
|---|
| 986 | if d.HasZ:
|
|---|
| 987 | fields.remove(zColumnName)
|
|---|
| 988 | else:
|
|---|
| 989 | Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the Z coordinate of point features, but the feature class "%(table)s" does not have Z coordinates. Please specify a feature class that includes Z coordinates or remove this variable from the model.') % {u'var': zColumnName, u'table': inputTable}))
|
|---|
| 990 | else:
|
|---|
| 991 | Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the X coordinate of point features, but the input table "%(table)s" does not contain point features. Please specify a table that includes point features or remove this variable from the model.') % {u'var': zColumnName, u'table': inputTable}))
|
|---|
| 992 | else:
|
|---|
| 993 | zColumnName = None
|
|---|
| 994 |
|
|---|
| 995 | if mColumnName is not None and mColumnName in fields:
|
|---|
| 996 | if gotPointFeatures:
|
|---|
| 997 | if d.HasZ:
|
|---|
| 998 | fields.remove(mColumnName)
|
|---|
| 999 | else:
|
|---|
| 1000 | Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the M value of point features, but the feature class "%(table)s" does not have M values. Please specify a feature class that includes M values or remove this variable from the model.') % {u'var': mColumnName, u'table': inputTable}))
|
|---|
| 1001 | else:
|
|---|
| 1002 | Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the M value of point features, but the input table "%(table)s" does not contain point features. Please specify a table that includes point features or remove this variable from the model.') % {u'var': mColumnName, u'table': inputTable}))
|
|---|
| 1003 | else:
|
|---|
| 1004 | mColumnName = None
|
|---|
| 1005 |
|
|---|
| 1006 | if u'x' in fields or u'X' in fields or u'y' in fields or u'Y' in fields:
|
|---|
| 1007 | Logger.RaiseException(ValueError(_(u'The model cannot reference a field named "x" or "y". We apologize for this inconvenient restriction. To use this field in the model, rename it to something other than "x" or "y".'))) # Imposed by rgdal
|
|---|
| 1008 |
|
|---|
| 1009 | # Load the table into an R data frame.
|
|---|
| 1010 |
|
|---|
| 1011 | dataFrameName = R.GetUniqueVariableName()
|
|---|
| 1012 | R.LoadDataFrameFromArcGISTable(inputTable, dataFrameName, fields=fields, where=where, xColumnName=xColumnName, yColumnName=yColumnName, zColumnName=zColumnName, mColumnName=mColumnName)
|
|---|
| 1013 | try:
|
|---|
| 1014 |
|
|---|
| 1015 | # Validate that the data frame contains at least one row.
|
|---|
| 1016 |
|
|---|
| 1017 | if r('length(%s)' % dataFrameName) <= 0:
|
|---|
| 1018 | if where is not None:
|
|---|
| 1019 | Logger.RaiseException(ValueError(_(u'The where clause "%(where)s" did not select any rows from the table %(table)s.') % {u'table': inputTable, u'where': where}))
|
|---|
| 1020 | else:
|
|---|
| 1021 | Logger.RaiseException(ValueError(_(u'The table %(table)s is empty.') % {u'table': inputTable}))
|
|---|
| 1022 |
|
|---|
| 1023 | # Prepare the column names to be passed as parameters to the R
|
|---|
| 1024 | # model-fitting function.
|
|---|
| 1025 |
|
|---|
| 1026 | if xColumnName is not None:
|
|---|
| 1027 | xColumnParam = u'"' + xColumnName + u'"'
|
|---|
| 1028 | else:
|
|---|
| 1029 | xColumnParam = u'NULL'
|
|---|
| 1030 |
|
|---|
| 1031 | if yColumnName is not None:
|
|---|
| 1032 | yColumnParam = u'"' + yColumnName + u'"'
|
|---|
| 1033 | else:
|
|---|
| 1034 | yColumnParam = u'NULL'
|
|---|
| 1035 |
|
|---|
| 1036 | if zColumnName is not None:
|
|---|
| 1037 | zColumnParam = u'"' + zColumnName + u'"'
|
|---|
| 1038 | else:
|
|---|
| 1039 | zColumnParam = u'NULL'
|
|---|
| 1040 |
|
|---|
| 1041 | if mColumnName is not None:
|
|---|
| 1042 | mColumnParam = u'"' + mColumnName + u'"'
|
|---|
| 1043 | else:
|
|---|
| 1044 | mColumnParam = u'NULL'
|
|---|
| 1045 |
|
|---|
| 1046 | # If the input table is a point feature class, prepare its
|
|---|
| 1047 | # coordinate system string to be passed as a parameter to the
|
|---|
| 1048 | # R model-fitting function.
|
|---|
| 1049 |
|
|---|
| 1050 | if gotPointFeatures:
|
|---|
| 1051 | coordinateSystemParam = u'"' + gp.CreateSpatialReference_management(d.SpatialReference).split(u';')[0] + u'"'
|
|---|
| 1052 | else:
|
|---|
| 1053 | coordinateSystemParam = u'NULL'
|
|---|
| 1054 |
|
|---|
| 1055 | # Create a family string to be passed as a parameter to the R
|
|---|
| 1056 | # model-fitting function.
|
|---|
| 1057 |
|
|---|
| 1058 | if family is not None:
|
|---|
| 1059 | if link is None:
|
|---|
| 1060 | if family == u'binomial':
|
|---|
| 1061 | link = u'logit'
|
|---|
| 1062 | elif family == u'gaussian':
|
|---|
| 1063 | link = u'identity'
|
|---|
| 1064 | elif family == u'Gamma':
|
|---|
| 1065 | link = u'inverse'
|
|---|
| 1066 | elif family == u'inverse.gaussian':
|
|---|
| 1067 | link = u'1/mu^2'
|
|---|
| 1068 | elif family == u'negbin':
|
|---|
| 1069 | link = u'log'
|
|---|
| 1070 | elif family == u'poisson':
|
|---|
| 1071 | link = u'log'
|
|---|
| 1072 | elif family == u'quasi':
|
|---|
| 1073 | link = u'identity'
|
|---|
| 1074 | elif family == u'quasibinomial':
|
|---|
| 1075 | link = u'logit'
|
|---|
| 1076 | elif family == u'quasipoisson':
|
|---|
| 1077 | link = u'log'
|
|---|
| 1078 | else:
|
|---|
| 1079 | Logger.RaiseException(_(u'Programming error in this tool: unknown model family "%s". Please contact the author of this tool for assistance.') % family)
|
|---|
| 1080 |
|
|---|
| 1081 | if family == u'negbin':
|
|---|
| 1082 | familyParam = '%s(theta=%s, link="%s")' % (family, theta, link)
|
|---|
| 1083 |
|
|---|
| 1084 | elif family == u'quasi':
|
|---|
| 1085 | if variance is None:
|
|---|
| 1086 | variance = u'constant'
|
|---|
| 1087 | familyParam = '%s(link="%s", variance="%s")' % (family, link, variance)
|
|---|
| 1088 |
|
|---|
| 1089 | else:
|
|---|
| 1090 | familyParam = '%s(link="%s")' % (family, link)
|
|---|
| 1091 |
|
|---|
| 1092 | else:
|
|---|
| 1093 | familyParam = None
|
|---|
| 1094 |
|
|---|
| 1095 | except:
|
|---|
| 1096 | r('rm("%s")' % dataFrameName)
|
|---|
| 1097 | raise
|
|---|
| 1098 |
|
|---|
| 1099 | # Return successfully.
|
|---|
| 1100 |
|
|---|
| 1101 | return dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, familyParam
|
|---|
| 1102 |
|
|---|
| 1103 | def _PredictFromArcGISRasters(modelType, fitToolName, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, outputErrorRaster, outputBinaryResponseRaster, cutoff, buildPyramids, overwriteExisting):
|
|---|
| 1104 |
|
|---|
| 1105 | # Perform additional parameter validation.
|
|---|
| 1106 |
|
|---|
| 1107 | gp = GeoprocessorManager.GetWrappedGeoprocessor()
|
|---|
| 1108 |
|
|---|
| 1109 | for raster in inputPredictorRasters:
|
|---|
| 1110 | d = gp.Describe(raster)
|
|---|
| 1111 | if d.SpatialReference.Name is None or d.SpatialReference.Name.lower() == u'unknown':
|
|---|
| 1112 | Logger.RaiseException(ValueError(_(u'The predictor raster %(raster)s does not have a coordinate system defined. Please define the coordinate system for this raster and try again. (Use the ArcGIS Define Projection tool.)') % {u'raster': raster}))
|
|---|
| 1113 |
|
|---|
| 1114 | if outputResponseRaster.lower() in map(lambda s: unicode(s).lower(), inputPredictorRasters):
|
|---|
| 1115 | Logger.RaiseException(ValueError(_(u'The output response raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output response raster or remove it from the list of input predictor rasters.') % {u'out': outputResponseRaster}))
|
|---|
| 1116 |
|
|---|
| 1117 | if outputErrorRaster is not None and outputErrorRaster.lower() in map(lambda s: unicode(s).lower(), inputPredictorRasters):
|
|---|
| 1118 | Logger.RaiseException(ValueError(_(u'The output standard error raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output standard error raster or remove it from the list of input predictor rasters.') % {u'out': outputResponseRaster}))
|
|---|
| 1119 |
|
|---|
| 1120 | if outputBinaryResponseRaster is not None and cutoff is None:
|
|---|
| 1121 | Logger.RaiseException(ValueError(_(u'To create a binary response raster, you must also supply a value for the Cutoff parameter.')))
|
|---|
| 1122 |
|
|---|
| 1123 | # Look up the coordinate system, cell size, and extent of the
|
|---|
| 1124 | # template raster.
|
|---|
| 1125 |
|
|---|
| 1126 | if templateRaster is None:
|
|---|
| 1127 | templateRaster = inputPredictorRasters[0]
|
|---|
| 1128 |
|
|---|
| 1129 | templateDescribe = gp.Describe(templateRaster)
|
|---|
| 1130 | if templateDescribe.SpatialReference.Name is None or templateDescribe.SpatialReference.Name.lower() == u'unknown':
|
|---|
| 1131 | Logger.RaiseException(ValueError(_(u'The template raster %(raster)s does not have a coordinate system defined. Please define the coordinate system for this raster and try again. (Use the ArcGIS Define Projection tool.)') % {u'raster': templateRaster}))
|
|---|
| 1132 |
|
|---|
| 1133 | coordinateSystem = gp.CreateSpatialReference(templateDescribe.SpatialReference).split(';')[0]
|
|---|
| 1134 | cellSize = templateDescribe.MeanCellWidth
|
|---|
| 1135 | extent = templateDescribe.Extent
|
|---|
| 1136 |
|
|---|
| 1137 | # Load the fitted model.
|
|---|
| 1138 |
|
|---|
| 1139 | r = R.GetInterpreter()
|
|---|
| 1140 |
|
|---|
| 1141 | r('load("%s")' % inputModelFile.replace('\\', '\\\\'))
|
|---|
| 1142 | try:
|
|---|
| 1143 | if not r('exists("model")'):
|
|---|
| 1144 | Logger.RaiseException(ValueError(_('The input model file %(file)s does not contain a variable called "model", indicating that it was not generated by the Fit %(type)s tool. Please provide a file that was generated by the %(fitToolName)s tool.') % {u'file': inputModelFile, u'fitToolName': fitToolName}))
|
|---|
| 1145 | if modelType.lower() not in r('class(model)'):
|
|---|
| 1146 | Logger.RaiseException(ValueError(_('The input model file %(file)s contains a variable called "model" but it is not a %(type)s, indicating that it was not generated by the %(fitToolName)s tool. Please provide a file that was generated by the %(fitToolName)s tool.') % {u'file': inputModelFile, u'type': modelType, u'fitToolName': fitToolName}))
|
|---|
| 1147 |
|
|---|
| 1148 | if r('exists("rPackage")'):
|
|---|
| 1149 | Logger.Info(_(u'Loaded %(type)s from %(file)s. The model was fitted with the R %(pkg)s package.') % {u'type': r('class(model)[1]').upper(), u'file': inputModelFile, u'pkg': r['rPackage']})
|
|---|
| 1150 | else:
|
|---|
| 1151 | Logger.Info(_(u'Loaded %(type)s from %(file)s.') % {u'type': r('class(model)[1]').upper(), u'file': inputModelFile, })
|
|---|
| 1152 |
|
|---|
| 1153 | # If the model required an R package, load it.
|
|---|
| 1154 |
|
|---|
| 1155 | if r('exists("rPackage")'):
|
|---|
| 1156 | dependency = RPackageDependency(unicode(r['rPackage']))
|
|---|
| 1157 | dependency.Initialize()
|
|---|
| 1158 |
|
|---|
| 1159 | # If it is a GAM fitted by the R gam package, report an error
|
|---|
| 1160 | # if the caller specified an outputErrorRaster, because that
|
|---|
| 1161 | # package's predict.gam function cannot generate predictions
|
|---|
| 1162 | # from the newdata parameter.
|
|---|
| 1163 |
|
|---|
| 1164 | if outputErrorRaster is not None and modelType.lower() == 'gam' and r['rPackage'] == 'gam':
|
|---|
| 1165 | Logger.RaiseException(ValueError(_('The input model from file %(file)s is a GAM that was fitted using the R gam package. Because the gam package cannot return standard errors for responses predicted from data other than those used to fit the model, a standard error raster cannot be produced. Please do not specify a standard error raster, or, if you must have one, re-fit the GAM using the R mgcv package.') % {u'file': inputModelFile}))
|
|---|
| 1166 |
|
|---|
| 1167 | # If it is a GAM fitted by the R gam package, we have to work
|
|---|
| 1168 | # around a bug in predict.gam. The situation is described in
|
|---|
| 1169 | # more detail in a message to the R help mailing list titled
|
|---|
| 1170 | # "use of step.gam (from package 'gam') and superassignment
|
|---|
| 1171 | # inside functions" dated 28-Feb-2008 (see
|
|---|
| 1172 | # https://stat.ethz.ch/pipermail/r-help/2008-February/155586.html).
|
|---|
| 1173 | # Define uniquely-named global variables for the formula, data
|
|---|
| 1174 | # and family parameters. This is very hacky but it is the only
|
|---|
| 1175 | # way I could get it working. Note the use of the <<-
|
|---|
| 1176 | # operator. Hopefully these names are unique.
|
|---|
| 1177 |
|
|---|
| 1178 | if modelType.lower() == 'gam' and r['rPackage'] == 'gam':
|
|---|
| 1179 | r('f20985982305 <<- model$formula')
|
|---|
| 1180 | r('data20985982305 <<- model$data')
|
|---|
| 1181 | r('fam20985982305 <<- model$family')
|
|---|
| 1182 |
|
|---|
| 1183 | # Log a summery of the model, to remind the user what it is.
|
|---|
| 1184 |
|
|---|
| 1185 | if r('exists("rPackage")') and r['rPackage'] != 'rpart':
|
|---|
| 1186 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'SummarizeModel.r'), False)
|
|---|
| 1187 |
|
|---|
| 1188 | r('writeLines("")')
|
|---|
| 1189 | r('writeLines("MODEL SUMMARY:")')
|
|---|
| 1190 | r('writeLines("==============")')
|
|---|
| 1191 | r('writeLines(SummarizeModel(model))')
|
|---|
| 1192 | r('writeLines("")')
|
|---|
| 1193 |
|
|---|
| 1194 | # If the caller provided a non-rpart, non-lme model without any
|
|---|
| 1195 | # predictor variables, then the predicted response and
|
|---|
| 1196 | # standard errors will be constant values. Report a warning
|
|---|
| 1197 | # and create constant rasters having these values.
|
|---|
| 1198 |
|
|---|
| 1199 | if (not r('exists("rPackage")') or r['rPackage'] not in ['rpart', 'nlme']) and isinstance(r('all.vars(model$formula)'), basestring): # If only one term is in the model (i.e. the response variable), rpy will return it as a string, rather than a list with one string in it.
|
|---|
| 1200 | if outputErrorRaster is not None:
|
|---|
| 1201 | Logger.Warning(_(u'The model\'s formula does not contain any predictor variables. Because of this, the output response and standard error rasters will contain constant values.'))
|
|---|
| 1202 | else:
|
|---|
| 1203 | Logger.Warning(_(u'The model\'s formula does not contain any predictor variables. Because of this, the output response raster will contain a constant value.'))
|
|---|
| 1204 |
|
|---|
| 1205 | responseValue = r('predict(model, newdata=data.frame(0), type="response")')
|
|---|
| 1206 | gp.CreateRandomRaster_management(os.path.dirname(outputResponseRaster), os.path.basename(outputResponseRaster), u'UNIFORM %g %g' % (responseValue, responseValue), extent, cellSize)
|
|---|
| 1207 | gp.DefineProjection_management(outputResponseRaster, coordinateSystem)
|
|---|
| 1208 | if buildPyramids:
|
|---|
| 1209 | gp.BuildPyramids_Management(outputResponseRaster)
|
|---|
| 1210 |
|
|---|
| 1211 | if outputErrorRaster is not None:
|
|---|
| 1212 | errorValue = r('predict(model, newdata=data.frame(0), type="response", se.fit=TRUE)$se.fit')
|
|---|
| 1213 | gp.CreateRandomRaster_management(os.path.dirname(outputErrorRaster), os.path.basename(outputErrorRaster), u'UNIFORM %g %g' % (errorValue, errorValue), extent, cellSize)
|
|---|
| 1214 | gp.DefineProjection_management(outputErrorRaster, coordinateSystem)
|
|---|
| 1215 | if buildPyramids:
|
|---|
| 1216 | gp.BuildPyramids_Management(outputResponseRaster)
|
|---|
| 1217 |
|
|---|
| 1218 | # Otherwise, the model contains at least one predictor
|
|---|
| 1219 | # variable, do the prediction.
|
|---|
| 1220 |
|
|---|
| 1221 | else:
|
|---|
| 1222 | tempDir = TemporaryDirectory()
|
|---|
| 1223 |
|
|---|
| 1224 | # First prepare the input rasters for the prediction.
|
|---|
| 1225 |
|
|---|
| 1226 | _PreparePredictorRasters(r, gp, tempDir, variableNames, inputPredictorRasters, templateRaster, templateDescribe, coordinateSystem, cellSize, extent, resamplingTechniques)
|
|---|
| 1227 |
|
|---|
| 1228 | # Do the prediction. This creates files in GDAL-compatible
|
|---|
| 1229 | # .bil format.
|
|---|
| 1230 | #
|
|---|
| 1231 | # Force the logging system to log R Info messages as
|
|---|
| 1232 | # Debug so that the user is not spammed with "Closing
|
|---|
| 1233 | # GDAL dataset handle" messages. There appears to be
|
|---|
| 1234 | # no way to suppress these messages from within R.
|
|---|
| 1235 |
|
|---|
| 1236 | Logger.Info(_(u'Predicting...'))
|
|---|
| 1237 |
|
|---|
| 1238 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'Utils.r'), False)
|
|---|
| 1239 | R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'PredictLMForArcGISRasters.r'), False)
|
|---|
| 1240 |
|
|---|
| 1241 | oldLoggingLevel = r.LogInfoAsDebug
|
|---|
| 1242 | r.LogInfoAsDebug = True
|
|---|
| 1243 | try:
|
|---|
| 1244 | if r('exists("rPackage")'):
|
|---|
| 1245 | rPackage = '"' + r['rPackage'] + '"'
|
|---|
| 1246 | else:
|
|---|
| 1247 | rPackage = 'NULL'
|
|---|
| 1248 |
|
|---|
| 1249 | tempResponseFile = os.path.join(tempDir.Path, u'temp_response')
|
|---|
| 1250 |
|
|---|
| 1251 | if outputErrorRaster is None:
|
|---|
| 1252 | r('PredictLMForArcGISRasters(model, %s, rastersForPredictors, "%s", ignoreOutOfRangeValues=%s)' % (rPackage, tempResponseFile.replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper()))
|
|---|
| 1253 | else:
|
|---|
| 1254 | tempErrorFile = os.path.join(tempDir.Path, u'temp_standarderror')
|
|---|
| 1255 | r('PredictLMForArcGISRasters(model, %s, rastersForPredictors, "%s", "%s", ignoreOutOfRangeValues=%s)' % (rPackage, tempResponseFile.replace('\\', '\\\\'), tempErrorFile.replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper()))
|
|---|
| 1256 | finally:
|
|---|
| 1257 | r.LogInfoAsDebug = oldLoggingLevel
|
|---|
| 1258 |
|
|---|
| 1259 | # Copy the .bil files to the destination rasters.
|
|---|
| 1260 |
|
|---|
| 1261 | Logger.Info(_(u'Creating outputs...'))
|
|---|
| 1262 |
|
|---|
| 1263 | from GeoEco.Datasets import Dataset
|
|---|
| 1264 | from GeoEco.Datasets.GDAL import GDALDataset
|
|---|
| 1265 | from GeoEco.Datasets.ArcGIS import ArcGISWorkspace, ArcGISRaster
|
|---|
| 1266 |
|
|---|
| 1267 | bilFile = GDALDataset(tempResponseFile + '.bil', lazyPropertyValues={'SpatialReference': Dataset.ConvertSpatialReference('arcgis', coordinateSystem, 'obj')})
|
|---|
| 1268 | workspace = ArcGISWorkspace(os.path.dirname(outputResponseRaster), ArcGISRaster, pathCreationExpressions=[os.path.basename(outputResponseRaster)])
|
|---|
| 1269 | try:
|
|---|
| 1270 | workspace.ImportDatasets(bilFile.QueryDatasets(reportProgress=False), {False: 'Add', True: 'Replace'}[overwriteExisting], reportProgress=False, calculateStatistics=True, buildPyramids=buildPyramids)
|
|---|
| 1271 | finally:
|
|---|
| 1272 | del bilFile
|
|---|
| 1273 | del workspace
|
|---|
| 1274 |
|
|---|
| 1275 | if outputErrorRaster is not None:
|
|---|
| 1276 | bilFile = GDALDataset(tempErrorFile + '.bil', lazyPropertyValues={'SpatialReference': Dataset.ConvertSpatialReference('arcgis', coordinateSystem, 'obj')})
|
|---|
| 1277 | workspace = ArcGISWorkspace(os.path.dirname(outputErrorRaster), ArcGISRaster, pathCreationExpressions=[os.path.basename(outputErrorRaster)])
|
|---|
| 1278 | try:
|
|---|
| 1279 | workspace.ImportDatasets(bilFile.QueryDatasets(reportProgress=False), {False: 'Add', True: 'Replace'}[overwriteExisting], reportProgress=False, calculateStatistics=True, buildPyramids=buildPyramids)
|
|---|
| 1280 | finally:
|
|---|
| 1281 | del bilFile
|
|---|
| 1282 | del workspace
|
|---|
| 1283 |
|
|---|
| 1284 | # If the caller requested a binary response raster,
|
|---|
| 1285 | # create it.
|
|---|
| 1286 |
|
|---|
| 1287 | if outputBinaryResponseRaster is not None:
|
|---|
| 1288 | gp.Con_sa(outputResponseRaster, 1, outputBinaryResponseRaster, 0, 'Value >= ' + str(cutoff))
|
|---|
| 1289 | if buildPyramids:
|
|---|
| 1290 | gp.BuildPyramids_Management(outputBinaryResponseRaster)
|
|---|
| 1291 |
|
|---|
| 1292 | # Delete R variables assigned by this function.
|
|---|
| 1293 |
|
|---|
| 1294 | finally:
|
|---|
| 1295 | r('if (exists("rastersForPredictors")) rm("rastersForPredictors")')
|
|---|
| 1296 | r('if (exists("model")) rm("model")')
|
|---|
| 1297 | r('if (exists("rPackage")) rm("rPackage")')
|
|---|
| 1298 | r('if (exists("xVar")) rm("xVar")')
|
|---|
| 1299 | r('if (exists("yVar")) rm("yVar")')
|
|---|
| 1300 | r('if (exists("zVar")) rm("zVar")')
|
|---|
| 1301 | r('if (exists("mVar")) rm("mVar")')
|
|---|
| 1302 | r('if (exists("coordinateSystem")) rm("coordinateSystem")')
|
|---|
| 1303 | r('if (exists("f20985982305")) rm("f20985982305", pos=globalenv())')
|
|---|
| 1304 | r('if (exists("data20985982305")) rm("data20985982305", pos=globalenv())')
|
|---|
| 1305 | r('if (exists("fam20985982305")) rm("fam20985982305", pos=globalenv())')
|
|---|
| 1306 |
|
|---|
| 1307 | def _PreparePredictorRasters(r, gp, tempDir, variableNames, inputPredictorRasters, templateRaster, templateDescribe, coordinateSystem, cellSize, extent, resamplingTechniques):
|
|---|
| 1308 |
|
|---|
| 1309 | # Build a dictionary that maps predictor variable names in the
|
|---|
| 1310 | # model to rasters provided by the caller.
|
|---|
| 1311 |
|
|---|
| 1312 | rastersForPredictors = {}
|
|---|
| 1313 | createXRaster = False
|
|---|
| 1314 | createYRaster = False
|
|---|
| 1315 |
|
|---|
| 1316 | if r('exists("rPackage")') and r['rPackage'] == 'rpart':
|
|---|
| 1317 | allVars = r('all.vars(model$terms)')[1:]
|
|---|
| 1318 | if isinstance(allVars, basestring):
|
|---|
| 1319 | allVars = [allVars]
|
|---|
| 1320 |
|
|---|
| 1321 | elif r('exists("rPackage")') and r['rPackage'] == 'nlme':
|
|---|
| 1322 | fixed = r('all.vars(fixed)')[1:]
|
|---|
| 1323 | if isinstance(fixed, basestring):
|
|---|
| 1324 | fixed = [fixed]
|
|---|
| 1325 | random = r('all.vars(random)')
|
|---|
| 1326 | if isinstance(random, basestring):
|
|---|
| 1327 | random = [random]
|
|---|
| 1328 | allVars = list(set(fixed).union(set(random)))
|
|---|
| 1329 |
|
|---|
| 1330 | else:
|
|---|
| 1331 | allVars = r('all.vars(model$formula)')[1:]
|
|---|
| 1332 | if isinstance(allVars, basestring):
|
|---|
| 1333 | allVars = [allVars]
|
|---|
| 1334 |
|
|---|
| 1335 | for variable in allVars:
|
|---|
| 1336 | if variable not in variableNames:
|
|---|
| 1337 | if variable == r['xVar']:
|
|---|
| 1338 | createXRaster = True
|
|---|
| 1339 | elif variable == r['yVar']:
|
|---|
| 1340 | createYRaster = True
|
|---|
| 1341 | else:
|
|---|
| 1342 | Logger.RaiseException(ValueError(_('The predictor variable %(var)s appears in the model\'s formula but does not appear in the list of model variable names for predictor rasters provided to this tool. Add that predictor variable to the list and try again.') % {u'var': variable}))
|
|---|
| 1343 | else:
|
|---|
| 1344 | rastersForPredictors[variable] = inputPredictorRasters[variableNames.index(variable)]
|
|---|
| 1345 |
|
|---|
| 1346 | # Generate the X and Y coordinate rasters, if needed and possible.
|
|---|
| 1347 |
|
|---|
| 1348 | if createXRaster:
|
|---|
| 1349 | if r['coordinateSystem'] != coordinateSystem:
|
|---|
| 1350 | Logger.RaiseException(ValueError(_('The model formula includes the X coordinate as predictor variable %(var)s, but the list of predictor rasters does not include this variable. Because the model was fitted to points that used a different coordinate system than the one you want to use for the predictions, this tool cannot automatically generate a raster for the X coordinates. Please generate one yourself and add it to the list of predictor rasters') % {u'var': r['xVar']}))
|
|---|
| 1351 |
|
|---|
| 1352 | xRaster = os.path.join(tempDir.Path, u'xvar')
|
|---|
| 1353 | oldLogInfoAsDebug = Logger.LogInfoAndSetInfoToDebug(_(u'Creating a predictor raster for the variable "%(var)s" representing the X coordinate...') % {u'var': r['xVar']})
|
|---|
| 1354 | try:
|
|---|
| 1355 | ArcGISRaster.CreateXRaster(xRaster, extent, cellSize, u'Center', coordinateSystem)
|
|---|
| 1356 | finally:
|
|---|
| 1357 | Logger.SetLogInfoAsDebug(oldLogInfoAsDebug)
|
|---|
| 1358 | rastersForPredictors[r['xVar']] = xRaster
|
|---|
| 1359 |
|
|---|
| 1360 | if createYRaster:
|
|---|
| 1361 | if r['coordinateSystem'] != coordinateSystem:
|
|---|
| 1362 | Logger.RaiseException(ValueError(_('The model formula includes the Y coordinate as predictor variable %(var)s, but the list of predictor rasters does not include this variable. Because the model was fitted to points that used a different coordinate system than the one you want to use for the predictions, this tool cannot automatically generate a raster for the Y coordinates. Please generate one yourself and add it to the list of predictor rasters') % {u'var': r['yVar']}))
|
|---|
| 1363 |
|
|---|
| 1364 | yRaster = os.path.join(tempDir.Path, u'yvar')
|
|---|
| 1365 | oldLogInfoAsDebug = Logger.LogInfoAndSetInfoToDebug(_(u'Creating a predictor raster for the variable "%(var)s" representing the Y coordinate...') % {u'var': r['yVar']})
|
|---|
| 1366 | try:
|
|---|
| 1367 | ArcGISRaster.CreateYRaster(yRaster, extent, cellSize, u'Center', coordinateSystem)
|
|---|
| 1368 | finally:
|
|---|
| 1369 | Logger.SetLogInfoAsDebug(oldLogInfoAsDebug)
|
|---|
| 1370 | rastersForPredictors[r['yVar']] = yRaster
|
|---|
| 1371 |
|
|---|
| 1372 | # Check each predictor raster to see if it needs to be copied,
|
|---|
| 1373 | # projected or clipped before we can use it.
|
|---|
| 1374 |
|
|---|
| 1375 | Logger.Info(_('Checking coordinate systems, extents, and cell sizes of predictor rasters and reprojecting and clipping as needed to make them conform to the template raster...'))
|
|---|
| 1376 |
|
|---|
| 1377 | from GeoEco.Types import EnvelopeTypeMetadata
|
|---|
| 1378 | [templateLeft, templateBottom, templateRight, templateTop] = EnvelopeTypeMetadata.ParseFromArcGISString(extent)
|
|---|
| 1379 |
|
|---|
| 1380 | vars = copy.deepcopy(rastersForPredictors.keys()) # I'm going to modify the dictionary in the loop below; not sure if deepcopy is needed, but playing it safe.
|
|---|
| 1381 | rasters = copy.deepcopy(rastersForPredictors.values())
|
|---|
| 1382 |
|
|---|
| 1383 | for i in range(len(vars)):
|
|---|
| 1384 | needToCopy = False
|
|---|
| 1385 | needToProject = False
|
|---|
| 1386 | needToClip = False
|
|---|
| 1387 |
|
|---|
| 1388 | # If this is predictor raster is a raster layer, we need to
|
|---|
| 1389 | # make a copy of it in the file system so that it can be read
|
|---|
| 1390 | # by the R code using the rgdal package.
|
|---|
| 1391 |
|
|---|
| 1392 | d = gp.Describe(rasters[i])
|
|---|
| 1393 | if d.DataType.lower() == u'rasterlayer':
|
|---|
| 1394 | needToCopy = True
|
|---|
| 1395 |
|
|---|
| 1396 | # If this is not the template raster, check to see whether it
|
|---|
| 1397 | # needs to be projected or clipped.
|
|---|
| 1398 |
|
|---|
| 1399 | if rasters[i] != templateRaster:
|
|---|
| 1400 | cs = gp.CreateSpatialReference_management(d.SpatialReference).split(u';')[0]
|
|---|
| 1401 |
|
|---|
| 1402 | if cs.lower() != coordinateSystem.lower():
|
|---|
| 1403 | Logger.Info(_(u'Projecting and clipping %(raster)s. Its coordinate system does not match that of the template raster.') % {u'raster': rasters[i]})
|
|---|
| 1404 | needToProject = True
|
|---|
| 1405 |
|
|---|
| 1406 | elif abs(1.0 - cellSize / d.MeanCellWidth) > 0.00001:
|
|---|
| 1407 | Logger.Info(_(u'Projecting and clipping %(raster)s. Its cell size (%(cs1)s) does not match the cell size of the template raster (%(cs2)s).') % {u'raster': rasters[i], u'cs1': repr(d.MeanCellWidth), u'cs2': repr(cellSize)})
|
|---|
| 1408 | needToProject = True
|
|---|
| 1409 |
|
|---|
| 1410 | else:
|
|---|
| 1411 | [rasterLeft, rasterBottom, rasterRight, rasterTop] = EnvelopeTypeMetadata.ParseFromArcGISString(d.Extent)
|
|---|
| 1412 |
|
|---|
| 1413 | deltaLeft = (templateLeft - rasterLeft) / cellSize # Difference, in cells, between the left edge of the template and this raster
|
|---|
| 1414 | deltaBottom = (templateBottom - rasterBottom) / cellSize # Difference, in cells, between the bottom edge of the template and this raster
|
|---|
| 1415 | deltaRight = (templateRight - rasterRight) / cellSize # Difference, in cells, between the right edge of the template and this raster
|
|---|
| 1416 | deltaTop = (templateTop - rasterTop) / cellSize # Difference, in cells, between the top edge of the template and this raster
|
|---|
| 1417 |
|
|---|
| 1418 | if deltaLeft < -0.01:
|
|---|
| 1419 | Logger.RaiseException(ValueError(_(u'The template raster is too large; its left edge falls (%(e1)s) outside the left edge of the predictor raster %(raster)s (%(e2)s). Please reduce the extent of the template raster such that it does not exceed the extents of any of the predictor rasters.') % {u'raster': rasters[i], u'e1': repr(templateLeft), u'e2': repr(rasterLeft)}))
|
|---|
| 1420 | if deltaRight > 0.01:
|
|---|
| 1421 | Logger.RaiseException(ValueError(_(u'The template raster is too large; its right edge falls (%(e1)s) outside the right edge of the predictor raster %(raster)s (%(e2)s). Please reduce the extent of the template raster such that it does not exceed the extents of any of the predictor rasters.') % {u'raster': rasters[i], u'e1': repr(templateRight), u'e2': repr(rasterRight)}))
|
|---|
| 1422 | if deltaTop > 0.01:
|
|---|
| 1423 | Logger.RaiseException(ValueError(_(u'The template raster is too large; its top edge falls (%(e1)s) outside the top edge of the predictor raster %(raster)s (%(e2)s). Please reduce the extent of the template raster such that it does not exceed the extents of any of the predictor rasters.') % {u'raster': rasters[i], u'e1': repr(templateTop), u'e2': repr(rasterTop)}))
|
|---|
| 1424 | if deltaBottom < -0.01:
|
|---|
| 1425 | Logger.RaiseException(ValueError(_(u'The template raster is too large; its bottom edge falls (%(e1)s) outside the bottom edge of the predictor raster %(raster)s (%(e2)s). Please reduce the extent of the template raster such that it does not exceed the extents of any of the predictor rasters.') % {u'raster': rasters[i], u'e1': repr(templateBottom), u'e2': repr(rasterBottom)}))
|
|---|
| 1426 |
|
|---|
| 1427 | if abs(deltaLeft) < 0.01 and abs(deltaBottom) < 0.01 and abs(deltaRight) < 0.01 and abs(deltaTop) < 0.01 and d.Width == templateDescribe.Width and d.Height == templateDescribe.Height:
|
|---|
| 1428 | Logger.Debug(_(u'%(raster)s has the same coordinate system, cell size, and extent as the template raster.') % {u'raster': rasters[i]})
|
|---|
| 1429 |
|
|---|
| 1430 | elif abs(deltaLeft - round(deltaLeft)) < 0.01 and abs(deltaBottom - round(deltaBottom)) < 0.01 and abs(deltaRight - round(deltaRight)) < 0.01 and abs(deltaTop - round(deltaTop)) < 0.01:
|
|---|
| 1431 | Logger.Info(_(u'Clipping %(raster)s. Although it has the same coordinate system and cell size as the template raster and is aligned on the same grid coordinates, it has a different extent.') % {u'raster': rasters[i]})
|
|---|
| 1432 | needToClip = True
|
|---|
| 1433 |
|
|---|
| 1434 | else:
|
|---|
| 1435 | Logger.Info(_(u'Projecting and clipping %(raster)s. Although it has the same coordinate system and cell size as the template raster, it is aligned on different grid coordinates and has a different extent.') % {u'raster': rasters[i]})
|
|---|
| 1436 | needToProject = True
|
|---|
| 1437 |
|
|---|
| 1438 | # If this raster does not need to be copied, projected, or
|
|---|
| 1439 | # clipped, go on to the next one.
|
|---|
| 1440 |
|
|---|
| 1441 | if not (needToCopy or needToProject or needToClip):
|
|---|
| 1442 | continue
|
|---|
| 1443 |
|
|---|
| 1444 | # If we need to project, do it now. This will also take care
|
|---|
| 1445 | # of copying and clipping the raster, if needed.
|
|---|
| 1446 |
|
|---|
| 1447 | if needToProject:
|
|---|
| 1448 |
|
|---|
| 1449 | # Use the caller's resampling technique, if one was
|
|---|
| 1450 | # provided. Otherwise use BILINEAR if it is a
|
|---|
| 1451 | # floating-point raster, or NEAREST if it is not.
|
|---|
| 1452 |
|
|---|
| 1453 | if resamplingTechniques is not None and vars[i] in variableNames and variableNames.index(vars[i]) < len(resamplingTechniques):
|
|---|
| 1454 | resamplingTechnique = resamplingTechniques[variableNames.index(vars[i])]
|
|---|
| 1455 | elif d.PixelType[0] in ['F', 'f']:
|
|---|
| 1456 | resamplingTechnique = u'BILINEAR'
|
|---|
| 1457 | else:
|
|---|
| 1458 | resamplingTechnique = u'NEAREST'
|
|---|
| 1459 |
|
|---|
| 1460 | # Set the geoprocessor's Extent environment
|
|---|
| 1461 | # variable to the output extent. This will
|
|---|
| 1462 | # force ProjectRaster to clip to the desired
|
|---|
| 1463 | # extent.
|
|---|
| 1464 |
|
|---|
| 1465 | oldGPExtent = gp.Extent
|
|---|
| 1466 | if oldGPExtent is None:
|
|---|
| 1467 | oldGPExtent = u''
|
|---|
| 1468 | gp.Extent = extent
|
|---|
| 1469 |
|
|---|
| 1470 | try:
|
|---|
| 1471 | # If this is ArcGIS 9.3, set the SnapRaster
|
|---|
| 1472 | # environment variable to the template raster.
|
|---|
| 1473 |
|
|---|
| 1474 | if GeoprocessorManager.GetArcGISMajorVersion() > 9 or GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() >= 3:
|
|---|
| 1475 | oldSnapRaster = gp.SnapRaster
|
|---|
| 1476 | if oldSnapRaster is None:
|
|---|
| 1477 | oldSnapRaster = u''
|
|---|
| 1478 | gp.SnapRaster = templateRaster
|
|---|
| 1479 |
|
|---|
| 1480 | # Project the raster. If this is ArcGIS 9.3 or later,
|
|---|
| 1481 | # the SnapRaster and Extent environment variables will
|
|---|
| 1482 | # ensure the projected cells are snapped and clipped
|
|---|
| 1483 | # as requested by the caller. If it is 9.2, use the
|
|---|
| 1484 | # Registration_Point parameter instead of SnapRaster,
|
|---|
| 1485 | # which did not exist in 9.2 (there was something like
|
|---|
| 1486 | # it, but it did not work correctly).
|
|---|
| 1487 |
|
|---|
| 1488 | try:
|
|---|
| 1489 | tempRaster = os.path.join(tempDir.Path, u'projected%i' % i)
|
|---|
| 1490 | if GeoprocessorManager.GetArcGISMajorVersion() > 9 or GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() >= 3:
|
|---|
| 1491 | gp.ProjectRaster_management(rasters[i], tempRaster, coordinateSystem, resamplingTechnique, cellSize)
|
|---|
| 1492 | else:
|
|---|
| 1493 | gp.ProjectRaster_management(rasters[i], tempRaster, coordinateSystem, resamplingTechnique, cellSize, None, extent.rsplit(' ', 2)[0])
|
|---|
| 1494 |
|
|---|
| 1495 | # Even though we provided an extent and cell size
|
|---|
| 1496 | # of an existing raster (the template raster),
|
|---|
| 1497 | # using values we obtained through ArcGIS APIs,
|
|---|
| 1498 | # the ProjectRaster tool will not necessarily
|
|---|
| 1499 | # create a raster that has exactly that extent. It
|
|---|
| 1500 | # may be one cell larger or smaller in any of the
|
|---|
| 1501 | # four directions. Different versions of ArcGIS
|
|---|
| 1502 | # seem to work differently in this respect. It is
|
|---|
| 1503 | # probably due to whether it uses 32-bit or 64-bit
|
|---|
| 1504 | # floats, and other various floating-point
|
|---|
| 1505 | # rounding issues.
|
|---|
| 1506 | #
|
|---|
| 1507 | # The prediction code in R requires that all
|
|---|
| 1508 | # predictor rasters have the same extent (really,
|
|---|
| 1509 | # the same number of rows and columns). If ArcGIS
|
|---|
| 1510 | # produced a raster that was too small or large in
|
|---|
| 1511 | # any dimension, try ProjectRaster again with
|
|---|
| 1512 | # values that will hopefully yield what we want.
|
|---|
| 1513 |
|
|---|
| 1514 | describeTemp = gp.Describe(tempRaster)
|
|---|
| 1515 | newEdges = _CheckResultingEdges(tempRaster, describeTemp.Extent, templateLeft, templateBottom, templateRight, templateTop, cellSize)
|
|---|
| 1516 |
|
|---|
| 1517 | if newEdges is not None:
|
|---|
| 1518 | Logger.Debug(_(u'Reprojecting because the first attempt produced a raster with an extent (%(e1)s) that did not match the extent of the template raster (%(e2)s).') % {u'e1': describeTemp.Extent, u'e2': extent})
|
|---|
| 1519 | gp.Extent = u' '.join(map(unicode, newEdges))
|
|---|
| 1520 | tempRaster = os.path.join(tempDir.Path, u'projected%ia' % i)
|
|---|
| 1521 | if GeoprocessorManager.GetArcGISMajorVersion() > 9 or GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() >= 3:
|
|---|
| 1522 | gp.ProjectRaster_management(rasters[i], tempRaster, coordinateSystem, resamplingTechnique, cellSize)
|
|---|
| 1523 | else:
|
|---|
| 1524 | gp.ProjectRaster_management(rasters[i], tempRaster, coordinateSystem, resamplingTechnique, cellSize, None, extent.rsplit(' ', 2)[0])
|
|---|
| 1525 |
|
|---|
| 1526 | # Check the extent of the new attempt. If we
|
|---|
| 1527 | # still did not get it, report an error.
|
|---|
| 1528 |
|
|---|
| 1529 | describeTemp = gp.Describe(tempRaster)
|
|---|
| 1530 | [projectedLeft, projectedBottom, projectedRight, projectedTop] = EnvelopeTypeMetadata.ParseFromArcGISString(describeTemp.Extent)
|
|---|
| 1531 |
|
|---|
| 1532 | if projectedLeft != templateLeft or projectedBottom != templateBottom or projectedRight != templateRight or projectedTop != templateTop:
|
|---|
| 1533 | Logger.RaiseException(RuntimeError(_(u'This tool was unable to project %(raster)s to the coordinate system of the template raster and obtain a projected raster with an extent that matched that of the template raster. The extent of the projected raster was %(e1)s, while the extent of the template raster is %(e2)s. (The extent values are ordered LEFT, BOTTOM, RIGHT, TOP.) If the extent of the template is larger than the projected raster, it may mean that the template is too large (i.e. that the predictor raster does not cover the entire extent of the template). In that case, try reducing the extent of the template. If that is not the problem, try projecting the predictor raster yourself prior to providing it to this tool, such that its coordinate system, cell size, and extent match that of the template.') % {u'raster': rasters[i], u'e1': describeTemp.Extent, u'e2': extent}))
|
|---|
| 1534 |
|
|---|
| 1535 | # If this is ArcGIS 9.3, reset the SnapRaster ArcGIS
|
|---|
| 1536 | # environment variable to what it was before.
|
|---|
| 1537 |
|
|---|
| 1538 | finally:
|
|---|
| 1539 | try:
|
|---|
| 1540 | if GeoprocessorManager.GetArcGISMajorVersion() > 9 or GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() >= 3:
|
|---|
| 1541 | gp.SnapRaster = oldSnapRaster
|
|---|
| 1542 | except:
|
|---|
| 1543 | pass
|
|---|
| 1544 |
|
|---|
| 1545 | # Reset the Extent ArcGIS environment variable to what it
|
|---|
| 1546 | # was before.
|
|---|
| 1547 |
|
|---|
| 1548 | finally:
|
|---|
| 1549 | try:
|
|---|
| 1550 | gp.Extent = oldGPExtent
|
|---|
| 1551 | except:
|
|---|
| 1552 | pass
|
|---|
| 1553 |
|
|---|
| 1554 | # If we need to clip (but not project), do it now. This will
|
|---|
| 1555 | # also take care of copying the raster, if needed.
|
|---|
| 1556 |
|
|---|
| 1557 | elif needToClip:
|
|---|
| 1558 |
|
|---|
| 1559 | # Clip the raster. If this is ArcGIS 9.2, pass rasters[i]
|
|---|
| 1560 | # for the in_template_dataset parameter, to work around
|
|---|
| 1561 | # the 9.2 bug described in
|
|---|
| 1562 | # http://forums.esri.com/Thread.asp?c=93&f=1729&t=230897&mc=11.
|
|---|
| 1563 |
|
|---|
| 1564 | tempRaster = os.path.join(tempDir.Path, u'clipped%i' % i)
|
|---|
| 1565 | if GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() == 2:
|
|---|
| 1566 | gp.Clip_management(rasters[i], extent, tempRaster, rasters[i])
|
|---|
| 1567 | else:
|
|---|
| 1568 | gp.Clip_management(rasters[i], extent, tempRaster)
|
|---|
| 1569 |
|
|---|
| 1570 | # Even though we provided an extent of an existing raster
|
|---|
| 1571 | # (the template raster) that has the same cell size and is
|
|---|
| 1572 | # aligned on the same coordinates, using values we
|
|---|
| 1573 | # obtained through ArcGIS APIs, the Clip tool will not
|
|---|
| 1574 | # necessarily create a raster that has exactly that
|
|---|
| 1575 | # extent. It may be one cell larger or smaller in any of
|
|---|
| 1576 | # the four directions. Different versions of ArcGIS seem
|
|---|
| 1577 | # to work differently in this respect. It is probably due
|
|---|
| 1578 | # to whether it uses 32-bit or 64-bit floats, and other
|
|---|
| 1579 | # various floating-point rounding issues.
|
|---|
| 1580 | #
|
|---|
| 1581 | # The prediction code in R requires that all predictor
|
|---|
| 1582 | # rasters have the same extent (really, the same number of
|
|---|
| 1583 | # rows and columns). If ArcGIS produced a raster that was
|
|---|
| 1584 | # too small or large in any dimension, try Clip again with
|
|---|
| 1585 | # values that will hopefully yield what we want.
|
|---|
| 1586 |
|
|---|
| 1587 | describeTemp = gp.Describe(tempRaster)
|
|---|
| 1588 | newEdges = _CheckResultingEdges(tempRaster, describeTemp.Extent, templateLeft, templateBottom, templateRight, templateTop, cellSize)
|
|---|
| 1589 |
|
|---|
| 1590 | if newEdges is not None:
|
|---|
| 1591 | Logger.Debug(_(u'Reclipping because the first attempt produced a raster with an extent (%(e1)s) that did not match the extent of the template raster (%(e2)s).') % {u'e1': describeTemp.Extent, u'e2': extent})
|
|---|
| 1592 | tempRaster = os.path.join(tempDir.Path, u'clipped%ia' % i)
|
|---|
| 1593 | if GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() == 2:
|
|---|
| 1594 | gp.Clip_management(rasters[i], u' '.join(map(unicode, newEdges)), tempRaster, rasters[i])
|
|---|
| 1595 | else:
|
|---|
| 1596 | gp.Clip_management(rasters[i], u' '.join(map(unicode, newEdges)), tempRaster)
|
|---|
| 1597 |
|
|---|
| 1598 | # If we got here but did not need to project or clip, we
|
|---|
| 1599 | # needed to copy. Create a copy of the raster in ArcInfo
|
|---|
| 1600 | # binary grid format.
|
|---|
| 1601 |
|
|---|
| 1602 | else:
|
|---|
| 1603 | tempRaster = os.path.join(tempDir.Path, u'copied%i' % i)
|
|---|
| 1604 | ArcGISRaster.CopySilent(rasters[i], tempRaster)
|
|---|
| 1605 |
|
|---|
| 1606 | # If we had to project or clip, make absolutely sure that the
|
|---|
| 1607 | # projected or clipped raster has the expected number of rows
|
|---|
| 1608 | # and columns.
|
|---|
| 1609 |
|
|---|
| 1610 | if needToProject or needToClip:
|
|---|
| 1611 | d = gp.Describe(tempRaster)
|
|---|
| 1612 | if d.Height != templateDescribe.Height or d.Width != templateDescribe.Width:
|
|---|
| 1613 | Logger.RaiseException(RuntimeError(_(u'Internal error in this tool. Please contact the MGET development team for assistance. Error details: after %(raster)s was projected and/or clipped, the resulting raster had %(cols1)i columns and %(rows1)i rows, while the template raster has %(cols2)i columns and %(rows2)i rows. This tool expected that the resulting raster would have the same number of rows and columns as the template raster. You can work around this problem by projecting and/or clipping the raster yourself, so it has the same coordinate system, cell size, and extent as the template raster.') % {u'raster': rasters[i], u'rows1': d.Height, u'cols1': d.Width, u'rows2': templateDescribe.Height, u'cols2': templateDescribe.Width}))
|
|---|
| 1614 |
|
|---|
| 1615 | # Update our dictionary to use the projected/clipped/copied
|
|---|
| 1616 | # raster.
|
|---|
| 1617 |
|
|---|
| 1618 | rastersForPredictors[vars[i]] = tempRaster
|
|---|
| 1619 |
|
|---|
| 1620 | # Pass the dictionary that maps predictor variables to rasters to
|
|---|
| 1621 | # R, so the R code can read it.
|
|---|
| 1622 |
|
|---|
| 1623 | r['rastersForPredictors'] = rastersForPredictors
|
|---|
| 1624 |
|
|---|
| 1625 | def _CheckResultingEdges(resultingRaster, resultingExtent, templateLeft, templateBottom, templateRight, templateTop, cellSize):
|
|---|
| 1626 | from GeoEco.Types import EnvelopeTypeMetadata
|
|---|
| 1627 |
|
|---|
| 1628 | [resultingLeft, resultingBottom, resultingRight, resultingTop] = EnvelopeTypeMetadata.ParseFromArcGISString(resultingExtent)
|
|---|
| 1629 | needToTryAgain = False
|
|---|
| 1630 | newEdges = [None, None, None, None]
|
|---|
| 1631 |
|
|---|
| 1632 | if resultingLeft < templateLeft - cellSize * 0.01:
|
|---|
| 1633 | needToTryAgain = True
|
|---|
| 1634 | newEdges[0] = templateLeft + cellSize * 0.5
|
|---|
| 1635 | elif resultingLeft > templateLeft + cellSize * 0.01:
|
|---|
| 1636 | needToTryAgain = True
|
|---|
| 1637 | newEdges[0] = templateLeft - cellSize * 0.5
|
|---|
| 1638 | else:
|
|---|
| 1639 | newEdges[0] = templateLeft
|
|---|
| 1640 |
|
|---|
| 1641 | if resultingBottom < templateBottom - cellSize * 0.01:
|
|---|
| 1642 | needToTryAgain = True
|
|---|
| 1643 | newEdges[1] = templateBottom + cellSize * 0.5
|
|---|
| 1644 | elif resultingBottom > templateBottom + cellSize * 0.01:
|
|---|
| 1645 | needToTryAgain = True
|
|---|
| 1646 | newEdges[1] = templateBottom - cellSize * 0.5
|
|---|
| 1647 | else:
|
|---|
| 1648 | newEdges[1] = templateBottom
|
|---|
| 1649 |
|
|---|
| 1650 | if resultingRight < templateRight - cellSize * 0.01:
|
|---|
| 1651 | needToTryAgain = True
|
|---|
| 1652 | newEdges[2] = templateRight + cellSize * 0.5
|
|---|
| 1653 | elif resultingRight > templateRight + cellSize * 0.01:
|
|---|
| 1654 | needToTryAgain = True
|
|---|
| 1655 | newEdges[2] = templateRight - cellSize * 0.5
|
|---|
| 1656 | else:
|
|---|
| 1657 | newEdges[2] = templateRight
|
|---|
| 1658 |
|
|---|
| 1659 | if resultingTop < templateTop - cellSize * 0.01:
|
|---|
| 1660 | needToTryAgain = True
|
|---|
| 1661 | newEdges[3] = templateTop + cellSize * 0.5
|
|---|
| 1662 | elif resultingTop > templateTop + cellSize * 0.01:
|
|---|
| 1663 | needToTryAgain = True
|
|---|
| 1664 | newEdges[3] = templateTop - cellSize * 0.5
|
|---|
| 1665 | else:
|
|---|
| 1666 | newEdges[3] = templateTop
|
|---|
| 1667 |
|
|---|
| 1668 | if needToTryAgain:
|
|---|
| 1669 | return newEdges
|
|---|
| 1670 | return None
|
|---|
| 1671 |
|
|---|
| 1672 |
|
|---|
| 1673 | ###############################################################################
|
|---|
| 1674 | # Metadata: module
|
|---|
| 1675 | ###############################################################################
|
|---|
| 1676 |
|
|---|
| 1677 | from GeoEco.ArcGIS import ArcGISDependency, ArcGISExtensionDependency
|
|---|
| 1678 | from GeoEco.DatabaseAccess.ArcGIS import ArcGIS91SelectCursor
|
|---|
| 1679 | from GeoEco.Dependencies import PythonModuleDependency
|
|---|
| 1680 | from GeoEco.R import RDependency, RPackageDependency
|
|---|
| 1681 | from GeoEco.Metadata import *
|
|---|
| 1682 | from GeoEco.Types import *
|
|---|
| 1683 |
|
|---|
| 1684 | AddModuleMetadata(shortDescription=_(u'Provides methods for modeling and prediction.'))
|
|---|
| 1685 |
|
|---|
| 1686 | ###############################################################################
|
|---|
| 1687 | # Metadata: GLM class
|
|---|
| 1688 | ###############################################################################
|
|---|
| 1689 |
|
|---|
| 1690 | AddClassMetadata(GLM,
|
|---|
| 1691 | shortDescription=_(u'Provides methods for modeling and prediction using Generalized Linear Models (GLMs).'),
|
|---|
| 1692 | isExposedAsCOMServer=True,
|
|---|
| 1693 | comIID=u'{4A349150-2A25-4B35-851C-A9BFFADF53F3}',
|
|---|
| 1694 | comCLSID=u'{16411D8A-C8CA-40C6-A9D3-F34EA8CD8D6A}')
|
|---|
| 1695 |
|
|---|
| 1696 | # Public method: GLM.FitToArcGISTable
|
|---|
| 1697 |
|
|---|
| 1698 | AddMethodMetadata(GLM.FitToArcGISTable,
|
|---|
| 1699 | shortDescription=_(u'Fits a generalized linear model (GLM) to data in an ArcGIS table using the R glm function.'),
|
|---|
| 1700 | isExposedToPythonCallers=True,
|
|---|
| 1701 | isExposedByCOM=True,
|
|---|
| 1702 | isExposedAsArcGISTool=True,
|
|---|
| 1703 | arcGISDisplayName=_(u'Fit GLM'),
|
|---|
| 1704 | arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Linear Models'),
|
|---|
| 1705 | dependencies=[ArcGISDependency(9, 1), RDependency(2, 6, 0)])
|
|---|
| 1706 |
|
|---|
| 1707 | AddArgumentMetadata(GLM.FitToArcGISTable, u'cls',
|
|---|
| 1708 | typeMetadata=ClassOrClassInstanceTypeMetadata(cls=GLM),
|
|---|
| 1709 | description=_(u'%s class or an instance of it.') % GLM.__name__)
|
|---|
| 1710 |
|
|---|
| 1711 | AddArgumentMetadata(GLM.FitToArcGISTable, u'inputTable',
|
|---|
| 1712 | typeMetadata=ArcGISTableViewTypeMetadata(mustExist=True),
|
|---|
| 1713 | description=_(
|
|---|
| 1714 | u"""ArcGIS table, table view, feature class, or feature layer
|
|---|
| 1715 | containing the data for which the model should be fitted."""),
|
|---|
| 1716 | arcGISDisplayName=_(u'Input table'))
|
|---|
| 1717 |
|
|---|
| 1718 | AddArgumentMetadata(GLM.FitToArcGISTable, u'outputModelFile',
|
|---|
| 1719 | typeMetadata=FileTypeMetadata(mustBeDifferentThanArguments=[u'inputTable'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
|
|---|
| 1720 | description=_(
|
|---|
| 1721 | u"""Output file to receive the fitted model. The file will not be in a
|
|---|
| 1722 | user-readable format. After the model is fitted, you can provide the
|
|---|
| 1723 | file to other tools that perform further analysis or visualization of
|
|---|
| 1724 | the fitted model.
|
|---|
| 1725 |
|
|---|
| 1726 | It is suggested, but not required, that you give the file an .Rdata
|
|---|
| 1727 | extension."""),
|
|---|
| 1728 | direction=u'Output',
|
|---|
| 1729 | arcGISDisplayName=_(u'Output model file'))
|
|---|
| 1730 |
|
|---|
| 1731 | _FormulaDescription = _(
|
|---|
| 1732 | u"""The formula must be in the format expected by the R glm function::
|
|---|
| 1733 |
|
|---|
| 1734 | response ~ term1 + term2 + ... + termN
|
|---|
| 1735 |
|
|---|
| 1736 | response is the table field that will be modeled as the response
|
|---|
| 1737 | variable and the terms are the table fields that will serve as the
|
|---|
| 1738 | predictor variables. The field names are case sensitive. If any field
|
|---|
| 1739 | used in the formula is NULL for a given row, that row will not be used
|
|---|
| 1740 | in fitting the model.
|
|---|
| 1741 |
|
|---|
| 1742 | For example, if you have a field Presence that indicates the presence
|
|---|
| 1743 | or absence of a species (1 or 0) and you want to model it in terms of
|
|---|
| 1744 | sampled environmental covariates stored in the SST, ChlDensity, and
|
|---|
| 1745 | Depth fields, you would use the formula::
|
|---|
| 1746 |
|
|---|
| 1747 | Presence ~ SST + ChlDensity + Depth
|
|---|
| 1748 |
|
|---|
| 1749 | By default, all terms are treated as continuous variables. To indicate
|
|---|
| 1750 | that a term should be treated as a categorical variable, use the
|
|---|
| 1751 | factor function. For example, if SubstrateType is an integer code that
|
|---|
| 1752 | should be treated as categorical::
|
|---|
| 1753 |
|
|---|
| 1754 | Presence ~ SST + ChlDensity + Depth + factor(SubstrateType)
|
|---|
| 1755 |
|
|---|
| 1756 | The model terms may also use these operators:
|
|---|
| 1757 |
|
|---|
| 1758 | * The : operator denotes the interaction of variables a and b. For
|
|---|
| 1759 | example: a:b.
|
|---|
| 1760 |
|
|---|
| 1761 | * The * operator denotes "crossing". For example, a*b is identical to
|
|---|
| 1762 | a+b+a:b.
|
|---|
| 1763 |
|
|---|
| 1764 | * The ^ operator denotes crossing to the Nth degree. For example,
|
|---|
| 1765 | (a+b+c)^2 is identical to (a+b+c)*(a+b+c) which in turn expands to a
|
|---|
| 1766 | formula containing the main effects for a, b and c together with
|
|---|
| 1767 | their second-order interactions.
|
|---|
| 1768 |
|
|---|
| 1769 | * The %in% operator indicates that the terms on its left are nested
|
|---|
| 1770 | within those on the right. For example a + b %in% a expands to the
|
|---|
| 1771 | formula a + a:b.
|
|---|
| 1772 |
|
|---|
| 1773 | * The - operator (minus) removes the specified terms, so that
|
|---|
| 1774 | (a+b+c)^2 - a:b is identical to a + b + c + b:c + a:c. It can also
|
|---|
| 1775 | used to remove the intercept term: y ~ x - 1 is a line through the
|
|---|
| 1776 | origin. A model with no intercept can be also specified as y ~ x + 0
|
|---|
| 1777 | or y ~ 0 + x.
|
|---|
| 1778 |
|
|---|
| 1779 | While formulae usually involve just variable names, they can also
|
|---|
| 1780 | involve arithmetic expressions. The formula log(y) ~ a + log(x) is
|
|---|
| 1781 | quite legal. When such arithmetic expressions involve operators which
|
|---|
| 1782 | are also used symbolically in model formulae, there can be confusion
|
|---|
| 1783 | between arithmetic and symbolic operator use.
|
|---|
| 1784 |
|
|---|
| 1785 | To avoid this confusion, the function I() can be used to bracket those
|
|---|
| 1786 | portions of a model formula where the operators are used in their
|
|---|
| 1787 | arithmetic sense. For example, in the formula y ~ a + I(b+c), the term
|
|---|
| 1788 | b+c is to be interpreted as the sum of b and c.
|
|---|
| 1789 |
|
|---|
| 1790 | Please see the topics "glm" and "formula" in the R documentation for
|
|---|
| 1791 | more information.""")
|
|---|
| 1792 |
|
|---|
| 1793 | AddArgumentMetadata(GLM.FitToArcGISTable, u'formula',
|
|---|
| 1794 | typeMetadata=UnicodeStringTypeMetadata(),
|
|---|
| 1795 | description=_(
|
|---|
| 1796 | u"""Formula that specifies the table field that is the response
|
|---|
| 1797 | variable and the table fields that are the terms of the model.
|
|---|
| 1798 |
|
|---|
| 1799 | """ + _FormulaDescription),
|
|---|
| 1800 | arcGISDisplayName=_(u'Formula'))
|
|---|
| 1801 |
|
|---|
| 1802 | AddArgumentMetadata(GLM.FitToArcGISTable, u'family',
|
|---|
| 1803 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'binomial', u'gaussian', u'Gamma', u'inverse.gaussian', u'poisson', u'quasi', u'quasibinomial', u'quasipoisson']),
|
|---|
| 1804 | description=_(
|
|---|
| 1805 | u"""Name of the family for the model. The family, together with the
|
|---|
| 1806 | link function and variance parameters, describes the error
|
|---|
| 1807 | distribution that will be used by the model. The family name is case
|
|---|
| 1808 | sensitive."""),
|
|---|
| 1809 | arcGISDisplayName=_(u'Family'))
|
|---|
| 1810 |
|
|---|
| 1811 | AddArgumentMetadata(GLM.FitToArcGISTable, u'where',
|
|---|
| 1812 | typeMetadata=SQLWhereClauseTypeMetadata(canBeNone=True),
|
|---|
| 1813 | description=ArcGIS91SelectCursor.__init__.__doc__.Obj.GetArgumentByName(u'where').Description,
|
|---|
| 1814 | arcGISParameterDependencies=[u'inputTable'],
|
|---|
| 1815 | arcGISDisplayName=_(u'Where clause'),
|
|---|
| 1816 | arcGISCategory=_(u'Model options'))
|
|---|
| 1817 |
|
|---|
| 1818 | AddArgumentMetadata(GLM.FitToArcGISTable, u'link',
|
|---|
| 1819 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'1/mu^2', u'cauchit', u'cloglog', u'identity', u'inverse', u'log', u'logit', u'probit', u'sqrt'], canBeNone=True),
|
|---|
| 1820 | description=_(
|
|---|
| 1821 | u"""Name of the link function for the model. The allowed link
|
|---|
| 1822 | functions depend on the model family:
|
|---|
| 1823 |
|
|---|
| 1824 | * binomial family - cauchit, cloglog, log, logit, and probit
|
|---|
| 1825 |
|
|---|
| 1826 | * gaussian family - identity, inverse, and log
|
|---|
| 1827 |
|
|---|
| 1828 | * Gamma family - identity, inverse, and log
|
|---|
| 1829 |
|
|---|
| 1830 | * inverse.gaussian family - 1/mu^2, inverse, identity, and log
|
|---|
| 1831 |
|
|---|
| 1832 | * poisson family - identity, log, and sqrt
|
|---|
| 1833 |
|
|---|
| 1834 | * quasi family - 1/mu^2, cloglog, identity, inverse, log, logit, probit, and sqrt
|
|---|
| 1835 |
|
|---|
| 1836 | * quasibinomial family - cauchit, cloglog, log, logit, and probit
|
|---|
| 1837 |
|
|---|
| 1838 | * quasipoisson family - identity, log, and sqrt
|
|---|
| 1839 |
|
|---|
| 1840 | If a link function is not specified, the one that is used by default
|
|---|
| 1841 | also depends on the model family:
|
|---|
| 1842 |
|
|---|
| 1843 | * binomial family - logit
|
|---|
| 1844 |
|
|---|
| 1845 | * gaussian family - identity
|
|---|
| 1846 |
|
|---|
| 1847 | * Gamma family - inverse
|
|---|
| 1848 |
|
|---|
| 1849 | * inverse.gaussian family - 1/mu^2
|
|---|
| 1850 |
|
|---|
| 1851 | * poisson family - log
|
|---|
| 1852 |
|
|---|
| 1853 | * quasi family - identity
|
|---|
| 1854 |
|
|---|
| 1855 | * quasibinomial family - logit
|
|---|
| 1856 |
|
|---|
| 1857 | * quasipoisson family - log
|
|---|
| 1858 | """),
|
|---|
| 1859 | arcGISDisplayName=_(u'Link function'),
|
|---|
| 1860 | arcGISCategory=_(u'Model options'))
|
|---|
| 1861 |
|
|---|
| 1862 | AddArgumentMetadata(GLM.FitToArcGISTable, u'variance',
|
|---|
| 1863 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'constant', u'mu(1-mu)', u'mu', u'mu^2', u'mu^3'], canBeNone=True),
|
|---|
| 1864 | description=_(
|
|---|
| 1865 | u"""Variance function to use when the "quasi" family is used. For all
|
|---|
| 1866 | families other than quasi, this parameter is ignored because the
|
|---|
| 1867 | variance function is determined by the family. If the quasi family is
|
|---|
| 1868 | used but a variance function is not specified, the "constant" variance
|
|---|
| 1869 | function will be used by default."""),
|
|---|
| 1870 | arcGISDisplayName=_(u'Variance function for quasi family'),
|
|---|
| 1871 | arcGISCategory=_(u'Model options'))
|
|---|
| 1872 |
|
|---|
| 1873 | AddArgumentMetadata(GLM.FitToArcGISTable, u'xColumnName',
|
|---|
| 1874 | typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True),
|
|---|
| 1875 | description=_(
|
|---|
| 1876 | u"""Name to use in the formula for the X coordinates of point
|
|---|
| 1877 | features. If the input table is a point feature class or layer, the X
|
|---|
| 1878 | coordinates will be extracted from the points and be accessible in the
|
|---|
| 1879 | formula using the name provided for this parameter."""),
|
|---|
| 1880 | arcGISDisplayName=_(u'Name to use for X coordinates of points'),
|
|---|
| 1881 | arcGISCategory=_(u'Point feature options'))
|
|---|
| 1882 |
|
|---|
| 1883 | AddArgumentMetadata(GLM.FitToArcGISTable, u'yColumnName',
|
|---|
| 1884 | typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'xColumnName']),
|
|---|
| 1885 | description=_(
|
|---|
| 1886 | u"""Name to use in the formula for the Y coordinates of point
|
|---|
| 1887 | features. If the input table is a point feature class or layer, the Y
|
|---|
| 1888 | coordinates will be extracted from the points and be accessible in the
|
|---|
| 1889 | formula using the name provided for this parameter."""),
|
|---|
| 1890 | arcGISDisplayName=_(u'Name to use for Y coordinates of points'),
|
|---|
| 1891 | arcGISCategory=_(u'Point feature options'))
|
|---|
| 1892 |
|
|---|
| 1893 | AddArgumentMetadata(GLM.FitToArcGISTable, u'zColumnName',
|
|---|
| 1894 | typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'xColumnName', u'yColumnName']),
|
|---|
| 1895 | description=_(
|
|---|
| 1896 | u"""Name to use in the formula for the Z coordinates of point
|
|---|
| 1897 | features. If the input table is a point feature class or layer that
|
|---|
| 1898 | has Z coordinates, the Z coordinates will be extracted from the points
|
|---|
| 1899 | and be accessible in the formula using the name provided for this
|
|---|
| 1900 | parameter."""),
|
|---|
| 1901 | arcGISDisplayName=_(u'Name to use for Z coordinates of points'),
|
|---|
| 1902 | arcGISCategory=_(u'Point feature options'))
|
|---|
| 1903 |
|
|---|
| 1904 | AddArgumentMetadata(GLM.FitToArcGISTable, u'mColumnName',
|
|---|
| 1905 | typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'xColumnName', u'yColumnName', u'zColumnName']),
|
|---|
| 1906 | description=_(
|
|---|
| 1907 | u"""Name to use in the formula for the measure values of point
|
|---|
| 1908 | features. If the input table is a point feature class or layer that
|
|---|
| 1909 | has measure values, the measure values will be extracted from the
|
|---|
| 1910 | points and be accessible in the formula using the name provided for this
|
|---|
| 1911 | parameter."""),
|
|---|
| 1912 | arcGISDisplayName=_(u'Name to use for M values of points'),
|
|---|
| 1913 | arcGISCategory=_(u'Point feature options'))
|
|---|
| 1914 |
|
|---|
| 1915 | AddArgumentMetadata(GLM.FitToArcGISTable, u'selectionMethod',
|
|---|
| 1916 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, allowedValues=[u'Stepwise backward'], makeLowercase=True),
|
|---|
| 1917 | description=_(
|
|---|
| 1918 | u"""Automated model selection method to execute after fitting the
|
|---|
| 1919 | model using the original formula you specified. If automated model
|
|---|
| 1920 | selection is performed, the output model file will contain the final
|
|---|
| 1921 | model that was selected, and the output summary and plots will be for
|
|---|
| 1922 | this model.
|
|---|
| 1923 |
|
|---|
| 1924 | The selection methods currently available are:
|
|---|
| 1925 |
|
|---|
| 1926 | * Stepwise backward - backward stepwise model selection by AIC.
|
|---|
| 1927 | The basic idea of this method is to keep dropping model terms from
|
|---|
| 1928 | the original formula so long as a better model fit can be achieved.
|
|---|
| 1929 | Model selection occurs in a loop. First, the Akaike Information
|
|---|
| 1930 | Criterion (AIC) is computed for the current model. Then, N candidate
|
|---|
| 1931 | model are generated, where N equals the number of terms in the
|
|---|
| 1932 | current model. Each candidate model drops a different single term
|
|---|
| 1933 | from the model. The AIC is computed for each candidate model, and
|
|---|
| 1934 | the candidate model that provides the greatest reduction in AIC
|
|---|
| 1935 | becomes the new current model. The loop proceeds until the AIC can
|
|---|
| 1936 | no longer be reduced by dropping terms.
|
|---|
| 1937 |
|
|---|
| 1938 | Additional methods may be implemented in a future release of this
|
|---|
| 1939 | tool.
|
|---|
| 1940 |
|
|---|
| 1941 | Automated model selection will increase the run time and possibly the
|
|---|
| 1942 | memory utilization of this tool. The amount of increase depends on the
|
|---|
| 1943 | complexity of your model and the amount of data in your table.
|
|---|
| 1944 |
|
|---|
| 1945 | Model selection can be a difficult task. You should never blindly rely
|
|---|
| 1946 | on an automated selection method that you do not fully understand. For
|
|---|
| 1947 | the best results, always consult a statistician."""),
|
|---|
| 1948 | arcGISDisplayName=_(u'Automated model selection method'),
|
|---|
| 1949 | arcGISCategory=_(u'Automated model selection options'))
|
|---|
| 1950 |
|
|---|
| 1951 | AddArgumentMetadata(GLM.FitToArcGISTable, u'logSelectionDetails',
|
|---|
| 1952 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 1953 | description=_(
|
|---|
| 1954 | u"""If True, detailed information will be logged as automated model
|
|---|
| 1955 | selection proceeds. You can use this to monitor the progress of
|
|---|
| 1956 | automated model selection and diagnose how the selection method
|
|---|
| 1957 | arrived at the final model.
|
|---|
| 1958 |
|
|---|
| 1959 | If False, no detailed information will be logged during automated
|
|---|
| 1960 | model selection."""),
|
|---|
| 1961 | arcGISDisplayName=_(u'Log model selection details'),
|
|---|
| 1962 | arcGISCategory=_(u'Automated model selection options'))
|
|---|
| 1963 |
|
|---|
| 1964 | AddArgumentMetadata(GLM.FitToArcGISTable, u'writeSummaryFile',
|
|---|
| 1965 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 1966 | description=_(
|
|---|
| 1967 | u"""If True, this tool will write summary information about the fitted
|
|---|
| 1968 | model to a text file. (This is the same information that the tool
|
|---|
| 1969 | outputs as log messages.) If automated model selection is performed,
|
|---|
| 1970 | the output text file will contain summary information for both the
|
|---|
| 1971 | initial model and the final model.
|
|---|
| 1972 |
|
|---|
| 1973 | The file will have the name X_summary.txt, where X is the name of the
|
|---|
| 1974 | output model file, minus any extension."""),
|
|---|
| 1975 | arcGISDisplayName=_(u'Write model summary file'),
|
|---|
| 1976 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 1977 |
|
|---|
| 1978 | AddArgumentMetadata(GLM.FitToArcGISTable, u'writeDiagnosticPlots',
|
|---|
| 1979 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 1980 | description=_(
|
|---|
| 1981 | u"""If True, this tool will write six diagnostic plots for the fitted
|
|---|
| 1982 | model. If automated model selection is performed, plots will be
|
|---|
| 1983 | generated only for the final model.
|
|---|
| 1984 |
|
|---|
| 1985 | Each plot will be written to a PNG file:
|
|---|
| 1986 |
|
|---|
| 1987 | * X_resid_fit.png - residuals vs. fitted values
|
|---|
| 1988 |
|
|---|
| 1989 | * X_qq.png - normal Q-Q plot
|
|---|
| 1990 |
|
|---|
| 1991 | * X_scale_loc.png - scale-location plot of sqrt(abs(residuals)) vs.
|
|---|
| 1992 | fitted values
|
|---|
| 1993 |
|
|---|
| 1994 | * X_cooks.png - Cook's distances vs. row labels
|
|---|
| 1995 |
|
|---|
| 1996 | * X_resid_lev.png - residuals vs. leverages
|
|---|
| 1997 |
|
|---|
| 1998 | * X_cooks_lev.png - Cook's distances vs. leverage/(1-leverage)
|
|---|
| 1999 |
|
|---|
| 2000 | In the file names above, X is the name of the output model file, minus
|
|---|
| 2001 | any extension. Please see the R documentation for the plot.lm function
|
|---|
| 2002 | for more information about the plots."""),
|
|---|
| 2003 | arcGISDisplayName=_(u'Write diagnostic plots'),
|
|---|
| 2004 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2005 |
|
|---|
| 2006 | AddArgumentMetadata(GLM.FitToArcGISTable, u'numDiagLabels',
|
|---|
| 2007 | typeMetadata=IntegerTypeMetadata(minValue=0),
|
|---|
| 2008 | description=_(
|
|---|
| 2009 | u"""Number of extreme points to label in diagnostic plots.
|
|---|
| 2010 |
|
|---|
| 2011 | This parameter has no effect when diagnostic plots are not
|
|---|
| 2012 | written."""),
|
|---|
| 2013 | arcGISParameterDependencies=[u'inputTable'],
|
|---|
| 2014 | arcGISDisplayName=_(u'Number of extreme points to label in diagnostic plots'),
|
|---|
| 2015 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2016 |
|
|---|
| 2017 | AddArgumentMetadata(GLM.FitToArcGISTable, u'diagLabelField',
|
|---|
| 2018 | typeMetadata=ArcGISFieldTypeMetadata(mustExist=True, canBeNone=True),
|
|---|
| 2019 | description=_(
|
|---|
| 2020 | u"""Table field to for labeling extreme points in diagnostic plots.
|
|---|
| 2021 |
|
|---|
| 2022 | Usually, you choose a field that uniquely identifies the table row. If
|
|---|
| 2023 | you do not specify a field, R will assign labels for you, and it may
|
|---|
| 2024 | be difficult to determine which row a given point corresponds to.
|
|---|
| 2025 |
|
|---|
| 2026 | This parameter has no effect when diagnostic plots are not
|
|---|
| 2027 | written."""),
|
|---|
| 2028 | arcGISParameterDependencies=[u'inputTable'],
|
|---|
| 2029 | arcGISDisplayName=_(u'Field for labeling extreme points in diagnostic plots'),
|
|---|
| 2030 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2031 |
|
|---|
| 2032 | AddArgumentMetadata(GLM.FitToArcGISTable, u'writeTermPlots',
|
|---|
| 2033 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2034 | description=_(
|
|---|
| 2035 | u"""If True, this tool will write a partial plot for each term in the
|
|---|
| 2036 | fitted model's formula. If automated model selection is performed,
|
|---|
| 2037 | plots will be generated only for the final model.
|
|---|
| 2038 |
|
|---|
| 2039 | Each plot will be written to a PNG file named X_termY.png, where X is
|
|---|
| 2040 | the name of the output model file, minus any extension, and Y is the
|
|---|
| 2041 | term number in the model (minimum of two digits). For example, if the
|
|---|
| 2042 | model had this formula::
|
|---|
| 2043 |
|
|---|
| 2044 | Presence ~ SST + log(ChlDensity) + Depth
|
|---|
| 2045 |
|
|---|
| 2046 | The output files would be X_term01.png, X_term02.png, and
|
|---|
| 2047 | X_term03.png, which correspond to the SST, log(ChlDensity), and Depth
|
|---|
| 2048 | terms respectively."""),
|
|---|
| 2049 | arcGISDisplayName=_(u'Write plots of model terms'),
|
|---|
| 2050 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2051 |
|
|---|
| 2052 | AddArgumentMetadata(GLM.FitToArcGISTable, u'residuals',
|
|---|
| 2053 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2054 | description=_(
|
|---|
| 2055 | u"""If True, residuals will be plotted in model term plots of
|
|---|
| 2056 | continuous terms (i.e. terms that are not factors).
|
|---|
| 2057 |
|
|---|
| 2058 | Residuals are calculated using the method employed by the mgcv
|
|---|
| 2059 | package. That is, they are "the working residuals from the IRLS
|
|---|
| 2060 | iteration weighted by the IRLS weights. [These] are the residuals that
|
|---|
| 2061 | would be obtained by dropping the term concerned from the model, while
|
|---|
| 2062 | leaving all other estimates fixed (i.e. the estimates for the term
|
|---|
| 2063 | plus the residuals)." """),
|
|---|
| 2064 | arcGISDisplayName=_(u'Plot residuals'),
|
|---|
| 2065 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2066 |
|
|---|
| 2067 | AddArgumentMetadata(GLM.FitToArcGISTable, u'xAxis',
|
|---|
| 2068 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2069 | description=_(
|
|---|
| 2070 | u"""If True, a straight line will be drawn at y=0 in model term plots
|
|---|
| 2071 | of continuous terms (i.e. terms that are not factors)."""),
|
|---|
| 2072 | arcGISDisplayName=_(u'Plot x-axis'),
|
|---|
| 2073 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2074 |
|
|---|
| 2075 | AddArgumentMetadata(GLM.FitToArcGISTable, u'commonScale',
|
|---|
| 2076 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2077 | description=_(
|
|---|
| 2078 | u"""If True, model term plots of continuous terms (i.e. terms that are
|
|---|
| 2079 | not factors) will have the same y-axis scale, allowing the relative
|
|---|
| 2080 | importance of different terms to be ascertained by comparing the plots
|
|---|
| 2081 | side by side."""),
|
|---|
| 2082 | arcGISDisplayName=_(u'Use common y-axis scale'),
|
|---|
| 2083 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2084 |
|
|---|
| 2085 | AddArgumentMetadata(GLM.FitToArcGISTable, u'plotFileFormat',
|
|---|
| 2086 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'emf', u'png'], makeLowercase=True),
|
|---|
| 2087 | description=_(
|
|---|
| 2088 | u"""Plot file format, one of:
|
|---|
| 2089 |
|
|---|
| 2090 | * emf - Windows enhanced metafile (EMF) format. This is a vector
|
|---|
| 2091 | format that may be printed and resized without any pixelation and is
|
|---|
| 2092 | therefore suitable for use in printable documents that recognize
|
|---|
| 2093 | this format (e.g. Microsoft Word or Microsoft Visio).
|
|---|
| 2094 |
|
|---|
| 2095 | * png - Portable network graphics (PNG) format. This is a compressed,
|
|---|
| 2096 | lossless, highly portable raster format suitable for use in web
|
|---|
| 2097 | pages or other locations where a raster format is desired. Most
|
|---|
| 2098 | scientific journals accept PNG; they typically request that files
|
|---|
| 2099 | have a resolution of at least 1000 DPI.
|
|---|
| 2100 | """),
|
|---|
| 2101 | arcGISDisplayName=_(u'Plot file format'),
|
|---|
| 2102 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2103 |
|
|---|
| 2104 | AddArgumentMetadata(GLM.FitToArcGISTable, u'res',
|
|---|
| 2105 | typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
|
|---|
| 2106 | description=_(
|
|---|
| 2107 | u"""PNG plot file resolution, in dots per inch (DPI). The default is
|
|---|
| 2108 | set to a high value (1000) because this is the minimum resolution
|
|---|
| 2109 | typically required by scientific journals that accept figures in PNG
|
|---|
| 2110 | format.
|
|---|
| 2111 |
|
|---|
| 2112 | This parameter is ignored for EMF format because it is a vector
|
|---|
| 2113 | format."""),
|
|---|
| 2114 | arcGISDisplayName=_(u'Plot resolution, in DPI'),
|
|---|
| 2115 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2116 |
|
|---|
| 2117 | AddArgumentMetadata(GLM.FitToArcGISTable, u'width',
|
|---|
| 2118 | typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
|
|---|
| 2119 | description=_(
|
|---|
| 2120 | u"""Plot file width in thousandths of inches (for EMF format; e.g. the
|
|---|
| 2121 | value 3000 is 3 inches) or pixels (for PNG format)."""),
|
|---|
| 2122 | arcGISDisplayName=_(u'Plot width'),
|
|---|
| 2123 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2124 |
|
|---|
| 2125 | AddArgumentMetadata(GLM.FitToArcGISTable, u'height',
|
|---|
| 2126 | typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
|
|---|
| 2127 | description=_(
|
|---|
| 2128 | u"""Plot file height in thousandths of inches (for EMF format; e.g. the
|
|---|
| 2129 | value 3000 is 3 inches) or pixels (for PNG format)."""),
|
|---|
| 2130 | arcGISDisplayName=_(u'Plot height'),
|
|---|
| 2131 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2132 |
|
|---|
| 2133 | AddArgumentMetadata(GLM.FitToArcGISTable, u'pointSize',
|
|---|
| 2134 | typeMetadata=FloatTypeMetadata(minValue=1.0),
|
|---|
| 2135 | description=_(
|
|---|
| 2136 | u"""The default pointsize of plotted text."""),
|
|---|
| 2137 | arcGISDisplayName=_(u'Default pointsize of plotted text'),
|
|---|
| 2138 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2139 |
|
|---|
| 2140 | AddArgumentMetadata(GLM.FitToArcGISTable, u'bg',
|
|---|
| 2141 | typeMetadata=UnicodeStringTypeMetadata(),
|
|---|
| 2142 | description=_(
|
|---|
| 2143 | u"""PNG plot file background color. The color must be a valid name in
|
|---|
| 2144 | R's color palette, or "transparent" if there is no background color.
|
|---|
| 2145 | This parameter is ignored if the plot format file is EMF."""),
|
|---|
| 2146 | arcGISDisplayName=_(u'Plot background color'),
|
|---|
| 2147 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2148 |
|
|---|
| 2149 | AddArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting',
|
|---|
| 2150 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2151 | description=_(
|
|---|
| 2152 | u"""If True, the output files will be overwritten, if it exists. If
|
|---|
| 2153 | False, a ValueError will be raised if an output file exists."""),
|
|---|
| 2154 | initializeToArcGISGeoprocessorVariable=u'OverwriteOutput')
|
|---|
| 2155 |
|
|---|
| 2156 | # Public method: GLM.PredictFromArcGISRasters
|
|---|
| 2157 |
|
|---|
| 2158 | AddMethodMetadata(GLM.PredictFromArcGISRasters,
|
|---|
| 2159 | shortDescription=_(u'Using a fitted generalized linear model (GLM), this tool creates a raster representing the response variable predicted from ArcGIS rasters representing the predictor variables.'),
|
|---|
| 2160 | isExposedToPythonCallers=True,
|
|---|
| 2161 | isExposedByCOM=True,
|
|---|
| 2162 | isExposedAsArcGISTool=True,
|
|---|
| 2163 | arcGISDisplayName=_(u'Predict GLM From Rasters'),
|
|---|
| 2164 | arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Linear Models'),
|
|---|
| 2165 | dependencies=[ArcGISDependency(9, 2), RDependency(2, 6, 0), RPackageDependency(u'rgdal')])
|
|---|
| 2166 |
|
|---|
| 2167 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'cls', GLM.PredictFromArcGISRasters, u'cls')
|
|---|
| 2168 |
|
|---|
| 2169 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputModelFile',
|
|---|
| 2170 | typeMetadata=FileTypeMetadata(mustExist=True),
|
|---|
| 2171 | description=_(
|
|---|
| 2172 | u"""File that contains the fitted model, generated by the Fit GLM
|
|---|
| 2173 | tool."""),
|
|---|
| 2174 | arcGISDisplayName=_(u'Input model file'))
|
|---|
| 2175 |
|
|---|
| 2176 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputPredictorRasters',
|
|---|
| 2177 | typeMetadata=ListTypeMetadata(ArcGISRasterLayerTypeMetadata(mustExist=True), minLength=1),
|
|---|
| 2178 | description=_(
|
|---|
| 2179 | u"""Rasters that represent the predictor variables used in the model.
|
|---|
| 2180 |
|
|---|
| 2181 | This list must include one raster for each predictor variable that
|
|---|
| 2182 | appears in the model's formula. The list provided for the Model
|
|---|
| 2183 | Variable Names for Predictor Rasters parameter specifies the predictor
|
|---|
| 2184 | variable that each raster represents.
|
|---|
| 2185 |
|
|---|
| 2186 | For example, if your model used the formula::
|
|---|
| 2187 |
|
|---|
| 2188 | Presence ~ SST + Chl + Depth
|
|---|
| 2189 |
|
|---|
| 2190 | You must specify three rasters, one for each predictor variable. If
|
|---|
| 2191 | you want to predict Presence for the month of January 1999, you might
|
|---|
| 2192 | use a static bathymetry raster and SST and chlorophyll rasters for
|
|---|
| 2193 | that month::
|
|---|
| 2194 |
|
|---|
| 2195 | C:\\Data\\etopo2v2
|
|---|
| 2196 | C:\\Data\\SST\\sst199901
|
|---|
| 2197 | C:\\Data\\Chlorophyll\\chl199901
|
|---|
| 2198 |
|
|---|
| 2199 | Then, for the Model Variable Names for Predictor Rasters parameter,
|
|---|
| 2200 | you would specify the predictor variable that each raster represents::
|
|---|
| 2201 |
|
|---|
| 2202 | Depth
|
|---|
| 2203 | SST
|
|---|
| 2204 | Chl
|
|---|
| 2205 |
|
|---|
| 2206 | You can specify extra predictors that do not appear in the formula.
|
|---|
| 2207 | They will be ignored. In the example above, you might include the
|
|---|
| 2208 | variable SSH (sea surface height) and the raster
|
|---|
| 2209 | C:\\\\Data\\\\SSH\\\\ssh199901. Because this variable does not appear
|
|---|
| 2210 | in the model formula, it will be ignored.
|
|---|
| 2211 |
|
|---|
| 2212 | All of the predictor rasters must have a coordinate system defined.
|
|---|
| 2213 | They must all share the same datum. If you also specify the Template
|
|---|
| 2214 | Raster For Outputs parameter, they must have the datum of the template
|
|---|
| 2215 | raster. If you do not specify that parameter, the first predictor
|
|---|
| 2216 | raster will be used as the template raster.
|
|---|
| 2217 |
|
|---|
| 2218 | The predictor rasters need not have the same coordinate system,
|
|---|
| 2219 | extent, or cell size as the template raster. If these characteristics
|
|---|
| 2220 | differ from the template raster, this tool will automatically project
|
|---|
| 2221 | and clip the predictor rasters to conform to the template raster. The
|
|---|
| 2222 | predictor rasters must be able to be projected to the template
|
|---|
| 2223 | raster's coordinate system without requiring the specification of a
|
|---|
| 2224 | geographic transformation. An error will be reported if a geographic
|
|---|
| 2225 | transformation must be specified. In this case, you must project the
|
|---|
| 2226 | predictor rasters manually before providing them to this tool.
|
|---|
| 2227 |
|
|---|
| 2228 | By default, floating point predictor rasters will be projected using
|
|---|
| 2229 | bilinear interpolation and integer predictor rasters will be projected
|
|---|
| 2230 | using nearest neighbor assignment. Bilinear interpolation is
|
|---|
| 2231 | appropriate for continuous variables such as sea surface temperature,
|
|---|
| 2232 | while nearest neighbor is appropriate for categorical variables such
|
|---|
| 2233 | as bottom substrate type. If these defaults are not appropriate for
|
|---|
| 2234 | your predictors, specify different algorithms using the Resampling
|
|---|
| 2235 | Techniques for Predictor Rasters parameter."""),
|
|---|
| 2236 | arcGISDisplayName=_(u'Input predictor rasters'))
|
|---|
| 2237 |
|
|---|
| 2238 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'variableNames',
|
|---|
| 2239 | typeMetadata=ListTypeMetadata(UnicodeStringTypeMetadata(), mustBeSameLengthAsArgument=u'inputPredictorRasters'),
|
|---|
| 2240 | description=_(
|
|---|
| 2241 | u"""Model variable names for the input predictor rasters. The variable
|
|---|
| 2242 | names are case sensitive. Please see the documentation for the Input
|
|---|
| 2243 | Predictor Rasters parameter for more information."""),
|
|---|
| 2244 | arcGISDisplayName=_(u'Model variable names for predictor rasters'))
|
|---|
| 2245 |
|
|---|
| 2246 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputResponseRaster',
|
|---|
| 2247 | typeMetadata=ArcGISRasterTypeMetadata(mustBeDifferentThanArguments=[u'inputModelFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
|
|---|
| 2248 | description=_(
|
|---|
| 2249 | u"""Output raster representing the response predicted by the model
|
|---|
| 2250 | from the input predictor rasters.
|
|---|
| 2251 |
|
|---|
| 2252 | If any pixel is NoData in any predictor raster (after it has been
|
|---|
| 2253 | projected and clipped as needed), the pixel will be NoData in the
|
|---|
| 2254 | output raster as well. This is because the response variable cannot be
|
|---|
| 2255 | predicted by the model if any of the predictor variables are
|
|---|
| 2256 | missing."""),
|
|---|
| 2257 | direction=u'Output',
|
|---|
| 2258 | arcGISDisplayName=_(u'Output response raster'))
|
|---|
| 2259 |
|
|---|
| 2260 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster',
|
|---|
| 2261 | typeMetadata=ArcGISRasterLayerTypeMetadata(mustExist=True, mustBeDifferentThanArguments=[u'inputModelFile', u'outputResponseRaster'], canBeNone=True),
|
|---|
| 2262 | description=_(
|
|---|
| 2263 | u"""Template raster that defines the coordinate system, extent, and
|
|---|
| 2264 | cell size of the output rasters produced by this tool.
|
|---|
| 2265 |
|
|---|
| 2266 | If you do not specify a template raster, the first predictor raster
|
|---|
| 2267 | will be used instead.
|
|---|
| 2268 |
|
|---|
| 2269 | All predictor rasters must share the same datum as the template
|
|---|
| 2270 | raster. The predictor rasters need not have the same coordinate
|
|---|
| 2271 | system, extent, or cell size as the template raster. If these
|
|---|
| 2272 | characteristics differ from the template raster, this tool will
|
|---|
| 2273 | automatically project and clip the predictor rasters to conform to the
|
|---|
| 2274 | template raster. Please see the documentation for the Input Predictor
|
|---|
| 2275 | Rasters parameter for more information."""),
|
|---|
| 2276 | arcGISDisplayName=_(u'Template raster for outputs'))
|
|---|
| 2277 |
|
|---|
| 2278 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques',
|
|---|
| 2279 | typeMetadata=ListTypeMetadata(elementType=UnicodeStringTypeMetadata(allowedValues=[u'NEAREST', u'BILINEAR', u'CUBIC'], makeLowercase=True), canBeNone=True),
|
|---|
| 2280 | description=_(
|
|---|
| 2281 | u"""Resampling technique to be used for each input predictor raster if
|
|---|
| 2282 | that raster needs to be automatically projected, as described in the
|
|---|
| 2283 | documentation for the Input Predictor Rasters parameter.
|
|---|
| 2284 |
|
|---|
| 2285 | The available resampling techniques are:
|
|---|
| 2286 |
|
|---|
| 2287 | * NEAREST - Nearest neighbor assignment
|
|---|
| 2288 |
|
|---|
| 2289 | * BILINEAR - Bilinear interpolation
|
|---|
| 2290 |
|
|---|
| 2291 | * CUBIC - Cubic convolution
|
|---|
| 2292 |
|
|---|
| 2293 | If you do not specify a resampling technique for a given input
|
|---|
| 2294 | predictor raster, BILINEAR will be used.
|
|---|
| 2295 |
|
|---|
| 2296 | The ArcGIS 9.2 documentation provides the following information about the
|
|---|
| 2297 | resampling techniques:
|
|---|
| 2298 |
|
|---|
| 2299 | * The NEAREST option, which performs a nearest neighbor assignment, is
|
|---|
| 2300 | the fastest of the interpolation methods. It is primarily used for
|
|---|
| 2301 | categorical data, such as a land use classification, because it will
|
|---|
| 2302 | not change the cell values. Do not use NEAREST for continuous data,
|
|---|
| 2303 | such as elevation surfaces.
|
|---|
| 2304 |
|
|---|
| 2305 | * The BILINEAR option, bilinear interpolation, determines the new
|
|---|
| 2306 | value of a cell based on a weighted distance average of surrounding
|
|---|
| 2307 | cells. The CUBIC option, cubic convolution, determines the new cell
|
|---|
| 2308 | value by fitting a smooth curve through the surrounding points.
|
|---|
| 2309 | These are most appropriate for continuous data and may cause some
|
|---|
| 2310 | smoothing; also, cubic convolution may result in the output raster
|
|---|
| 2311 | containing values outside the range of the input raster. It is not
|
|---|
| 2312 | recommended that BILINEAR or CUBIC be used with categorical data
|
|---|
| 2313 | because the cell values may be altered.
|
|---|
| 2314 | """),
|
|---|
| 2315 | arcGISDisplayName=_(u'Resampling techniques'),
|
|---|
| 2316 | arcGISCategory=_(u'Prediction options'))
|
|---|
| 2317 |
|
|---|
| 2318 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues',
|
|---|
| 2319 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2320 | description=_(
|
|---|
| 2321 | u"""If True, predictions will not be made where the values of
|
|---|
| 2322 | predictor rasters fall outside of the range of values used to fit the
|
|---|
| 2323 | model. These cells will appear as NoData in the output rasters.
|
|---|
| 2324 |
|
|---|
| 2325 | If False, predictions will be attempted regardless of what the
|
|---|
| 2326 | predictor values are.
|
|---|
| 2327 |
|
|---|
| 2328 | This parameter is set to True by default because many believe that it
|
|---|
| 2329 | is a bad practice to extrapolate a model beyond the range of values
|
|---|
| 2330 | used to fit it. But if your model provides a very strong fit, or you
|
|---|
| 2331 | have some other reason to believe it is very robust, you can set this
|
|---|
| 2332 | parameter to False to perform the extrapolation."""),
|
|---|
| 2333 | arcGISDisplayName=_(u'Ignore raster values outside the modeled range'),
|
|---|
| 2334 | arcGISCategory=_(u'Prediction options'))
|
|---|
| 2335 |
|
|---|
| 2336 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputErrorRaster',
|
|---|
| 2337 | typeMetadata=ArcGISRasterTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'inputModelFile', u'outputResponseRaster'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
|
|---|
| 2338 | description=_(
|
|---|
| 2339 | u"""Output raster representing the standard errors of the predicted
|
|---|
| 2340 | response.
|
|---|
| 2341 |
|
|---|
| 2342 | If any pixel is NoData in any predictor raster (after it has been
|
|---|
| 2343 | projected and clipped as needed), it will be NoData in the output
|
|---|
| 2344 | raster as well. This is because the response variable cannot be
|
|---|
| 2345 | predicted by the model if any of the predictor variables are
|
|---|
| 2346 | missing."""),
|
|---|
| 2347 | direction=u'Output',
|
|---|
| 2348 | arcGISDisplayName=_(u'Output standard error raster'),
|
|---|
| 2349 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2350 |
|
|---|
| 2351 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputBinaryResponseRaster',
|
|---|
| 2352 | typeMetadata=ArcGISRasterTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'inputModelFile', u'outputResponseRaster', u'outputErrorRaster'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
|
|---|
| 2353 | description=_(
|
|---|
| 2354 | u"""Output raster with the predicted response classified into two
|
|---|
| 2355 | possible values, 0 or 1, according to the cutoff parameter (which you
|
|---|
| 2356 | must also provide). Predicted response values less than the cutoff
|
|---|
| 2357 | will be classified as 0; values greater than or equal to the cutoff
|
|---|
| 2358 | will be classified as 1.
|
|---|
| 2359 |
|
|---|
| 2360 | If any pixel is NoData in any predictor raster (after it has been
|
|---|
| 2361 | projected and clipped as needed), it will be NoData in the output
|
|---|
| 2362 | raster as well. This is because the response variable cannot be
|
|---|
| 2363 | predicted by the model if any of the predictor variables are
|
|---|
| 2364 | missing.
|
|---|
| 2365 |
|
|---|
| 2366 | You can examine the performance of your model for different cutoff
|
|---|
| 2367 | values using the Plot ROC of Binary Classification Model tool or the
|
|---|
| 2368 | Plot Performance of Binary Classification Model tool."""),
|
|---|
| 2369 | direction=u'Output',
|
|---|
| 2370 | arcGISDisplayName=_(u'Output binary response raster'),
|
|---|
| 2371 | arcGISCategory=_(u'Additional output options'),
|
|---|
| 2372 | dependencies=[ArcGISExtensionDependency(u'spatial')])
|
|---|
| 2373 |
|
|---|
| 2374 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'cutoff',
|
|---|
| 2375 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 2376 | description=_(
|
|---|
| 2377 | u"""Cutoff for classifying the continous predicted response into a
|
|---|
| 2378 | binary response. See the documentation for the Output Binary Response
|
|---|
| 2379 | Raster for more information."""),
|
|---|
| 2380 | arcGISDisplayName=_(u'Cutoff'),
|
|---|
| 2381 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2382 |
|
|---|
| 2383 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'buildPyramids',
|
|---|
| 2384 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2385 | description=_(
|
|---|
| 2386 | u"""If True, pyramids will be built for the output rasters, which will
|
|---|
| 2387 | improve their display speed in the ArcGIS user interface."""),
|
|---|
| 2388 | arcGISDisplayName=_(u'Build pyramids'),
|
|---|
| 2389 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2390 |
|
|---|
| 2391 | AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'overwriteExisting',
|
|---|
| 2392 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2393 | description=_(
|
|---|
| 2394 | u"""If True, the output rasters will be overwritten, if they exist.
|
|---|
| 2395 | If False, a ValueError will be raised if either output raster
|
|---|
| 2396 | exists."""),
|
|---|
| 2397 | initializeToArcGISGeoprocessorVariable=u'OverwriteOutput')
|
|---|
| 2398 |
|
|---|
| 2399 | ###############################################################################
|
|---|
| 2400 | # Metadata: GAM class
|
|---|
| 2401 | ###############################################################################
|
|---|
| 2402 |
|
|---|
| 2403 | AddClassMetadata(GAM,
|
|---|
| 2404 | shortDescription=_(u'Provides methods for modeling and prediction using Generalized Additive Models (GAMs).'),
|
|---|
| 2405 | isExposedAsCOMServer=True,
|
|---|
| 2406 | comIID=u'{515D570A-CF61-463A-8250-C65B67BEE0C7}',
|
|---|
| 2407 | comCLSID=u'{D875D98A-6E4D-486B-B681-225FF40A0D7E}')
|
|---|
| 2408 |
|
|---|
| 2409 | # Public method: GAM.FitToArcGISTable
|
|---|
| 2410 |
|
|---|
| 2411 | AddMethodMetadata(GAM.FitToArcGISTable,
|
|---|
| 2412 | shortDescription=_(u'Fits a generalized additive model (GAM) to data in an ArcGIS table.'),
|
|---|
| 2413 | isExposedToPythonCallers=True,
|
|---|
| 2414 | isExposedByCOM=True,
|
|---|
| 2415 | isExposedAsArcGISTool=True,
|
|---|
| 2416 | arcGISDisplayName=_(u'Fit GAM'),
|
|---|
| 2417 | arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Additive Models'),
|
|---|
| 2418 | dependencies=[ArcGISDependency(9, 1), RDependency(2, 6, 0)])
|
|---|
| 2419 |
|
|---|
| 2420 | AddArgumentMetadata(GAM.FitToArcGISTable, u'cls',
|
|---|
| 2421 | typeMetadata=ClassOrClassInstanceTypeMetadata(cls=GAM),
|
|---|
| 2422 | description=_(u'%s class or an instance of it.') % GAM.__name__)
|
|---|
| 2423 |
|
|---|
| 2424 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'inputTable', GAM.FitToArcGISTable, u'inputTable')
|
|---|
| 2425 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'outputModelFile', GAM.FitToArcGISTable, u'outputModelFile')
|
|---|
| 2426 |
|
|---|
| 2427 | AddArgumentMetadata(GAM.FitToArcGISTable, u'formula',
|
|---|
| 2428 | typeMetadata=GLM.FitToArcGISTable.__doc__.Obj.GetArgumentByName(u'formula').Type,
|
|---|
| 2429 | description=GLM.FitToArcGISTable.__doc__.Obj.GetArgumentByName(u'formula').Description + _(
|
|---|
| 2430 | u"""
|
|---|
| 2431 |
|
|---|
| 2432 | A principal advantage of GAMs over GLMs is the ability to fit a model
|
|---|
| 2433 | using smoothing functions. The most common smoothing function is
|
|---|
| 2434 | called s::
|
|---|
| 2435 |
|
|---|
| 2436 | Presence ~ SST + s(ChlDensity) + Depth
|
|---|
| 2437 |
|
|---|
| 2438 | The available smoothing functions and their meanings and parameters
|
|---|
| 2439 | depend on which R package is used to fit the GAM:
|
|---|
| 2440 |
|
|---|
| 2441 | * The R gam package provides two smoothing functions: s for spline
|
|---|
| 2442 | smooths and lo for loess smooths. For more information, please see
|
|---|
| 2443 | the documentation for these functions in the
|
|---|
| 2444 | `gam package documentation <http://cran.r-project.org/web/packages/gam/gam.pdf>`_.
|
|---|
| 2445 |
|
|---|
| 2446 | * The R mgcv package also provides two smoothing functions. The s
|
|---|
| 2447 | function fits a thin plate regression spline. The te function fits a
|
|---|
| 2448 | tensor product smooth using a cubic regression spline. Both
|
|---|
| 2449 | functions allow you to specify different spline types, such as cubic
|
|---|
| 2450 | regression splines, cubic regression splines with shrinkage, cyclic
|
|---|
| 2451 | cubic regression splines, and thin plate regression splines with
|
|---|
| 2452 | shrinkage. For more information, please see the documentation for
|
|---|
| 2453 | these functions in the
|
|---|
| 2454 | `mgcv package documentation <http://cran.r-project.org/web/packages/mgcv/mgcv.pdf>`_."""),
|
|---|
| 2455 | arcGISDisplayName=GLM.FitToArcGISTable.__doc__.Obj.GetArgumentByName(u'formula').ArcGISDisplayName)
|
|---|
| 2456 |
|
|---|
| 2457 | AddArgumentMetadata(GAM.FitToArcGISTable, u'family',
|
|---|
| 2458 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'binomial', u'gaussian', u'Gamma', u'inverse.gaussian', u'poisson', u'negbin', u'quasi', u'quasibinomial', u'quasipoisson']),
|
|---|
| 2459 | description=_(
|
|---|
| 2460 | u"""Name of the family for the model. The family, together with the
|
|---|
| 2461 | link function and variance parameters, describes the error
|
|---|
| 2462 | distribution that will be used by the model. The family name is case
|
|---|
| 2463 | sensitive.
|
|---|
| 2464 |
|
|---|
| 2465 | The negbin family is only available when the mgcv R package is used to
|
|---|
| 2466 | fit the model. When negbin is used, the theta parameter must also be
|
|---|
| 2467 | specified (under Model Options)."""),
|
|---|
| 2468 | arcGISDisplayName=_(u'Family'))
|
|---|
| 2469 |
|
|---|
| 2470 | AddArgumentMetadata(GAM.FitToArcGISTable, u'rPackage',
|
|---|
| 2471 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'gam', u'mgcv']),
|
|---|
| 2472 | description=_(
|
|---|
| 2473 | u"""R package to use when fitting the GAM.
|
|---|
| 2474 |
|
|---|
| 2475 | * mgcv - the GAM package written by Simon Wood, an expert in
|
|---|
| 2476 | statistical modeling with smooth functions. This package allows
|
|---|
| 2477 | terms to be smoothed with a variety of spline types. Stepwise model
|
|---|
| 2478 | selection is not used with these models because terms can be fitted
|
|---|
| 2479 | with "shrinkage" splines, allowing them to drop out of the model
|
|---|
| 2480 | during the fitting process. Because this package is under active
|
|---|
| 2481 | development and maintenance and provides more features than the gam
|
|---|
| 2482 | package (below), it is the default.
|
|---|
| 2483 |
|
|---|
| 2484 | * gam - the GAM package written by Trevor Hastie, one of the inventors
|
|---|
| 2485 | of GAMs. This package allows terms to be smoothed with splines or
|
|---|
| 2486 | loess functions and models to be optimized using stepwise model
|
|---|
| 2487 | selection. One disadvantage is that the Predict GAM From ArcGIS
|
|---|
| 2488 | Rasters tool cannot produce standard error rasters for models fitted
|
|---|
| 2489 | with this package.
|
|---|
| 2490 |
|
|---|
| 2491 | For more information on the two packages, please see the
|
|---|
| 2492 | `gam package documentation <http://cran.r-project.org/web/packages/gam/gam.pdf>`_
|
|---|
| 2493 | and the
|
|---|
| 2494 | `mgcv package documentation <http://cran.r-project.org/web/packages/mgcv/mgcv.pdf>`_."""),
|
|---|
| 2495 | arcGISDisplayName=_(u'R package to use'))
|
|---|
| 2496 |
|
|---|
| 2497 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'where', GAM.FitToArcGISTable, u'where')
|
|---|
| 2498 |
|
|---|
| 2499 | AddArgumentMetadata(GAM.FitToArcGISTable, u'link',
|
|---|
| 2500 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'1/mu^2', u'cauchit', u'cloglog', u'identity', u'inverse', u'log', u'logit', u'probit', u'sqrt'], canBeNone=True),
|
|---|
| 2501 | description=_(
|
|---|
| 2502 | u"""Name of the link function for the model. The allowed link
|
|---|
| 2503 | functions depend on the model family:
|
|---|
| 2504 |
|
|---|
| 2505 | * binomial family - cauchit, cloglog, log, logit, and probit
|
|---|
| 2506 |
|
|---|
| 2507 | * gaussian family - identity, inverse, and log
|
|---|
| 2508 |
|
|---|
| 2509 | * Gamma family - identity, inverse, and log
|
|---|
| 2510 |
|
|---|
| 2511 | * inverse.gaussian family - 1/mu^2, inverse, identity, and log
|
|---|
| 2512 |
|
|---|
| 2513 | * negbin family - identity, log, and sqrt
|
|---|
| 2514 |
|
|---|
| 2515 | * poisson family - identity, log, and sqrt
|
|---|
| 2516 |
|
|---|
| 2517 | * quasi family - 1/mu^2, cloglog, identity, inverse, log, logit, probit, and sqrt
|
|---|
| 2518 |
|
|---|
| 2519 | * quasibinomial family - cauchit, cloglog, log, logit, and probit
|
|---|
| 2520 |
|
|---|
| 2521 | * quasipoisson family - identity, log, and sqrt
|
|---|
| 2522 |
|
|---|
| 2523 | If a link function is not specified, the one that is used by default
|
|---|
| 2524 | also depends on the model family:
|
|---|
| 2525 |
|
|---|
| 2526 | * binomial family - logit
|
|---|
| 2527 |
|
|---|
| 2528 | * gaussian family - identity
|
|---|
| 2529 |
|
|---|
| 2530 | * Gamma family - inverse
|
|---|
| 2531 |
|
|---|
| 2532 | * inverse.gaussian family - 1/mu^2
|
|---|
| 2533 |
|
|---|
| 2534 | * negbin family - log
|
|---|
| 2535 |
|
|---|
| 2536 | * poisson family - log
|
|---|
| 2537 |
|
|---|
| 2538 | * quasi family - identity
|
|---|
| 2539 |
|
|---|
| 2540 | * quasibinomial family - logit
|
|---|
| 2541 |
|
|---|
| 2542 | * quasipoisson family - log
|
|---|
| 2543 | """),
|
|---|
| 2544 | arcGISDisplayName=_(u'Link function'),
|
|---|
| 2545 | arcGISCategory=_(u'Model options'))
|
|---|
| 2546 |
|
|---|
| 2547 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'variance', GAM.FitToArcGISTable, u'variance')
|
|---|
| 2548 |
|
|---|
| 2549 | AddArgumentMetadata(GAM.FitToArcGISTable, u'theta',
|
|---|
| 2550 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
|
|---|
| 2551 | description=_(
|
|---|
| 2552 | u"""Theta parameter for the negbin model family.
|
|---|
| 2553 |
|
|---|
| 2554 | Theta specifies the amount of dispersion in the data and is required
|
|---|
| 2555 | when fitting a negbin model (it is ignored for all other model
|
|---|
| 2556 | families). It may be specified in several different ways:
|
|---|
| 2557 |
|
|---|
| 2558 | * A single number - this number defines the known fixed value of
|
|---|
| 2559 | theta. Use this alternative when you have determined theta yourself.
|
|---|
| 2560 |
|
|---|
| 2561 | * Two numbers - these numbers define the range over which mgcv
|
|---|
| 2562 | will search for the optimal theta. Use this alternative when you
|
|---|
| 2563 | have not determined theta yourself. Use the following syntax,
|
|---|
| 2564 | replacing A and B with the two numbers::
|
|---|
| 2565 |
|
|---|
| 2566 | c(A,B)
|
|---|
| 2567 |
|
|---|
| 2568 | * A list of three or more numbers - these numbers define the possible
|
|---|
| 2569 | values of theta; mgcv will select the optimal value from the list.
|
|---|
| 2570 | Use this alternative when you only want mgcv to consider specific
|
|---|
| 2571 | values. Use the following syntax, replacing, A, B, and so on with
|
|---|
| 2572 | your numbers::
|
|---|
| 2573 |
|
|---|
| 2574 | c(A,B,...)
|
|---|
| 2575 |
|
|---|
| 2576 | Please see the article titled "GAM negative binomial family" in the
|
|---|
| 2577 | `mgcv package documentation <http://cran.r-project.org/web/packages/mgcv/mgcv.pdf>`_.
|
|---|
| 2578 | for more information about how to use this parameter."""),
|
|---|
| 2579 | arcGISDisplayName=_(u'Theta parameter for negbin family'),
|
|---|
| 2580 | arcGISCategory=_(u'Model options'))
|
|---|
| 2581 |
|
|---|
| 2582 | AddArgumentMetadata(GAM.FitToArcGISTable, u'method',
|
|---|
| 2583 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
|
|---|
| 2584 | description=_(
|
|---|
| 2585 | u"""Smoothing parameter estimation method to use, when fitting the GAM
|
|---|
| 2586 | with the R mgcv package (this parameter is ignored when using the R
|
|---|
| 2587 | gam package).
|
|---|
| 2588 |
|
|---|
| 2589 | The methods available depend on the version of mgcv you have
|
|---|
| 2590 | installed. In mgcv version 1.7-5, the following methods were
|
|---|
| 2591 | available:
|
|---|
| 2592 |
|
|---|
| 2593 | * GCV.Cp - GCV for Poisson and binomial models and Mallows'
|
|---|
| 2594 | Cp/UBRE/AIC for others. (This is based on the fact that this tool
|
|---|
| 2595 | does not provide a scale parameter when it calls mgcv. The default
|
|---|
| 2596 | value of scale is 0, indicating that the scale should be 1 for
|
|---|
| 2597 | Poisson and binomial models and unknown otherwise.)
|
|---|
| 2598 |
|
|---|
| 2599 | * GACV.Cp - equivalent to GCV.Cp, but uses GACV in place of GCV.
|
|---|
| 2600 |
|
|---|
| 2601 | * REML - REML estimation.
|
|---|
| 2602 |
|
|---|
| 2603 | * P-REML - REML estimation, but using a Pearson estimate of the scale.
|
|---|
| 2604 |
|
|---|
| 2605 | * ML - maximum likelihood estimation.
|
|---|
| 2606 |
|
|---|
| 2607 | * P-ML - maximum likelihood estimation, but using a Pearson estimate
|
|---|
| 2608 | of the scale.
|
|---|
| 2609 |
|
|---|
| 2610 | The default is GCV.Cp."""),
|
|---|
| 2611 | arcGISDisplayName=_(u'Smoothing parameter estimation method'),
|
|---|
| 2612 | arcGISCategory=_(u'Model options'))
|
|---|
| 2613 |
|
|---|
| 2614 | AddArgumentMetadata(GAM.FitToArcGISTable, u'optimizer',
|
|---|
| 2615 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
|
|---|
| 2616 | description=_(
|
|---|
| 2617 | u"""Numerical optimization method to use to optimize the smoothing
|
|---|
| 2618 | parameter estimation criterion, when fitting the GAM with the R mgcv
|
|---|
| 2619 | package (this parameter is ignored when using the R gam package).
|
|---|
| 2620 |
|
|---|
| 2621 | The optimizers available depend on the version of mgcv you have
|
|---|
| 2622 | installed. In mgcv version 1.7-5, the following optimizers were
|
|---|
| 2623 | available:
|
|---|
| 2624 |
|
|---|
| 2625 | * perf - performance iteration.
|
|---|
| 2626 |
|
|---|
| 2627 | * outer - a more stable direct approach. This is the default. If this
|
|---|
| 2628 | method is selected, you may also specify an alternative optimizer.
|
|---|
| 2629 |
|
|---|
| 2630 | Please consult the mgcv documentation and authors for more information
|
|---|
| 2631 | about these optimizers."""),
|
|---|
| 2632 | arcGISDisplayName=_(u'Optimizer'),
|
|---|
| 2633 | arcGISCategory=_(u'Model options'))
|
|---|
| 2634 |
|
|---|
| 2635 | AddArgumentMetadata(GAM.FitToArcGISTable, u'alternativeOptimizer',
|
|---|
| 2636 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
|
|---|
| 2637 | description=_(
|
|---|
| 2638 | u"""Alternative mgcv optimizer to use when the "outer" optimizer is
|
|---|
| 2639 | selected for the previous parameter.
|
|---|
| 2640 |
|
|---|
| 2641 | The optimizers available depend on the version of mgcv you have
|
|---|
| 2642 | installed. In mgcv version 1.7-5, the following optimizers were
|
|---|
| 2643 | available:
|
|---|
| 2644 |
|
|---|
| 2645 | * newton - the default.
|
|---|
| 2646 |
|
|---|
| 2647 | * bfgs
|
|---|
| 2648 |
|
|---|
| 2649 | * optim
|
|---|
| 2650 |
|
|---|
| 2651 | * nlm
|
|---|
| 2652 |
|
|---|
| 2653 | * nlm.fd - the mgcv documentation says this method "is based entirely
|
|---|
| 2654 | on finite differenced derivatives and is very slow".
|
|---|
| 2655 |
|
|---|
| 2656 | Please consult the mgcv documentation and authors for more information
|
|---|
| 2657 | about these optimizers."""),
|
|---|
| 2658 | arcGISDisplayName=_(u'Alternative optimizer'),
|
|---|
| 2659 | arcGISCategory=_(u'Model options'))
|
|---|
| 2660 |
|
|---|
| 2661 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'xColumnName', GAM.FitToArcGISTable, u'xColumnName')
|
|---|
| 2662 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'yColumnName', GAM.FitToArcGISTable, u'yColumnName')
|
|---|
| 2663 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'zColumnName', GAM.FitToArcGISTable, u'zColumnName')
|
|---|
| 2664 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'mColumnName', GAM.FitToArcGISTable, u'mColumnName')
|
|---|
| 2665 |
|
|---|
| 2666 | AddArgumentMetadata(GAM.FitToArcGISTable, u'selectionMethod',
|
|---|
| 2667 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, allowedValues=[u'Stepwise backward'], makeLowercase=True),
|
|---|
| 2668 | description=_(
|
|---|
| 2669 | u"""Automated model selection method to execute after fitting the
|
|---|
| 2670 | model using the original formula you specified. If automated model
|
|---|
| 2671 | selection is performed, the output model file will contain the final
|
|---|
| 2672 | model that was selected, and the output summary and plots will be for
|
|---|
| 2673 | this model.
|
|---|
| 2674 |
|
|---|
| 2675 | The selection methods currently available are:
|
|---|
| 2676 |
|
|---|
| 2677 | * Stepwise backward - backward stepwise model selection by AIC. This
|
|---|
| 2678 | method is only available if the gam was fitted using the R gam
|
|---|
| 2679 | package. The basic idea of this method is to keep dropping model
|
|---|
| 2680 | terms from the original formula so long as a better model fit can be
|
|---|
| 2681 | achieved. Model selection occurs in a loop. First, the Akaike
|
|---|
| 2682 | Information Criterion (AIC) is computed for the current model. Then,
|
|---|
| 2683 | N candidate model are generated, where N equals the number of terms
|
|---|
| 2684 | in the current model. Each candidate model drops a different single
|
|---|
| 2685 | term from the model. The AIC is computed for each candidate model,
|
|---|
| 2686 | and the candidate model that provides the greatest reduction in AIC
|
|---|
| 2687 | becomes the new current model. The loop proceeds until the AIC can
|
|---|
| 2688 | no longer be reduced by dropping terms.
|
|---|
| 2689 |
|
|---|
| 2690 | Additional methods may be implemented in a future release of this
|
|---|
| 2691 | tool.
|
|---|
| 2692 |
|
|---|
| 2693 | Automated model selection will increase the run time and possibly the
|
|---|
| 2694 | memory utilization of this tool. The amount of increase depends on the
|
|---|
| 2695 | complexity of your model and the amount of data in your table.
|
|---|
| 2696 |
|
|---|
| 2697 | For an alternative to automated model selection, you can fit the GAM
|
|---|
| 2698 | using the R mgcv package and use splines with "shrinkage" by setting
|
|---|
| 2699 | the bs parameter of the s function to "ts" for a thin plate regression
|
|---|
| 2700 | spline with shrinkage or "cs" for a cubic regression spline with
|
|---|
| 2701 | shrinkage. For example::
|
|---|
| 2702 |
|
|---|
| 2703 | Presence ~ s(SST,bs="ts") + s(Chl,bs="ts") + s(Depth,bs="ts")
|
|---|
| 2704 |
|
|---|
| 2705 | After the model is fitted, if the model summary shows that a term has
|
|---|
| 2706 | an effective degrees of freedom (edf) close to zero, the term has been
|
|---|
| 2707 | "zeroed out" and the plot of the term will show a horizontal line at
|
|---|
| 2708 | the x axis. This indicates that the term is a poor predictor of the
|
|---|
| 2709 | response variable and should be removed from the model.
|
|---|
| 2710 |
|
|---|
| 2711 | Model selection can be a difficult task. You should never blindly rely
|
|---|
| 2712 | on an automated selection method that you do not fully understand. For
|
|---|
| 2713 | the best results, always consult a statistician."""),
|
|---|
| 2714 | arcGISDisplayName=_(u'Automated model selection method'),
|
|---|
| 2715 | arcGISCategory=_(u'Automated model selection options'))
|
|---|
| 2716 |
|
|---|
| 2717 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'logSelectionDetails', GAM.FitToArcGISTable, u'logSelectionDetails')
|
|---|
| 2718 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'writeSummaryFile', GAM.FitToArcGISTable, u'writeSummaryFile')
|
|---|
| 2719 |
|
|---|
| 2720 | AddArgumentMetadata(GAM.FitToArcGISTable, u'writeDiagnosticPlots',
|
|---|
| 2721 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2722 | description=_(
|
|---|
| 2723 | u"""If True and the R mgcv package was used to fit the GAM, this tool
|
|---|
| 2724 | will write four diagnostic plots for the fitted model. If False or the
|
|---|
| 2725 | R gam package was used to fit the GAM, no plots will be written.
|
|---|
| 2726 |
|
|---|
| 2727 | The four plots are written to a single PNG file named X_diag.png,
|
|---|
| 2728 | where X is the name of the output model file, minus any extension. The
|
|---|
| 2729 | plots include a normal Q-Q plot, a plot of residuals vs. linear
|
|---|
| 2730 | predictors, a histogram of residuals, and a plot of the response vs.
|
|---|
| 2731 | the fitted values. For more information about these plots, please see
|
|---|
| 2732 | the documentation for the gam.check function in the
|
|---|
| 2733 | `mgcv package documentation <http://cran.r-project.org/web/packages/mgcv/mgcv.pdf>`_."""),
|
|---|
| 2734 | arcGISDisplayName=_(u'Write diagnostic plots'),
|
|---|
| 2735 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2736 |
|
|---|
| 2737 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'writeTermPlots', GAM.FitToArcGISTable, u'writeTermPlots')
|
|---|
| 2738 |
|
|---|
| 2739 | AddArgumentMetadata(GAM.FitToArcGISTable, u'residuals',
|
|---|
| 2740 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2741 | description=_(
|
|---|
| 2742 | u"""If True, residuals will be plotted in model term plots of
|
|---|
| 2743 | continuous terms (i.e. terms that are not factors).
|
|---|
| 2744 |
|
|---|
| 2745 | Regardless of what R package was used to fit the GAM, they are
|
|---|
| 2746 | calculated using the method employed by the mgcv package. That is,
|
|---|
| 2747 | they are "the working residuals from the IRLS iteration weighted by
|
|---|
| 2748 | the IRLS weights. Partial residuals for a smooth term [i.e. a term
|
|---|
| 2749 | fitted using the s, lo, or te function] are the residuals that would
|
|---|
| 2750 | be obtained by dropping the term concerned from the model, while
|
|---|
| 2751 | leaving all other estimates fixed (i.e. the estimates for the term
|
|---|
| 2752 | plus the residuals)." (The same method is used for linear terms, as
|
|---|
| 2753 | well.)"""),
|
|---|
| 2754 | arcGISDisplayName=_(u'Plot residuals'),
|
|---|
| 2755 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 2756 |
|
|---|
| 2757 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'xAxis', GAM.FitToArcGISTable, u'xAxis')
|
|---|
| 2758 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'commonScale', GAM.FitToArcGISTable, u'commonScale')
|
|---|
| 2759 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'plotFileFormat', GAM.FitToArcGISTable, u'plotFileFormat')
|
|---|
| 2760 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'res', GAM.FitToArcGISTable, u'res')
|
|---|
| 2761 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'width', GAM.FitToArcGISTable, u'width')
|
|---|
| 2762 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'height', GAM.FitToArcGISTable, u'height')
|
|---|
| 2763 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'pointSize', GAM.FitToArcGISTable, u'pointSize')
|
|---|
| 2764 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'bg', GAM.FitToArcGISTable, u'bg')
|
|---|
| 2765 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', GAM.FitToArcGISTable, u'overwriteExisting')
|
|---|
| 2766 |
|
|---|
| 2767 | # Public method: GAM.PredictFromArcGISRasters
|
|---|
| 2768 |
|
|---|
| 2769 | AddMethodMetadata(GAM.PredictFromArcGISRasters,
|
|---|
| 2770 | shortDescription=_(u'Using a fitted generalized additive model (GAM), this tool creates a raster representing the response variable predicted from ArcGIS rasters representing the predictor variables.'),
|
|---|
| 2771 | isExposedToPythonCallers=True,
|
|---|
| 2772 | isExposedByCOM=True,
|
|---|
| 2773 | isExposedAsArcGISTool=True,
|
|---|
| 2774 | arcGISDisplayName=_(u'Predict GAM From Rasters'),
|
|---|
| 2775 | arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Additive Models'),
|
|---|
| 2776 | dependencies=[ArcGISDependency(9, 2), RDependency(2, 6, 0), RPackageDependency(u'rgdal')])
|
|---|
| 2777 |
|
|---|
| 2778 | CopyArgumentMetadata(GAM.FitToArcGISTable, u'cls', GAM.PredictFromArcGISRasters, u'cls')
|
|---|
| 2779 |
|
|---|
| 2780 | AddArgumentMetadata(GAM.PredictFromArcGISRasters, u'inputModelFile',
|
|---|
| 2781 | typeMetadata=FileTypeMetadata(mustExist=True),
|
|---|
| 2782 | description=_(
|
|---|
| 2783 | u"""File that contains the fitted model, generated by the Fit GAM
|
|---|
| 2784 | tool."""),
|
|---|
| 2785 | arcGISDisplayName=_(u'Input model file'))
|
|---|
| 2786 |
|
|---|
| 2787 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputPredictorRasters', GAM.PredictFromArcGISRasters, u'inputPredictorRasters')
|
|---|
| 2788 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'variableNames', GAM.PredictFromArcGISRasters, u'variableNames')
|
|---|
| 2789 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputResponseRaster', GAM.PredictFromArcGISRasters, u'outputResponseRaster')
|
|---|
| 2790 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster', GAM.PredictFromArcGISRasters, u'templateRaster')
|
|---|
| 2791 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques', GAM.PredictFromArcGISRasters, u'resamplingTechniques')
|
|---|
| 2792 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', GAM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues')
|
|---|
| 2793 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputErrorRaster', GAM.PredictFromArcGISRasters, u'outputErrorRaster')
|
|---|
| 2794 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputBinaryResponseRaster', GAM.PredictFromArcGISRasters, u'outputBinaryResponseRaster')
|
|---|
| 2795 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'cutoff', GAM.PredictFromArcGISRasters, u'cutoff')
|
|---|
| 2796 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'buildPyramids', GAM.PredictFromArcGISRasters, u'buildPyramids')
|
|---|
| 2797 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'overwriteExisting', GAM.PredictFromArcGISRasters, u'overwriteExisting')
|
|---|
| 2798 |
|
|---|
| 2799 | # Public method: GAM.BayesPredictFromArcGISRasters
|
|---|
| 2800 |
|
|---|
| 2801 | AddMethodMetadata(GAM.BayesPredictFromArcGISRasters,
|
|---|
| 2802 | shortDescription=_(u'Using a binomial generalized additive model (GAM) fitted using the R mgcv package, this tool creates rasters representing the estimated probabilities that the response variable will equal or exceed specified thresholds, given ArcGIS rasters representing the predictor variables.'),
|
|---|
| 2803 | isExposedToPythonCallers=True,
|
|---|
| 2804 | isExposedByCOM=True,
|
|---|
| 2805 | isExposedAsArcGISTool=True,
|
|---|
| 2806 | arcGISDisplayName=_(u'Predict Bayesian Probabilities for GAM From Rasters'),
|
|---|
| 2807 | arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Additive Models'),
|
|---|
| 2808 | dependencies=[ArcGISDependency(9, 2), RDependency(2, 6, 0), RPackageDependency(u'mgcv'), RPackageDependency(u'MASS'), RPackageDependency(u'rgdal')])
|
|---|
| 2809 |
|
|---|
| 2810 | CopyArgumentMetadata(GAM.FitToArcGISTable, u'cls', GAM.BayesPredictFromArcGISRasters, u'cls')
|
|---|
| 2811 |
|
|---|
| 2812 | AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'inputModelFile',
|
|---|
| 2813 | typeMetadata=FileTypeMetadata(mustExist=True),
|
|---|
| 2814 | description=_(
|
|---|
| 2815 | u"""File that contains the fitted model, generated by the Fit GAM
|
|---|
| 2816 | tool. The model must be of the binomial family, use a logit link
|
|---|
| 2817 | function, and have been fitted using R mgcv package."""),
|
|---|
| 2818 | arcGISDisplayName=_(u'Input model file'))
|
|---|
| 2819 |
|
|---|
| 2820 | CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'inputPredictorRasters', GAM.BayesPredictFromArcGISRasters, u'inputPredictorRasters')
|
|---|
| 2821 | CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'variableNames', GAM.BayesPredictFromArcGISRasters, u'variableNames')
|
|---|
| 2822 |
|
|---|
| 2823 | AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'thresholds',
|
|---|
| 2824 | typeMetadata=ListTypeMetadata(elementType=FloatTypeMetadata(), minLength=1),
|
|---|
| 2825 | description=_(
|
|---|
| 2826 | u"""List of thresholds for which probabilities should be estimated.
|
|---|
| 2827 | You must specify one output probability raster for each threshold. See
|
|---|
| 2828 | the documentation for that parameter for more information."""),
|
|---|
| 2829 | arcGISDisplayName=_(u'Thresholds'))
|
|---|
| 2830 |
|
|---|
| 2831 | AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'outputProbabilityRasters',
|
|---|
| 2832 | typeMetadata=ListTypeMetadata(elementType=ArcGISRasterTypeMetadata(mustBeDifferentThanArguments=[u'inputModelFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True), minLength=1, mustBeSameLengthAsArgument=u'thresholds'),
|
|---|
| 2833 | description=_(
|
|---|
| 2834 | u"""List of output rasters representing the estimated probabilities
|
|---|
| 2835 | that the response variable will meet or exceed the thresholds you
|
|---|
| 2836 | specified, given the predictor rasters you specified. The
|
|---|
| 2837 | probabilities will range from 0 to 1.
|
|---|
| 2838 |
|
|---|
| 2839 | For example, if you specify the threshold 0.2, the output probability
|
|---|
| 2840 | raster represents the estimated probability that the response variable
|
|---|
| 2841 | will be in the range 0.2 - 1.0. If a given pixel of the output raster
|
|---|
| 2842 | is close to 0, it indicates high likelihood that the response will be
|
|---|
| 2843 | less than 0.2 at that location, while a value close to 1.0 indicates
|
|---|
| 2844 | high likelihood that the response will be greater than 0.2.
|
|---|
| 2845 |
|
|---|
| 2846 | The probability is estimated by comparing the predictor raster values
|
|---|
| 2847 | to samples drawn from the posterior distribution of these predictors
|
|---|
| 2848 | in the model. Please contact the author of this tool for more
|
|---|
| 2849 | information. (I will provide a better explanation once I have obtained
|
|---|
| 2850 | it from the statistician who wrote the algorithm.)
|
|---|
| 2851 |
|
|---|
| 2852 | If any pixel is NoData in any predictor raster (after it has been
|
|---|
| 2853 | projected and clipped as needed), the pixel will be NoData in the
|
|---|
| 2854 | output rasters as well. This is because the response variable cannot
|
|---|
| 2855 | be predicted by the model if any of the predictor variables are
|
|---|
| 2856 | missing."""),
|
|---|
| 2857 | direction=u'Output',
|
|---|
| 2858 | arcGISDisplayName=_(u'Output probability rasters'))
|
|---|
| 2859 |
|
|---|
| 2860 | CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'templateRaster', GAM.BayesPredictFromArcGISRasters, u'templateRaster')
|
|---|
| 2861 | CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'resamplingTechniques', GAM.BayesPredictFromArcGISRasters, u'resamplingTechniques')
|
|---|
| 2862 | CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', GAM.BayesPredictFromArcGISRasters, u'ignoreOutOfRangeValues')
|
|---|
| 2863 |
|
|---|
| 2864 | AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'samples',
|
|---|
| 2865 | typeMetadata=IntegerTypeMetadata(minValue=1),
|
|---|
| 2866 | description=_(
|
|---|
| 2867 | u"""Number of samples to draw from the posterior distribution when
|
|---|
| 2868 | estimating the probability that a given prediction meets or exceeds
|
|---|
| 2869 | the thresholds you specified.
|
|---|
| 2870 |
|
|---|
| 2871 | Large numbers increase statistical accuracy but require more memoryand
|
|---|
| 2872 | processing time than small numbers. The memory increase is linear. I
|
|---|
| 2873 | am not sure about the processing time.
|
|---|
| 2874 |
|
|---|
| 2875 | For quick, inaccurate results, use 100 samples. For publishable
|
|---|
| 2876 | results, use 1000 or 10,000. If the tool fails due to insufficient
|
|---|
| 2877 | memory, increase the value of the Split Prediction Into Chunk
|
|---|
| 2878 | parameter. For example, if you increase the number of samples by 10x,
|
|---|
| 2879 | you should also increase the number of chunks by 10x."""),
|
|---|
| 2880 | arcGISDisplayName=_(u'Number of samples from the posterior distribution'),
|
|---|
| 2881 | arcGISCategory=_(u'Prediction options'))
|
|---|
| 2882 |
|
|---|
| 2883 | AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'chunks',
|
|---|
| 2884 | typeMetadata=IntegerTypeMetadata(canBeNone=True, minValue=1),
|
|---|
| 2885 | description=_(
|
|---|
| 2886 | u"""Number of chunks to split the prediction into.
|
|---|
| 2887 |
|
|---|
| 2888 | Increase this parameter if you find that this tool fails because there
|
|---|
| 2889 | is insufficient memory to perform your prediction. That failure occurs
|
|---|
| 2890 | because the underlying R function that performs the prediction assumes
|
|---|
| 2891 | that infinite memory is available and does not attempt to control its
|
|---|
| 2892 | utilization when performing large predictions. When a value is
|
|---|
| 2893 | provided for this parameter, this tool splits up the prediction job
|
|---|
| 2894 | into the specified number of same-size chunks and invokes the R
|
|---|
| 2895 | function on each chunk. This requires some additional processing but
|
|---|
| 2896 | cuts the memory required for the prediction to that needed for just
|
|---|
| 2897 | one of the chunks.
|
|---|
| 2898 |
|
|---|
| 2899 | If you need to use this parameter we recommend that you start with a
|
|---|
| 2900 | value of 100. If that does not work, try 1000, 10000, and so on."""),
|
|---|
| 2901 | arcGISDisplayName=_(u'Split prediction into chunks'),
|
|---|
| 2902 | arcGISCategory=_(u'Prediction options'))
|
|---|
| 2903 |
|
|---|
| 2904 | CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'buildPyramids', GAM.BayesPredictFromArcGISRasters, u'buildPyramids')
|
|---|
| 2905 | CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'overwriteExisting', GAM.BayesPredictFromArcGISRasters, u'overwriteExisting')
|
|---|
| 2906 |
|
|---|
| 2907 | ###############################################################################
|
|---|
| 2908 | # Metadata: TreeModel class
|
|---|
| 2909 | ###############################################################################
|
|---|
| 2910 |
|
|---|
| 2911 | AddClassMetadata(TreeModel,
|
|---|
| 2912 | shortDescription=_(u'Provides methods for modeling and prediction using tree models (a.k.a. CARTs).'),
|
|---|
| 2913 | isExposedAsCOMServer=True,
|
|---|
| 2914 | comIID=u'{737951E5-51F7-4D54-85E1-ED955485C9CD}',
|
|---|
| 2915 | comCLSID=u'{F6F9A24E-C466-4C15-BF74-D26547854EA0}')
|
|---|
| 2916 |
|
|---|
| 2917 | # Public method: TreeModel.FitToArcGISTable
|
|---|
| 2918 |
|
|---|
| 2919 | AddMethodMetadata(TreeModel.FitToArcGISTable,
|
|---|
| 2920 | shortDescription=_(u'Fits a tree model to data in an ArcGIS table.'),
|
|---|
| 2921 | longDescription=_(
|
|---|
| 2922 | u"""Tree models were first introduced by Breiman et al. (1984) in the
|
|---|
| 2923 | classic Classification and Regression Tree (CART) software and are
|
|---|
| 2924 | frequently referenced by that name. Since that time, the original
|
|---|
| 2925 | methods have been reimplemented in R and many other statistical
|
|---|
| 2926 | programs. This tool fits tree models using the R rpart package by
|
|---|
| 2927 | Terry M. Therneau and Elizabeth J. Atkinson, and plots them using the
|
|---|
| 2928 | R rpart.plot package by Stephen Milborrow.
|
|---|
| 2929 |
|
|---|
| 2930 | **References**
|
|---|
| 2931 |
|
|---|
| 2932 | Breiman, L., Friedman, J.H., Olshen, R.A., Stone, C.J. (1984). Classification and regression trees. Chapman & Hall/CRC.
|
|---|
| 2933 |
|
|---|
| 2934 | `An Introduction to Recursive Partitioning Using the RPART Routines <http://www.mayo.edu/hsr/techrpt/61.pdf>`_
|
|---|
| 2935 |
|
|---|
| 2936 | `R rpart package documentation <http://cran.r-project.org/web/packages/rpart/rpart.pdf>`_"""),
|
|---|
| 2937 | isExposedToPythonCallers=True,
|
|---|
| 2938 | isExposedByCOM=True,
|
|---|
| 2939 | isExposedAsArcGISTool=True,
|
|---|
| 2940 | arcGISDisplayName=_(u'Fit Tree Model'),
|
|---|
| 2941 | arcGISToolCategory=_(u'Statistics\\Model Data\\Tree Models'),
|
|---|
| 2942 | dependencies=[ArcGISDependency(9, 1), RDependency(2, 12, 0), RPackageDependency(u'rpart', u'3.1-48'), RPackageDependency(u'rpart.plot', u'1.0-0')])
|
|---|
| 2943 |
|
|---|
| 2944 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'cls',
|
|---|
| 2945 | typeMetadata=ClassOrClassInstanceTypeMetadata(cls=TreeModel),
|
|---|
| 2946 | description=_(u'%s class or an instance of it.') % TreeModel.__name__)
|
|---|
| 2947 |
|
|---|
| 2948 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'inputTable', TreeModel.FitToArcGISTable, u'inputTable')
|
|---|
| 2949 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'outputModelFile', TreeModel.FitToArcGISTable, u'outputModelFile')
|
|---|
| 2950 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'formula', TreeModel.FitToArcGISTable, u'formula')
|
|---|
| 2951 |
|
|---|
| 2952 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'method',
|
|---|
| 2953 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'ANOVA', u'Class', u'Exp', u'Poisson'], makeLowercase=True),
|
|---|
| 2954 | description=_(
|
|---|
| 2955 | u"""Method to use for splitting the tree, one of:
|
|---|
| 2956 |
|
|---|
| 2957 | * ANOVA - Use this method to build a regression tree, i.e. when you
|
|---|
| 2958 | are modeling a continuous response variable, such as the abundance
|
|---|
| 2959 | of a species. With this method, the splits will be chosen to
|
|---|
| 2960 | maximize the between-groups sum-of-squares in a simple analysis of
|
|---|
| 2961 | variance.
|
|---|
| 2962 |
|
|---|
| 2963 | * Class - Use this method to build a classification tree, i.e. when
|
|---|
| 2964 | you are modeling a categorical response variable, such as the
|
|---|
| 2965 | presence or absence of a species. When this method is selected, the
|
|---|
| 2966 | response variable is assumed to be categorical and the R factor
|
|---|
| 2967 | function is automatically applied to it.
|
|---|
| 2968 |
|
|---|
| 2969 | * Exp - Use this method to build a regression tree using exponential
|
|---|
| 2970 | scaling. For more information about this method, please see the
|
|---|
| 2971 | references below.
|
|---|
| 2972 |
|
|---|
| 2973 | * Poisson - Use this method to build a regression tree using Poisson
|
|---|
| 2974 | regression, which is appropriate for event rate data. For more
|
|---|
| 2975 | information about this method, please see the references below.
|
|---|
| 2976 |
|
|---|
| 2977 | **References**
|
|---|
| 2978 |
|
|---|
| 2979 | `An Introduction to Recursive Partitioning Using the RPART Routines <http://www.mayo.edu/hsr/techrpt/61.pdf>`_
|
|---|
| 2980 |
|
|---|
| 2981 | `R rpart package documentation <http://cran.r-project.org/web/packages/rpart/rpart.pdf>`_"""),
|
|---|
| 2982 | arcGISDisplayName=_(u'Splitting method'))
|
|---|
| 2983 |
|
|---|
| 2984 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'where',
|
|---|
| 2985 | typeMetadata=SQLWhereClauseTypeMetadata(canBeNone=True),
|
|---|
| 2986 | description=ArcGIS91SelectCursor.__init__.__doc__.Obj.GetArgumentByName(u'where').Description,
|
|---|
| 2987 | arcGISParameterDependencies=[u'inputTable'],
|
|---|
| 2988 | arcGISDisplayName=_(u'Where clause'),
|
|---|
| 2989 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 2990 |
|
|---|
| 2991 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'allowMissingCovariates',
|
|---|
| 2992 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 2993 | description=_(
|
|---|
| 2994 | u"""If this option is enabled (the default), records will be included
|
|---|
| 2995 | in the model fitting process so long as they have a value for the
|
|---|
| 2996 | response variable and at least one predictor variable. If this option
|
|---|
| 2997 | is disabled, records must have values for the response variable and
|
|---|
| 2998 | all predictor variables in order to be included.
|
|---|
| 2999 |
|
|---|
| 3000 | The R rpart package that is used to fit the model has the novel
|
|---|
| 3001 | capability of allowing records that are missing some data to still
|
|---|
| 3002 | participate in the model fitting process. For more information about
|
|---|
| 3003 | how this works, please see
|
|---|
| 3004 | `An Introduction to Recursive Partitioning Using the RPART Routines <http://www.mayo.edu/hsr/techrpt/61.pdf>`_."""),
|
|---|
| 3005 | arcGISDisplayName=_(u'Include records that are missing covariates'),
|
|---|
| 3006 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3007 |
|
|---|
| 3008 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'minSplit',
|
|---|
| 3009 | typeMetadata=IntegerTypeMetadata(minValue=2),
|
|---|
| 3010 | description=_(
|
|---|
| 3011 | u"""The minimum number of observations that must exist in a node of
|
|---|
| 3012 | the tree in order for a split of that node to be attempted. The
|
|---|
| 3013 | default value, 20, was taken from the R rpart package that is used to
|
|---|
| 3014 | fit the model."""),
|
|---|
| 3015 | arcGISDisplayName=_(u'Minimum number of observations to attempt a split'),
|
|---|
| 3016 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3017 |
|
|---|
| 3018 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'minBucket',
|
|---|
| 3019 | typeMetadata=IntegerTypeMetadata(minValue=1),
|
|---|
| 3020 | description=_(
|
|---|
| 3021 | u"""The minimum number of observations that may be in any leaf node of
|
|---|
| 3022 | the tree. The default value, 7, was taken from the R rpart package
|
|---|
| 3023 | that is used to fit the model. By default, rpart recommends that this
|
|---|
| 3024 | parameter be set to one third of the previous parameter."""),
|
|---|
| 3025 | arcGISDisplayName=_(u'Minimum number of observations in a leaf node'),
|
|---|
| 3026 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3027 |
|
|---|
| 3028 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'cp',
|
|---|
| 3029 | typeMetadata=FloatTypeMetadata(minValue=0.),
|
|---|
| 3030 | description=_(
|
|---|
| 3031 | u"""Any split that does not decrease the overall lack of fit by a
|
|---|
| 3032 | factor of this parameter will not be attempted. For instance, with
|
|---|
| 3033 | ANOVA splitting, this means that the overall Rsquare must increase by
|
|---|
| 3034 | this parameter at each step. The main role of this parameter is to
|
|---|
| 3035 | save computing time by pruning off splits that are obviously not
|
|---|
| 3036 | worthwhile. Essentially, you inform the tool that any split which does
|
|---|
| 3037 | not improve the fit by this parameter will likely be pruned off by
|
|---|
| 3038 | cross-validation, and that hence the tool need not pursue it.
|
|---|
| 3039 |
|
|---|
| 3040 | The default value, 0.01, was taken from the R rpart package that is
|
|---|
| 3041 | used to fit the model."""),
|
|---|
| 3042 | arcGISDisplayName=_(u'Complexity parameter'),
|
|---|
| 3043 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3044 |
|
|---|
| 3045 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'maxCompete',
|
|---|
| 3046 | typeMetadata=IntegerTypeMetadata(minValue=0),
|
|---|
| 3047 | description=_(
|
|---|
| 3048 | u"""The number of competitor splits to retain in the output. It is
|
|---|
| 3049 | useful to know not just which split was chosen, but which variable
|
|---|
| 3050 | came in second, third, etc. The default value, 4, was taken from the R
|
|---|
| 3051 | rpart package that is used to fit the model."""),
|
|---|
| 3052 | arcGISDisplayName=_(u'Number of competitor splits to retain'),
|
|---|
| 3053 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3054 |
|
|---|
| 3055 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'maxSurrogate',
|
|---|
| 3056 | typeMetadata=IntegerTypeMetadata(minValue=0),
|
|---|
| 3057 | description=_(
|
|---|
| 3058 | u"""The number of surrogate splits to retain in the output. If this is
|
|---|
| 3059 | set to zero the compute time will be shortened, since approximately
|
|---|
| 3060 | half of the computational time (other than setup) is used in the
|
|---|
| 3061 | search for surrogate splits. The default value, 5, was taken from the
|
|---|
| 3062 | R rpart package that is used to fit the model."""),
|
|---|
| 3063 | arcGISDisplayName=_(u'Number of surrogate splits to retain'),
|
|---|
| 3064 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3065 |
|
|---|
| 3066 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'useSurrogate',
|
|---|
| 3067 | typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1, 2]),
|
|---|
| 3068 | description=_(
|
|---|
| 3069 | u"""The method for using surrogates in the splitting process, one of:
|
|---|
| 3070 |
|
|---|
| 3071 | * 0 - display only; an observation with a missing value for the
|
|---|
| 3072 | primary split rule is not sent further down the tree.
|
|---|
| 3073 |
|
|---|
| 3074 | * 1 - use surrogates, in order, to split subjects missing the primary
|
|---|
| 3075 | variable; if all surrogates are missing the observation is not
|
|---|
| 3076 | split.
|
|---|
| 3077 |
|
|---|
| 3078 | * 2 - if all surrogates are missing, then send the observation in the
|
|---|
| 3079 | majority direction. This is the recommendations of Breiman, et al.
|
|---|
| 3080 |
|
|---|
| 3081 | The default value, 2, was taken from the R rpart package that is used
|
|---|
| 3082 | to fit the model."""),
|
|---|
| 3083 | arcGISDisplayName=_(u'Surrogate usage method'),
|
|---|
| 3084 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3085 |
|
|---|
| 3086 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'surrogateStyle',
|
|---|
| 3087 | typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1]),
|
|---|
| 3088 | description=_(
|
|---|
| 3089 | u"""The method used to select the best surrogate, one of:
|
|---|
| 3090 |
|
|---|
| 3091 | * 0 - the tool uses the total number of correct classifications for a
|
|---|
| 3092 | potential surrogate variable.
|
|---|
| 3093 |
|
|---|
| 3094 | * 1 - the tool uses the percent correct, calculated over the
|
|---|
| 3095 | non-missing values of the surrogate.
|
|---|
| 3096 |
|
|---|
| 3097 | The default value, 0, was taken from the R rpart package that is used
|
|---|
| 3098 | to fit the model. This value more severely penalizes covariates with a
|
|---|
| 3099 | large number of missing values."""),
|
|---|
| 3100 | arcGISDisplayName=_(u'Surrogate selection method'),
|
|---|
| 3101 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3102 |
|
|---|
| 3103 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'xval',
|
|---|
| 3104 | typeMetadata=IntegerTypeMetadata(minValue=1),
|
|---|
| 3105 | description=_(
|
|---|
| 3106 | u"""The number of cross-validations to perform.
|
|---|
| 3107 |
|
|---|
| 3108 | The R rpart package that is used to fit the model uses a default of
|
|---|
| 3109 | 10, but we have found that so few iterations can cause the calculated
|
|---|
| 3110 | cross-validation error to differ substantially over several runs of
|
|---|
| 3111 | the tool using identical input data and parameter values. In one case,
|
|---|
| 3112 | we observed the cross-validation errors to vary by over 10%. Because
|
|---|
| 3113 | the cross-validation errors are often used to prune the tree, we
|
|---|
| 3114 | believe it is important to have accurate estimates of them, so we
|
|---|
| 3115 | increased the default to 1000. This will cause complicated models to
|
|---|
| 3116 | run substantially slower. If you find your model is too slow, decrease
|
|---|
| 3117 | the value."""),
|
|---|
| 3118 | arcGISDisplayName=_(u'Number of cross-validations'),
|
|---|
| 3119 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3120 |
|
|---|
| 3121 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'maxDepth',
|
|---|
| 3122 | typeMetadata=IntegerTypeMetadata(minValue=0, maxValue=30),
|
|---|
| 3123 | description=_(
|
|---|
| 3124 | u"""The maximum depth of any node of the final tree, with the root
|
|---|
| 3125 | node counted as depth 0. The default value, 30, was taken from the R
|
|---|
| 3126 | rpart package that is used to fit the model."""),
|
|---|
| 3127 | arcGISDisplayName=_(u'Maximum tree depth'),
|
|---|
| 3128 | arcGISCategory=_(u'Tree growing options'))
|
|---|
| 3129 |
|
|---|
| 3130 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'pruningMethod',
|
|---|
| 3131 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, allowedValues=[u'Minimum error', u'1-SE rule', u'Interactive', u'User specified CP'], makeLowercase=True),
|
|---|
| 3132 | description=_(
|
|---|
| 3133 | u"""Method to use for pruning the tree, one of:
|
|---|
| 3134 |
|
|---|
| 3135 | * Minimum error - The tool will prune the tree using the complexity
|
|---|
| 3136 | parameter associated with the minimum cross-validation error.
|
|---|
| 3137 |
|
|---|
| 3138 | * 1-SE rule - The tool will prune the tree using the complexity
|
|---|
| 3139 | parameter obtained using the 1-SE rule described by Therneau and
|
|---|
| 3140 | Atkinson in chapter 4 of
|
|---|
| 3141 | `An Introduction to Recursive Partitioning Using the RPART Routines <http://www.mayo.edu/hsr/techrpt/61.pdf>`_.
|
|---|
| 3142 | A plot of the mean cross-validation errors versus candidate values
|
|---|
| 3143 | of the complexity parameter often has an initial sharp drop followed
|
|---|
| 3144 | by a relatively flat plateau and then a slow rise. (This tool
|
|---|
| 3145 | produces that plot as an optional diagnostic output.) According to
|
|---|
| 3146 | the 1-SE rule, any cross-validation error within one standard error
|
|---|
| 3147 | of the minimum cross-validation error is considered equivalent to
|
|---|
| 3148 | the minimum (i.e. considered to be part of the flat plateau). The
|
|---|
| 3149 | 1-SE rule chooses the largest complexity parameter that yields a
|
|---|
| 3150 | cross-validation error equivalent ot the minimum. This results in an
|
|---|
| 3151 | optimal tree, i.e. the tree with the fewest number of splits that
|
|---|
| 3152 | yields a cross-validation error equivalent to the minimum.
|
|---|
| 3153 |
|
|---|
| 3154 | * Interactive - The tool will display the unpruned tree in a window,
|
|---|
| 3155 | allowing you to prune it interactively with the mouse. If you click
|
|---|
| 3156 | on a split it will be marked as deleted. If you click on an
|
|---|
| 3157 | already-deleted split it will be undeleted (if its parent is not
|
|---|
| 3158 | deleted). Information about the node is printed as you click. When
|
|---|
| 3159 | you have finished pruning, click on the QUIT button.
|
|---|
| 3160 |
|
|---|
| 3161 | * User specified CP - The tool will prune the tree using the
|
|---|
| 3162 | complexity parameter you specify below.
|
|---|
| 3163 |
|
|---|
| 3164 | If this parameter is omitted, the tree will not be pruned."""),
|
|---|
| 3165 | arcGISDisplayName=_(u'Pruning method'),
|
|---|
| 3166 | arcGISCategory=_(u'Tree pruning options'))
|
|---|
| 3167 |
|
|---|
| 3168 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'pruningCP',
|
|---|
| 3169 | typeMetadata=FloatTypeMetadata(canBeNone=True, mustBeGreaterThan=0.),
|
|---|
| 3170 | description=_(
|
|---|
| 3171 | u"""Complexity parameter for pruning the tree. This parameter is only
|
|---|
| 3172 | used when the Pruning Method is set to 'User specified'."""),
|
|---|
| 3173 | arcGISDisplayName=_(u'Complexity parameter for pruning'),
|
|---|
| 3174 | arcGISCategory=_(u'Tree pruning options'))
|
|---|
| 3175 |
|
|---|
| 3176 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'xColumnName', TreeModel.FitToArcGISTable, u'xColumnName')
|
|---|
| 3177 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'yColumnName', TreeModel.FitToArcGISTable, u'yColumnName')
|
|---|
| 3178 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'zColumnName', TreeModel.FitToArcGISTable, u'zColumnName')
|
|---|
| 3179 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'mColumnName', TreeModel.FitToArcGISTable, u'mColumnName')
|
|---|
| 3180 |
|
|---|
| 3181 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'writeSummaryFile',
|
|---|
| 3182 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3183 | description=_(
|
|---|
| 3184 | u"""If True, this tool will write summary information about the fitted
|
|---|
| 3185 | model to a text file. (This is the same information that the tool
|
|---|
| 3186 | outputs as log messages.) The file will have the name X_summary.txt,
|
|---|
| 3187 | where X is the name of the output model file, minus any
|
|---|
| 3188 | extension."""),
|
|---|
| 3189 | arcGISDisplayName=_(u'Write model summary file'),
|
|---|
| 3190 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 3191 |
|
|---|
| 3192 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'writeDiagnosticPlots',
|
|---|
| 3193 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3194 | description=_(
|
|---|
| 3195 | u"""If True, this tool will write diagnostic plots:
|
|---|
| 3196 |
|
|---|
| 3197 | * X_cp.Y - visual representation of the cross-validation results for
|
|---|
| 3198 | the unpruned tree, to assist you with choosing a Complexity
|
|---|
| 3199 | Parameter for pruning the tree. The x-axis represents possible
|
|---|
| 3200 | choices for the Complexity Parameter and the y-axis represents the
|
|---|
| 3201 | means and standard deviations of the errors in the cross-validated
|
|---|
| 3202 | prediction that would result. The dashed horizontal line is drawn 1
|
|---|
| 3203 | standard error above the minimum of the curve. A good choice of the
|
|---|
| 3204 | Complexity Parameter for pruning is the leftmost value for which the
|
|---|
| 3205 | mean error lies below the line. This value will be chosen
|
|---|
| 3206 | automatically if the Pruning Method parameter is set to '1-SE rule'.
|
|---|
| 3207 |
|
|---|
| 3208 | * X_rsquare.Y - two-panel plot only produced for the ANOVA splitting
|
|---|
| 3209 | method. The first panel shows the r-square (both apparent and
|
|---|
| 3210 | apparent from cross-validation) versus the number of splits. The
|
|---|
| 3211 | second panel shows the mean error in the cross-validated prediction
|
|---|
| 3212 | versus the number of splits (this is essentially the same plot as
|
|---|
| 3213 | the X_cp.Y plot described above). Both panels are produced for the
|
|---|
| 3214 | unpruned tree.
|
|---|
| 3215 |
|
|---|
| 3216 | * X_residuals.Y - plot of the residuals vs. the fitted values for the
|
|---|
| 3217 | unpruned tree.
|
|---|
| 3218 |
|
|---|
| 3219 | * X_pruned_residuals.Y - plot of the residuals vs. the fitted values
|
|---|
| 3220 | for the pruned tree. This plot will only be produced if the tree is
|
|---|
| 3221 | pruned.
|
|---|
| 3222 |
|
|---|
| 3223 | In the file names above, X is the name of the output model file, minus
|
|---|
| 3224 | any extension, and Y is the extension of the selected output plot
|
|---|
| 3225 | format."""),
|
|---|
| 3226 | arcGISDisplayName=_(u'Write diagnostic plots'),
|
|---|
| 3227 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 3228 |
|
|---|
| 3229 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'writeTreePlot',
|
|---|
| 3230 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3231 | description=_(
|
|---|
| 3232 | u"""If True, this tool will write a plot of the unpruned tree to a
|
|---|
| 3233 | file having the name X_unpruned_tree.Y, where X is the name of the
|
|---|
| 3234 | output model file minus the extension and Y is the extension of the
|
|---|
| 3235 | selected output plot format."""),
|
|---|
| 3236 | arcGISDisplayName=_(u'Write tree plot'),
|
|---|
| 3237 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 3238 |
|
|---|
| 3239 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'writePrunedTreePlot',
|
|---|
| 3240 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3241 | description=_(
|
|---|
| 3242 | u"""If True, this tool will write a plot of the pruned tree to a file
|
|---|
| 3243 | having the name X_pruned_tree.Y, where X is the name of the output
|
|---|
| 3244 | model file minus the extension and Y is the extension of the selected
|
|---|
| 3245 | output plot format. This plot will only be produced if the tree is
|
|---|
| 3246 | pruned."""),
|
|---|
| 3247 | arcGISDisplayName=_(u'Write pruned tree plot'),
|
|---|
| 3248 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 3249 |
|
|---|
| 3250 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'plotFileFormat', TreeModel.FitToArcGISTable, u'plotFileFormat')
|
|---|
| 3251 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'res', TreeModel.FitToArcGISTable, u'res')
|
|---|
| 3252 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'width', TreeModel.FitToArcGISTable, u'width')
|
|---|
| 3253 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'height', TreeModel.FitToArcGISTable, u'height')
|
|---|
| 3254 |
|
|---|
| 3255 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'pointSize',
|
|---|
| 3256 | typeMetadata=FloatTypeMetadata(minValue=1.0),
|
|---|
| 3257 | description=_(
|
|---|
| 3258 | u"""The default pointsize of text in diagnostic plots (the size of the
|
|---|
| 3259 | text in tree plots is controlled by a different parameter)."""),
|
|---|
| 3260 | arcGISDisplayName=_(u'Default pointsize of text in diagnostic plots'),
|
|---|
| 3261 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 3262 |
|
|---|
| 3263 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'bg', TreeModel.FitToArcGISTable, u'bg')
|
|---|
| 3264 |
|
|---|
| 3265 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'treePlotType',
|
|---|
| 3266 | typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1, 2, 3, 4]),
|
|---|
| 3267 | description=_(
|
|---|
| 3268 | u"""Type of tree plots to create, one of:
|
|---|
| 3269 |
|
|---|
| 3270 | * 0 - The default. Draw a split label at each split and a node label
|
|---|
| 3271 | at each leaf.
|
|---|
| 3272 |
|
|---|
| 3273 | * 1 - Label all nodes, not just leaves.
|
|---|
| 3274 |
|
|---|
| 3275 | * 2 - Like 1 but draw the split labels below the node labels. Similar
|
|---|
| 3276 | to the plots in the CART book.
|
|---|
| 3277 |
|
|---|
| 3278 | * 3 - Draw separate split labels for the left and right directions.
|
|---|
| 3279 |
|
|---|
| 3280 | * 4 - Like 3 but label all nodes, not just leaves.
|
|---|
| 3281 | """),
|
|---|
| 3282 | arcGISDisplayName=_(u'Plot type'),
|
|---|
| 3283 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3284 |
|
|---|
| 3285 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'extra',
|
|---|
| 3286 | typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
|
|---|
| 3287 | description=_(
|
|---|
| 3288 | u"""Extra information to display at the nodes, one of:
|
|---|
| 3289 |
|
|---|
| 3290 | * 0 - No extra information.
|
|---|
| 3291 |
|
|---|
| 3292 | * 1 - The default. Display the number of observations that fall in the
|
|---|
| 3293 | node (per class for Class models; prefixed by the number of events
|
|---|
| 3294 | for Poisson and Exp models).
|
|---|
| 3295 |
|
|---|
| 3296 | * 2 - Class models: display the classification rate at the node, expressed as the
|
|---|
| 3297 | number of correct classifications and the number of observations in the node.
|
|---|
| 3298 | Poisson and Exp models: display the number of events.
|
|---|
| 3299 |
|
|---|
| 3300 | * 3 - Class models only: misclassification rate at the node, expressed
|
|---|
| 3301 | as the number of incorrect classifications and the number of
|
|---|
| 3302 | observations in the node.
|
|---|
| 3303 |
|
|---|
| 3304 | * 4 - Class models only: probability per class of observations in the
|
|---|
| 3305 | node (conditioned on the node, sum across a node is 1).
|
|---|
| 3306 |
|
|---|
| 3307 | * 5 - Class models only: like 4 but do not display the fitted class.
|
|---|
| 3308 |
|
|---|
| 3309 | * 6 - Class models only: the probability of the second class only.
|
|---|
| 3310 | Useful for binary responses.
|
|---|
| 3311 |
|
|---|
| 3312 | * 7 - Class models only: like 6 but do not display the fitted class.
|
|---|
| 3313 |
|
|---|
| 3314 | * 8 - Class models only: the probability of the fitted class.
|
|---|
| 3315 |
|
|---|
| 3316 | * 9 - Class models only: the probabilities times the fraction of
|
|---|
| 3317 | observations in the node (the probability relative to all
|
|---|
| 3318 | observations, sum across all leaves is 1).
|
|---|
| 3319 | """),
|
|---|
| 3320 | arcGISDisplayName=_(u'Extra information'),
|
|---|
| 3321 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3322 |
|
|---|
| 3323 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'percentage',
|
|---|
| 3324 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3325 | description=_(
|
|---|
| 3326 | u"""If True, the default, nodes will be labeled with the percentage of
|
|---|
| 3327 | observations in the node. The percentage will be displayed below the
|
|---|
| 3328 | "extra information" (if any is requested)."""),
|
|---|
| 3329 | arcGISDisplayName=_(u'Display percentage of observations'),
|
|---|
| 3330 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3331 |
|
|---|
| 3332 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'under',
|
|---|
| 3333 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3334 | description=_(
|
|---|
| 3335 | u"""If True, the default, extra information and percentage of
|
|---|
| 3336 | observations will be displayed below the nodes. If False, they will be
|
|---|
| 3337 | displayed within the nodes' boxes.
|
|---|
| 3338 |
|
|---|
| 3339 | This parameter is ignored if neither extra information nor percentage
|
|---|
| 3340 | of observations are requested."""),
|
|---|
| 3341 | arcGISDisplayName=_(u'Display extra text under node boxes'),
|
|---|
| 3342 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3343 |
|
|---|
| 3344 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'clipRightLabels',
|
|---|
| 3345 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3346 | description=_(
|
|---|
| 3347 | u"""If True, the default, the right-hand split labels on plots of type
|
|---|
| 3348 | 3 or 4 will not include "variable=". If False, the right-hand labels
|
|---|
| 3349 | will include "variable=", just like the left-hand labels.
|
|---|
| 3350 |
|
|---|
| 3351 | This parameter is ignored the plot type is not 3 or 4."""),
|
|---|
| 3352 | arcGISDisplayName=_(u'Clip right-hand split labels'),
|
|---|
| 3353 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3354 |
|
|---|
| 3355 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'fallenLeaves',
|
|---|
| 3356 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3357 | description=_(
|
|---|
| 3358 | u"""If True, all leaf nodes will be displayed at the bottom. If False,
|
|---|
| 3359 | the default, leaf nodes will be displayed where they would normally
|
|---|
| 3360 | appear."""),
|
|---|
| 3361 | arcGISDisplayName=_(u'Display leaves at bottom'),
|
|---|
| 3362 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3363 |
|
|---|
| 3364 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'branchType',
|
|---|
| 3365 | typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
|
|---|
| 3366 | description=_(
|
|---|
| 3367 | u"""Type of branches to draw. If zero, the default, the tool will draw
|
|---|
| 3368 | conventional branches having a constant narrow width. If nonzero, the tool
|
|---|
| 3369 | will draw "wide branches", with branch widths proportional to the
|
|---|
| 3370 | specified parameter, one of:
|
|---|
| 3371 |
|
|---|
| 3372 | * 1 - Deviance
|
|---|
| 3373 |
|
|---|
| 3374 | * 2 - Square root of deviance
|
|---|
| 3375 |
|
|---|
| 3376 | * 3 - Deviance / number of observations
|
|---|
| 3377 |
|
|---|
| 3378 | * 4 - Square root of (deviance / number of observations)
|
|---|
| 3379 |
|
|---|
| 3380 | * 5 - Number of observations
|
|---|
| 3381 |
|
|---|
| 3382 | * 6 - Complexity parameter
|
|---|
| 3383 |
|
|---|
| 3384 | * 7 - Absolute value of the predicted value
|
|---|
| 3385 |
|
|---|
| 3386 | * 8 - Predicted value minus the minimum predicted value
|
|---|
| 3387 |
|
|---|
| 3388 | * 9 - Constant wide width, for checking the visual distortion that
|
|---|
| 3389 | results when wide branches are drawn at different angles
|
|---|
| 3390 | """),
|
|---|
| 3391 | arcGISDisplayName=_(u'Branch type'),
|
|---|
| 3392 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3393 |
|
|---|
| 3394 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'branch',
|
|---|
| 3395 | typeMetadata=FloatTypeMetadata(minValue=0., maxValue=1.),
|
|---|
| 3396 | description=_(
|
|---|
| 3397 | u"""Controls the shape of the branches from parent to child nodes. Any
|
|---|
| 3398 | number from 0 to 1 is allowed. A value of 1 gives square shouldered
|
|---|
| 3399 | branches, a value of 0 give V shaped branches, with other values being
|
|---|
| 3400 | intermediate.
|
|---|
| 3401 |
|
|---|
| 3402 | Note that if the Branch Type parameter is nonzero, the Branch Shape
|
|---|
| 3403 | parameter will be rounded to 1 or 0 (e.g. a Branch Shape of 0.75 will
|
|---|
| 3404 | be rounded to 1)."""),
|
|---|
| 3405 | arcGISDisplayName=_(u'Branch shape'),
|
|---|
| 3406 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3407 |
|
|---|
| 3408 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'uniform',
|
|---|
| 3409 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3410 | description=_(
|
|---|
| 3411 | u"""If True, the default, the vertical spacing of the nodes will be
|
|---|
| 3412 | uniform. If False, the nodes will be spaced proportionally to the fit
|
|---|
| 3413 | (more precisely, to the difference between a node's deviance and the
|
|---|
| 3414 | sum of its children's deviances). Small spaces must be expanded to
|
|---|
| 3415 | leave room for the labels.
|
|---|
| 3416 |
|
|---|
| 3417 | Note: if this parameter is False and the Text Magnification Factor is
|
|---|
| 3418 | omitted (the default), very small text can sometimes result."""),
|
|---|
| 3419 | arcGISDisplayName=_(u'Use uniform vertical spacing'),
|
|---|
| 3420 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3421 |
|
|---|
| 3422 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'digits',
|
|---|
| 3423 | typeMetadata=IntegerTypeMetadata(minValue=0),
|
|---|
| 3424 | description=_(
|
|---|
| 3425 | u"""Number of significant digits to display in floating-point numbers.
|
|---|
| 3426 |
|
|---|
| 3427 | Probabilities and percentages are treated specially. Probabilities are
|
|---|
| 3428 | displayed with the specified number of digits after the decimal point
|
|---|
| 3429 | (by default 2 digits). Percentages are displayed with the specified
|
|---|
| 3430 | number of digits minus 2 after the decimal point (by default no
|
|---|
| 3431 | digits)."""),
|
|---|
| 3432 | arcGISDisplayName=_(u'Significant digits for labels'),
|
|---|
| 3433 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3434 |
|
|---|
| 3435 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'varlen',
|
|---|
| 3436 | typeMetadata=IntegerTypeMetadata(),
|
|---|
| 3437 | description=_(
|
|---|
| 3438 | u"""Length of variable names in text at the splits (and, for class
|
|---|
| 3439 | responses, the class displayed at the node). There are three
|
|---|
| 3440 | possibilities:
|
|---|
| 3441 |
|
|---|
| 3442 | * 0 - The default. Use full names.
|
|---|
| 3443 |
|
|---|
| 3444 | * >0 - Use an abbreviation algorithm to shorten the names to at
|
|---|
| 3445 | least the specified number, such that they remain unique.
|
|---|
| 3446 |
|
|---|
| 3447 | * <0 - Truncate names to the shortest length where they are still
|
|---|
| 3448 | unique, but never truncate to shorter than the specified number
|
|---|
| 3449 | (e.g. the value -5 means never truncate to shorter than 5
|
|---|
| 3450 | characters).
|
|---|
| 3451 | """),
|
|---|
| 3452 | arcGISDisplayName=_(u'Length of variable names at splits'),
|
|---|
| 3453 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3454 |
|
|---|
| 3455 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'faclen',
|
|---|
| 3456 | typeMetadata=IntegerTypeMetadata(),
|
|---|
| 3457 | description=_(
|
|---|
| 3458 | u"""Length of factor level names (i.e. categorical variable values) in
|
|---|
| 3459 | splits. There are four possibilities:
|
|---|
| 3460 |
|
|---|
| 3461 | * 0 - The default. Use full names.
|
|---|
| 3462 |
|
|---|
| 3463 | * 1 - Represent factor levels with alphabetic characters (a for the
|
|---|
| 3464 | first level, b for the second, and so on).
|
|---|
| 3465 |
|
|---|
| 3466 | * >1 - Use an abbreviation algorithm to shorten the names to at
|
|---|
| 3467 | least the specified number, such that they remain unique.
|
|---|
| 3468 |
|
|---|
| 3469 | * <0 - Truncate names to the shortest length where they are still
|
|---|
| 3470 | unique, but never truncate to shorter than the specified number
|
|---|
| 3471 | (e.g. the value -5 means never truncate to shorter than 5
|
|---|
| 3472 | characters).
|
|---|
| 3473 | """),
|
|---|
| 3474 | arcGISDisplayName=_(u'Length of factor level names in splits'),
|
|---|
| 3475 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3476 |
|
|---|
| 3477 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'cex',
|
|---|
| 3478 | typeMetadata=FloatTypeMetadata(canBeNone=True, mustBeGreaterThan=0.),
|
|---|
| 3479 | description=_(
|
|---|
| 3480 | u"""A numerical value giving the amount by which text should be
|
|---|
| 3481 | magnified relative to the default. If omitted, the default, the text
|
|---|
| 3482 | size will be calculated automatically.
|
|---|
| 3483 |
|
|---|
| 3484 | The default automatic calculation means that this seemingly innocuous
|
|---|
| 3485 | argument has a far reaching effect. If necessary it will trigger the
|
|---|
| 3486 | node shifting engine to get a decent type size (see the Compress Tree
|
|---|
| 3487 | Vertically parameter)."""),
|
|---|
| 3488 | arcGISDisplayName=_(u'Text magnification factor'),
|
|---|
| 3489 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3490 |
|
|---|
| 3491 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'tweak',
|
|---|
| 3492 | typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
|
|---|
| 3493 | description=_(
|
|---|
| 3494 | u"""Adjust the (possibly automatically calculated) Text Magnification
|
|---|
| 3495 | Factor. For example, use 1.1 to make the text 10% larger. The default
|
|---|
| 3496 | is 1, meaning no adjustment.
|
|---|
| 3497 |
|
|---|
| 3498 | Note that font sizes are discrete, so the Text Magnification Factor
|
|---|
| 3499 | you ask for may not be the one you get. And a small tweak may not
|
|---|
| 3500 | actually change the type size or change it more than you want."""),
|
|---|
| 3501 | arcGISDisplayName=_(u'Tweak text magnification'),
|
|---|
| 3502 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3503 |
|
|---|
| 3504 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'compress',
|
|---|
| 3505 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3506 | description=_(
|
|---|
| 3507 | u"""If True, the default, the tree will be compressed horizontally by
|
|---|
| 3508 | shifting nodes horizontally where space is available."""),
|
|---|
| 3509 | arcGISDisplayName=_(u'Compress tree horizontally'),
|
|---|
| 3510 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3511 |
|
|---|
| 3512 | AddArgumentMetadata(TreeModel.FitToArcGISTable, u'ycompress',
|
|---|
| 3513 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3514 | description=_(
|
|---|
| 3515 | u"""If True, the default, and the initial automatically calculated
|
|---|
| 3516 | Text Magnification Factor is less than 0.7, crowded labels will be
|
|---|
| 3517 | shifted vertically where space is available. This often allows
|
|---|
| 3518 | considerably larger text.
|
|---|
| 3519 |
|
|---|
| 3520 | Set this parameter to False if you fell the resulting plot is too
|
|---|
| 3521 | messy. The shifting algorithm may work a little better (allowing
|
|---|
| 3522 | larger text) for plot types 1, 2, and 3."""),
|
|---|
| 3523 | arcGISDisplayName=_(u'Compress tree vertically'),
|
|---|
| 3524 | arcGISCategory=_(u'Tree plot options'))
|
|---|
| 3525 |
|
|---|
| 3526 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', TreeModel.FitToArcGISTable, u'overwriteExisting')
|
|---|
| 3527 |
|
|---|
| 3528 | # Public method: TreeModel.PredictFromArcGISRasters
|
|---|
| 3529 |
|
|---|
| 3530 | AddMethodMetadata(TreeModel.PredictFromArcGISRasters,
|
|---|
| 3531 | shortDescription=_(u'Using a fitted tree model, this tool creates a raster representing the response variable predicted from rasters representing the predictor variables.'),
|
|---|
| 3532 | isExposedToPythonCallers=True,
|
|---|
| 3533 | isExposedByCOM=True,
|
|---|
| 3534 | isExposedAsArcGISTool=True,
|
|---|
| 3535 | arcGISDisplayName=_(u'Predict Tree Model From Rasters'),
|
|---|
| 3536 | arcGISToolCategory=_(u'Statistics\\Model Data\\Tree Models'),
|
|---|
| 3537 | dependencies=[ArcGISDependency(9, 1), RDependency(2, 12, 0), RPackageDependency(u'rpart', u'3.1-48'), RPackageDependency(u'rgdal')])
|
|---|
| 3538 |
|
|---|
| 3539 | CopyArgumentMetadata(TreeModel.FitToArcGISTable, u'cls', TreeModel.PredictFromArcGISRasters, u'cls')
|
|---|
| 3540 |
|
|---|
| 3541 | AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'inputModelFile',
|
|---|
| 3542 | typeMetadata=FileTypeMetadata(mustExist=True),
|
|---|
| 3543 | description=_(
|
|---|
| 3544 | u"""File that contains the fitted model, generated by the Fit Tree
|
|---|
| 3545 | Model tool."""),
|
|---|
| 3546 | arcGISDisplayName=_(u'Input model file'))
|
|---|
| 3547 |
|
|---|
| 3548 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputPredictorRasters', TreeModel.PredictFromArcGISRasters, u'inputPredictorRasters')
|
|---|
| 3549 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'variableNames', TreeModel.PredictFromArcGISRasters, u'variableNames')
|
|---|
| 3550 |
|
|---|
| 3551 | AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'outputResponseRaster',
|
|---|
| 3552 | typeMetadata=ArcGISRasterTypeMetadata(mustBeDifferentThanArguments=[u'inputModelFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
|
|---|
| 3553 | description=_(
|
|---|
| 3554 | u"""Output raster representing the response predicted by the model
|
|---|
| 3555 | from the input predictor rasters."""),
|
|---|
| 3556 | direction=u'Output',
|
|---|
| 3557 | arcGISDisplayName=_(u'Output response raster'))
|
|---|
| 3558 |
|
|---|
| 3559 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster', TreeModel.PredictFromArcGISRasters, u'templateRaster')
|
|---|
| 3560 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques', TreeModel.PredictFromArcGISRasters, u'resamplingTechniques')
|
|---|
| 3561 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', TreeModel.PredictFromArcGISRasters, u'ignoreOutOfRangeValues')
|
|---|
| 3562 |
|
|---|
| 3563 | AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'buildPyramids',
|
|---|
| 3564 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3565 | description=_(
|
|---|
| 3566 | u"""If True, pyramids will be built for the output raster, which will
|
|---|
| 3567 | improve its display speed in the ArcGIS user interface."""),
|
|---|
| 3568 | arcGISDisplayName=_(u'Build pyramids'),
|
|---|
| 3569 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 3570 |
|
|---|
| 3571 | AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'overwriteExisting',
|
|---|
| 3572 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3573 | description=_(
|
|---|
| 3574 | u"""If True, the output raster will be overwritten, if it exists. If
|
|---|
| 3575 | False, a ValueError will be raised if the output raster exists."""),
|
|---|
| 3576 | initializeToArcGISGeoprocessorVariable=u'OverwriteOutput')
|
|---|
| 3577 |
|
|---|
| 3578 | ###############################################################################
|
|---|
| 3579 | # Metadata: LinearMixedModel class
|
|---|
| 3580 | ###############################################################################
|
|---|
| 3581 |
|
|---|
| 3582 | AddClassMetadata(LinearMixedModel,
|
|---|
| 3583 | shortDescription=_(u'Provides methods for fitting and predicting linear mixed-effects models.'),
|
|---|
| 3584 | isExposedAsCOMServer=True,
|
|---|
| 3585 | comIID=u'{4678EDA8-073A-496B-81B6-6DC8F47E4EC3}',
|
|---|
| 3586 | comCLSID=u'{A65B2686-1240-44E8-8764-397EB58BFCD7}')
|
|---|
| 3587 |
|
|---|
| 3588 | # Public method: LinearMixedModel.FitToArcGISTable
|
|---|
| 3589 |
|
|---|
| 3590 | AddMethodMetadata(LinearMixedModel.FitToArcGISTable,
|
|---|
| 3591 | shortDescription=_(u'Fits a linear mixed-effects model to data in an ArcGIS table.'),
|
|---|
| 3592 | longDescription=_(
|
|---|
| 3593 | u"""This tool fits a linear mixed-effects model using the lme function
|
|---|
| 3594 | from the R `nlme package <http://cran.r-project.org/web/packages/nlme/nlme.pdf>`_
|
|---|
| 3595 | written by Jose Pinherio and Douglas Bates (2000). Please see the
|
|---|
| 3596 | documentation for that function for more information.
|
|---|
| 3597 |
|
|---|
| 3598 | **References**
|
|---|
| 3599 |
|
|---|
| 3600 | The R documentation for the nlme package states the following about
|
|---|
| 3601 | the lme function: The computational methods follow the general
|
|---|
| 3602 | framework of Lindstrom and Bates (1988). The model formulation is
|
|---|
| 3603 | described in Laird and Ware (1982). The variance-covariance
|
|---|
| 3604 | parametrizations are described in Pinheiro and Bates (1996). The
|
|---|
| 3605 | different correlation structures that may be used are described in
|
|---|
| 3606 | Box, Jenkins and Reinse (1994), Littel et al (1996), and Venables and
|
|---|
| 3607 | Ripley, (1997). The use of variance functions for linear and nonlinear
|
|---|
| 3608 | mixed effects models is presented in detail in Davidian and Giltinan
|
|---|
| 3609 | (1995).
|
|---|
| 3610 |
|
|---|
| 3611 | Box, G.E.P., Jenkins, G.M., and Reinsel G.C. (1994) "Time Series
|
|---|
| 3612 | Analysis: Forecasting and Control", 3rd Edition, Holden-Day.
|
|---|
| 3613 |
|
|---|
| 3614 | Davidian, M. and Giltinan, D.M. (1995) "Nonlinear Mixed Effects Models
|
|---|
| 3615 | for Repeated Measurement Data", Chapman and Hall.
|
|---|
| 3616 |
|
|---|
| 3617 | Laird, N.M. and Ware, J.H. (1982) "Random-Effects Models for
|
|---|
| 3618 | Longitudinal Data", Biometrics, 38, 963-974.
|
|---|
| 3619 |
|
|---|
| 3620 | Lindstrom, M.J. and Bates, D.M. (1988) "Newton-Raphson and EM
|
|---|
| 3621 | Algorithms for Linear Mixed-Effects Models for Repeated-Measures
|
|---|
| 3622 | Data", Journal of the American Statistical Association, 83, 1014-1022.
|
|---|
| 3623 |
|
|---|
| 3624 | Littel, R.C., Milliken, G.A., Stroup, W.W., and Wolfinger, R.D. (1996)
|
|---|
| 3625 | "SAS Systems for Mixed Models", SAS Institute.
|
|---|
| 3626 |
|
|---|
| 3627 | Pinheiro, J.C. and Bates., D.M. (1996) "Unconstrained Parametrizations
|
|---|
| 3628 | for Variance-Covariance Matrices", Statistics and Computing, 6,
|
|---|
| 3629 | 289-296.
|
|---|
| 3630 |
|
|---|
| 3631 | Pinheiro, J.C., and Bates, D.M. (2000) "Mixed-Effects Models in S and
|
|---|
| 3632 | S-PLUS", Springer.
|
|---|
| 3633 |
|
|---|
| 3634 | Venables, W.N. and Ripley, B.D. (2002) "Modern Applied Statistics with
|
|---|
| 3635 | S", 4th Edition, Springer-Verlag.
|
|---|
| 3636 |
|
|---|
| 3637 | `R nlme package documentation <http://cran.r-project.org/web/packages/nlme/nlme.pdf>`_"""),
|
|---|
| 3638 | isExposedToPythonCallers=True,
|
|---|
| 3639 | isExposedByCOM=True,
|
|---|
| 3640 | isExposedAsArcGISTool=True,
|
|---|
| 3641 | arcGISDisplayName=_(u'Fit Linear Mixed Model'),
|
|---|
| 3642 | arcGISToolCategory=_(u'Statistics\\Model Data\\Linear Mixed Models'),
|
|---|
| 3643 | dependencies=[ArcGISDependency(9, 1), RDependency(2, 7, 0)])
|
|---|
| 3644 |
|
|---|
| 3645 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'cls',
|
|---|
| 3646 | typeMetadata=ClassOrClassInstanceTypeMetadata(cls=LinearMixedModel),
|
|---|
| 3647 | description=_(u'%s class or an instance of it.') % LinearMixedModel.__name__)
|
|---|
| 3648 |
|
|---|
| 3649 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'inputTable', LinearMixedModel.FitToArcGISTable, u'inputTable')
|
|---|
| 3650 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'outputModelFile', LinearMixedModel.FitToArcGISTable, u'outputModelFile')
|
|---|
| 3651 |
|
|---|
| 3652 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'fixedFormula',
|
|---|
| 3653 | typeMetadata=UnicodeStringTypeMetadata(),
|
|---|
| 3654 | description=_(
|
|---|
| 3655 | u"""Two-sided formula that specifies the fixed-effects part of the
|
|---|
| 3656 | model.
|
|---|
| 3657 |
|
|---|
| 3658 | """ + _FormulaDescription),
|
|---|
| 3659 | arcGISDisplayName=_(u'Fixed effects formula'))
|
|---|
| 3660 |
|
|---|
| 3661 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'randomFormula',
|
|---|
| 3662 | typeMetadata=UnicodeStringTypeMetadata(),
|
|---|
| 3663 | description=_(
|
|---|
| 3664 | u"""One-sided formula that specifies the random-effects part of the
|
|---|
| 3665 | model. The formula has the form::
|
|---|
| 3666 |
|
|---|
| 3667 | ~x1+...+xn | g1/.../gm
|
|---|
| 3668 |
|
|---|
| 3669 | with x1+...+xn specifying the model for the random effects and
|
|---|
| 3670 | g1/.../gm the grouping structure (m may be equal to 1, in which case
|
|---|
| 3671 | no / is required). The random effects formula will be repeated for all
|
|---|
| 3672 | levels of grouping, in the case of multiple levels of grouping."""),
|
|---|
| 3673 | arcGISDisplayName=_(u'Random effects formula'))
|
|---|
| 3674 |
|
|---|
| 3675 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'where', LinearMixedModel.FitToArcGISTable, u'where')
|
|---|
| 3676 |
|
|---|
| 3677 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'method',
|
|---|
| 3678 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'REML', u'ML']),
|
|---|
| 3679 | description=_(
|
|---|
| 3680 | u"""Model fitting method, one of:
|
|---|
| 3681 |
|
|---|
| 3682 | * REML - the model will be fit by maximizing the restricted
|
|---|
| 3683 | log-likelihood. This is the default.
|
|---|
| 3684 |
|
|---|
| 3685 | * ML - the model will be fit by maximizing the log-likelihood.
|
|---|
| 3686 | """),
|
|---|
| 3687 | arcGISDisplayName=_(u'Model fitting method'),
|
|---|
| 3688 | arcGISCategory=_(u'Model options'))
|
|---|
| 3689 |
|
|---|
| 3690 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'correlationStructure',
|
|---|
| 3691 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, makeLowercase=True, allowedValues=[u'Exponential', u'Gaussian', u'Linear', u'Rational quadratic', u'Spherical']),
|
|---|
| 3692 | description=_(
|
|---|
| 3693 | u"""TODO: Write documentation for this parameter."""),
|
|---|
| 3694 | arcGISDisplayName=_(u'Correlation structure'),
|
|---|
| 3695 | arcGISCategory=_(u'Within-group correlation structure'))
|
|---|
| 3696 |
|
|---|
| 3697 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'correlationFormula',
|
|---|
| 3698 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
|
|---|
| 3699 | description=_(
|
|---|
| 3700 | u"""TODO: Write documentation for this parameter."""),
|
|---|
| 3701 | arcGISDisplayName=_(u'Correlation formula'),
|
|---|
| 3702 | arcGISCategory=_(u'Within-group correlation structure'))
|
|---|
| 3703 |
|
|---|
| 3704 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'range_',
|
|---|
| 3705 | typeMetadata=FloatTypeMetadata(canBeNone=True, mustBeGreaterThan=0.),
|
|---|
| 3706 | description=_(
|
|---|
| 3707 | u"""TODO: Write documentation for this parameter."""),
|
|---|
| 3708 | arcGISDisplayName=_(u'Range'),
|
|---|
| 3709 | arcGISCategory=_(u'Within-group correlation structure'))
|
|---|
| 3710 |
|
|---|
| 3711 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'nugget',
|
|---|
| 3712 | typeMetadata=FloatTypeMetadata(canBeNone=True, minValue=0., maxValue=1.),
|
|---|
| 3713 | description=_(
|
|---|
| 3714 | u"""TODO: Write documentation for this parameter."""),
|
|---|
| 3715 | arcGISDisplayName=_(u'Nugget'),
|
|---|
| 3716 | arcGISCategory=_(u'Within-group correlation structure'))
|
|---|
| 3717 |
|
|---|
| 3718 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'metric',
|
|---|
| 3719 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, makeLowercase=True, allowedValues=[u'Euclidean', u'Manhattan', u'Maximum']),
|
|---|
| 3720 | description=_(
|
|---|
| 3721 | u"""TODO: Write documentation for this parameter."""),
|
|---|
| 3722 | arcGISDisplayName=_(u'Distance metric'),
|
|---|
| 3723 | arcGISCategory=_(u'Within-group correlation structure'))
|
|---|
| 3724 |
|
|---|
| 3725 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'fixed',
|
|---|
| 3726 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3727 | description=_(
|
|---|
| 3728 | u"""TODO: Write documentation for this parameter."""),
|
|---|
| 3729 | arcGISDisplayName=_(u'Keep coefficients fixed during model optimization'),
|
|---|
| 3730 | arcGISCategory=_(u'Within-group correlation structure'))
|
|---|
| 3731 |
|
|---|
| 3732 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'xColumnName', LinearMixedModel.FitToArcGISTable, u'xColumnName')
|
|---|
| 3733 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'yColumnName', LinearMixedModel.FitToArcGISTable, u'yColumnName')
|
|---|
| 3734 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'zColumnName', LinearMixedModel.FitToArcGISTable, u'zColumnName')
|
|---|
| 3735 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'mColumnName', LinearMixedModel.FitToArcGISTable, u'mColumnName')
|
|---|
| 3736 |
|
|---|
| 3737 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'writeSummaryFile',
|
|---|
| 3738 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3739 | description=_(
|
|---|
| 3740 | u"""If True, this tool will write summary information about the fitted
|
|---|
| 3741 | model to a text file. (This is the same information that the tool
|
|---|
| 3742 | outputs as log messages.) The file will have the name X_summary.txt,
|
|---|
| 3743 | where X is the name of the output model file, minus any
|
|---|
| 3744 | extension."""),
|
|---|
| 3745 | arcGISDisplayName=_(u'Write model summary file'),
|
|---|
| 3746 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 3747 |
|
|---|
| 3748 | AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'writeDiagnosticPlots',
|
|---|
| 3749 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 3750 | description=_(
|
|---|
| 3751 | u"""If True, this tool will write diagnostic plots for the fitted
|
|---|
| 3752 | model.
|
|---|
| 3753 |
|
|---|
| 3754 | TODO: Write documentation for this parameter."""),
|
|---|
| 3755 | arcGISDisplayName=_(u'Write diagnostic plots'),
|
|---|
| 3756 | arcGISCategory=_(u'Additional output options'))
|
|---|
| 3757 |
|
|---|
| 3758 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'plotFileFormat', LinearMixedModel.FitToArcGISTable, u'plotFileFormat')
|
|---|
| 3759 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'res', LinearMixedModel.FitToArcGISTable, u'res')
|
|---|
| 3760 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'width', LinearMixedModel.FitToArcGISTable, u'width')
|
|---|
| 3761 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'height', LinearMixedModel.FitToArcGISTable, u'height')
|
|---|
| 3762 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'pointSize', LinearMixedModel.FitToArcGISTable, u'pointSize')
|
|---|
| 3763 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'bg', LinearMixedModel.FitToArcGISTable, u'bg')
|
|---|
| 3764 |
|
|---|
| 3765 | CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', LinearMixedModel.FitToArcGISTable, u'overwriteExisting')
|
|---|
| 3766 |
|
|---|
| 3767 | # Public method: LinearMixedModel.PredictFromArcGISRasters
|
|---|
| 3768 |
|
|---|
| 3769 | AddMethodMetadata(LinearMixedModel.PredictFromArcGISRasters,
|
|---|
| 3770 | shortDescription=_(u'Using a fitted linear mixed model, this tool creates a raster representing the response variable predicted from rasters representing the predictor variables.'),
|
|---|
| 3771 | isExposedToPythonCallers=True,
|
|---|
| 3772 | isExposedByCOM=True,
|
|---|
| 3773 | isExposedAsArcGISTool=True,
|
|---|
| 3774 | arcGISDisplayName=_(u'Predict Linear Mixed Model From Rasters'),
|
|---|
| 3775 | arcGISToolCategory=_(u'Statistics\\Model Data\\Linear Mixed Models'),
|
|---|
| 3776 | dependencies=[ArcGISDependency(9, 1), RDependency(2, 7, 0), RPackageDependency(u'rgdal')])
|
|---|
| 3777 |
|
|---|
| 3778 | CopyArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'cls', LinearMixedModel.PredictFromArcGISRasters, u'cls')
|
|---|
| 3779 |
|
|---|
| 3780 | AddArgumentMetadata(LinearMixedModel.PredictFromArcGISRasters, u'inputModelFile',
|
|---|
| 3781 | typeMetadata=FileTypeMetadata(mustExist=True),
|
|---|
| 3782 | description=_(
|
|---|
| 3783 | u"""File that contains the fitted model, generated by the Fit Linear
|
|---|
| 3784 | Mixed Model tool."""),
|
|---|
| 3785 | arcGISDisplayName=_(u'Input model file'))
|
|---|
| 3786 |
|
|---|
| 3787 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputPredictorRasters', LinearMixedModel.PredictFromArcGISRasters, u'inputPredictorRasters')
|
|---|
| 3788 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'variableNames', LinearMixedModel.PredictFromArcGISRasters, u'variableNames')
|
|---|
| 3789 | CopyArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'outputResponseRaster', LinearMixedModel.PredictFromArcGISRasters, u'outputResponseRaster')
|
|---|
| 3790 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster', LinearMixedModel.PredictFromArcGISRasters, u'templateRaster')
|
|---|
| 3791 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques', LinearMixedModel.PredictFromArcGISRasters, u'resamplingTechniques')
|
|---|
| 3792 | CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', LinearMixedModel.PredictFromArcGISRasters, u'ignoreOutOfRangeValues')
|
|---|
| 3793 | CopyArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'buildPyramids', LinearMixedModel.PredictFromArcGISRasters, u'buildPyramids')
|
|---|
| 3794 | CopyArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'overwriteExisting', LinearMixedModel.PredictFromArcGISRasters, u'overwriteExisting')
|
|---|
| 3795 |
|
|---|
| 3796 | ###############################################################################
|
|---|
| 3797 | # Metadata: ModelEvaluation class
|
|---|
| 3798 | ###############################################################################
|
|---|
| 3799 |
|
|---|
| 3800 | AddClassMetadata(ModelEvaluation,
|
|---|
| 3801 | shortDescription=_(u'Provides methods for evaluating fitted models.'),
|
|---|
| 3802 | isExposedAsCOMServer=True,
|
|---|
| 3803 | comIID=u'{D5DC8EA1-B51A-4B0F-A43D-F91C516CD8A1}',
|
|---|
| 3804 | comCLSID=u'{DD4042FC-A356-480E-9211-62B51C2113A7}')
|
|---|
| 3805 |
|
|---|
| 3806 | # Public method: ModelEvaluation.PlotPerformanceOfBinaryClassificationModel
|
|---|
| 3807 |
|
|---|
| 3808 | AddMethodMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel,
|
|---|
| 3809 | shortDescription=_(u'Plots the performance of a binary classification model (a model where the response variable has two possible values) using the R ROCR package.'),
|
|---|
| 3810 | isExposedToPythonCallers=True,
|
|---|
| 3811 | isExposedByCOM=True,
|
|---|
| 3812 | isExposedAsArcGISTool=True,
|
|---|
| 3813 | arcGISDisplayName=_(u'Plot Performance of Binary Classification Model'),
|
|---|
| 3814 | arcGISToolCategory=_(u'Statistics\\Model Data\\Evaluate Model Performance'),
|
|---|
| 3815 | dependencies=[RDependency(2, 6, 0), RPackageDependency(u'ROCR')])
|
|---|
| 3816 |
|
|---|
| 3817 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'cls',
|
|---|
| 3818 | typeMetadata=ClassOrClassInstanceTypeMetadata(cls=ModelEvaluation),
|
|---|
| 3819 | description=_(u'%s class or an instance of it.') % ModelEvaluation.__name__)
|
|---|
| 3820 |
|
|---|
| 3821 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'inputModelFile',
|
|---|
| 3822 | typeMetadata=FileTypeMetadata(mustExist=True),
|
|---|
| 3823 | description=_(
|
|---|
| 3824 | u"""File that contains the fitted model, generated by one of the
|
|---|
| 3825 | fitting tools (e.g. Fit GLM).
|
|---|
| 3826 |
|
|---|
| 3827 | The model must conform to two constraints:
|
|---|
| 3828 |
|
|---|
| 3829 | * It must be a binary classification model, i.e. there must be only
|
|---|
| 3830 | two possible values of the response variable in the data used to
|
|---|
| 3831 | build the model.
|
|---|
| 3832 |
|
|---|
| 3833 | * The larger response value must represent the "positive" result, and
|
|---|
| 3834 | the smaller the "negative" result.
|
|---|
| 3835 |
|
|---|
| 3836 | These constraints would be satisfied, for example, by a species
|
|---|
| 3837 | presence/absence model where the response variable was either 1
|
|---|
| 3838 | (species present) or 0 (species not present). It would not be
|
|---|
| 3839 | satisfied from a species density model where the response variable was
|
|---|
| 3840 | the number of species per unit area. Nor would it be satisfied if the
|
|---|
| 3841 | response variable values were switched, such that 1 represented
|
|---|
| 3842 | absence and 0 represented presence.
|
|---|
| 3843 |
|
|---|
| 3844 | If the first constraint is violated, you will receve the error::
|
|---|
| 3845 |
|
|---|
| 3846 | ROCR currently supports only evaluation of binary classification tasks.
|
|---|
| 3847 |
|
|---|
| 3848 | If the second constraint is violated, you may not receive an error.
|
|---|
| 3849 | Instead, the output plot and statistics will be wrong. Take care to
|
|---|
| 3850 | avoid violating this constraint."""),
|
|---|
| 3851 | arcGISDisplayName=_(u'Input model file'))
|
|---|
| 3852 |
|
|---|
| 3853 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'measure1',
|
|---|
| 3854 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'acc', u'cal', u'chisq', u'ecost', u'err', u'f', u'fall', u'fnr', u'fpr', u'lift', u'mat', u'mi', u'miss', u'npv', u'odds', u'ppv', u'pcfall', u'pcmiss', u'phi', u'prec', u'rch', u'rec', u'rnp', u'rpp', u'sar', u'sens', u'spec', u'tnr', u'tpr']),
|
|---|
| 3855 | description=_(
|
|---|
| 3856 | u"""The first performance measure to plot.
|
|---|
| 3857 |
|
|---|
| 3858 | This measure will serve as the Y coordinate for the plot. If a second
|
|---|
| 3859 | measure is specified, it will serve as the X coordinate. Otherwise the
|
|---|
| 3860 | model cutoff will serve as the X coordinate.
|
|---|
| 3861 |
|
|---|
| 3862 | When modeling a binary response, an important task is selecting a
|
|---|
| 3863 | cutoff. The cutoff is the value used to determine whether a given
|
|---|
| 3864 | predicted value of the response variable should be classified as
|
|---|
| 3865 | positive or negative. Because the model typically outputs predictions
|
|---|
| 3866 | along a continual range (e.g. between 0.0 and 1.0), you must determine
|
|---|
| 3867 | the portions of the range that represent positive and negative
|
|---|
| 3868 | responses. Response values less than the cutoff are classified as
|
|---|
| 3869 | negative and those greater than or equal to the cutoff are classified
|
|---|
| 3870 | as positive.
|
|---|
| 3871 |
|
|---|
| 3872 | Plots that use the cutoff as the X coordinate show you a measure of
|
|---|
| 3873 | the model's performance for the range of cutoff values. You can use
|
|---|
| 3874 | this this plot to select the cutoff value that maximizes, minimizes,
|
|---|
| 3875 | or otherwise optimizes a given performance measure. For example, if
|
|---|
| 3876 | you selected "acc" (accuracy) as the performance measure, you could
|
|---|
| 3877 | find the cutoff value that maximized the model's accuracy by finding
|
|---|
| 3878 | the highest point on the plot and then looking up the cutoff value on
|
|---|
| 3879 | the X axis.
|
|---|
| 3880 |
|
|---|
| 3881 | Plots that use a second performance measure as the X coordinate allow
|
|---|
| 3882 | you to balance one measure of the model's performance against another.
|
|---|
| 3883 | The plot will be color-coded by cutoff value, allowing you to look up
|
|---|
| 3884 | the cutoff for a given combination of the two performance measures.
|
|---|
| 3885 | Selecting an optimal cutoff will often involve making a tradeoff
|
|---|
| 3886 | between the two measures.
|
|---|
| 3887 |
|
|---|
| 3888 | For example, to create a classic receiver operating characteristic
|
|---|
| 3889 | (ROC) curve, select "tpr" (true positive rate) for the first measure
|
|---|
| 3890 | and "fpr" (false positive rate) for the second measure.
|
|---|
| 3891 |
|
|---|
| 3892 | For the first performance measure, you can select from the following
|
|---|
| 3893 | list. This list is taken from the documentation of the R ROCR package
|
|---|
| 3894 | that implements the plots. Some of them do not allow selection of a
|
|---|
| 3895 | second performance measure (you must omit it when invoking this tool),
|
|---|
| 3896 | as noted in the measure's description.
|
|---|
| 3897 |
|
|---|
| 3898 | In these descriptions, Y and Yhat are random variables representing
|
|---|
| 3899 | the class and the prediction for a randomly drawn sample,
|
|---|
| 3900 | respectively. + and - are the positive and negative class,
|
|---|
| 3901 | respectively. The following abbreviations are used for for empirical
|
|---|
| 3902 | quantities: P (# positive samples), N (# negative samples), TP (# true
|
|---|
| 3903 | positives), TN (# true negatives), FP (# false positives), FN (# false
|
|---|
| 3904 | negatives).
|
|---|
| 3905 |
|
|---|
| 3906 | * acc - Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
|
|---|
| 3907 |
|
|---|
| 3908 | * cal - Calibration error. The calibration error is the absolute
|
|---|
| 3909 | difference between predicted confidence and actual reliability. This
|
|---|
| 3910 | error is estimated at all cutoffs by sliding a window of size 100
|
|---|
| 3911 | across the range of possible cutoffs. E.g., if for several positive
|
|---|
| 3912 | samples the output of the classifier is around 0.75, you might
|
|---|
| 3913 | expect from a well-calibrated classifier that the fraction of them
|
|---|
| 3914 | which is correctly predicted as positive is also around 0.75. In a
|
|---|
| 3915 | well-calibrated classifier, the probabilistic confidence estimates
|
|---|
| 3916 | are realistic. Only for use with probabilistic output (i.e. scores
|
|---|
| 3917 | between 0 and 1; some of the other measures actually support values
|
|---|
| 3918 | between -1 and 1).
|
|---|
| 3919 |
|
|---|
| 3920 | * chisq - Chi square test statistic. Note that R might raise a warning
|
|---|
| 3921 | if the sample size is too small.
|
|---|
| 3922 |
|
|---|
| 3923 | * ecost - Expected cost. For details on cost curves, cf. Drummond &
|
|---|
| 3924 | Holte 2000, 2004. ecost has an obligatory x axis, the so-called
|
|---|
| 3925 | 'probability-cost function'; thus you may not specify a second
|
|---|
| 3926 | performance measure.
|
|---|
| 3927 |
|
|---|
| 3928 | * err - Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
|
|---|
| 3929 |
|
|---|
| 3930 | * f - Precision-recall F measure (van Rijsbergen, 1979). Weighted
|
|---|
| 3931 | harmonic mean of precision (P) and recall (R). F = 1/ (alpha*1/P +
|
|---|
| 3932 | (1-alpha)*1/R). For this tool, alpha is always 1/2, so the mean is
|
|---|
| 3933 | balanced.
|
|---|
| 3934 |
|
|---|
| 3935 | * fall - Fallout. Same as fpr.
|
|---|
| 3936 |
|
|---|
| 3937 | * fnr - False negative rate. P(Yhat = - | Y = +). Estimated as: FN/P.
|
|---|
| 3938 |
|
|---|
| 3939 | * fpr - False positive rate. P(Yhat = + | Y = -). Estimated as: FP/N.
|
|---|
| 3940 |
|
|---|
| 3941 | * lift - Lift value. P(Yhat = + | Y = +)/P(Yhat = +).
|
|---|
| 3942 |
|
|---|
| 3943 | * mat - Matthews correlation coefficient. Same as phi.
|
|---|
| 3944 |
|
|---|
| 3945 | * mi - Mutual information. I(Yhat, Y) := H(Y) - H(Y | Yhat), where H
|
|---|
| 3946 | is the (conditional) entropy. Entropies are estimated naively (no
|
|---|
| 3947 | bias correction).
|
|---|
| 3948 |
|
|---|
| 3949 | * miss - Miss. Same as fnr.
|
|---|
| 3950 |
|
|---|
| 3951 | * npv - Negative predictive value. P(Y = - | Yhat = -). Estimated as:
|
|---|
| 3952 | TN/(TN+FN).
|
|---|
| 3953 |
|
|---|
| 3954 | * odds - Odds ratio. (TP*TN)/(FN*FP). Note that odds ratio produces
|
|---|
| 3955 | Inf or NA values for all cutoffs corresponding to FN=0 or FP=0. This
|
|---|
| 3956 | can substantially decrease the plotted cutoff region.
|
|---|
| 3957 |
|
|---|
| 3958 | * pcfall - Prediction-conditioned fallout. P(Y = - | Yhat = +).
|
|---|
| 3959 | Estimated as: FP/(TP+FP).
|
|---|
| 3960 |
|
|---|
| 3961 | * pcmiss - Prediction-conditioned miss. P(Y = + | Yhat = -). Estimated
|
|---|
| 3962 | as: FN/(TN+FN).
|
|---|
| 3963 |
|
|---|
| 3964 | * phi - Phi correlation coefficient. (TP*TN -
|
|---|
| 3965 | FP*FN)/(sqrt((TP+FN)*(TN+FP)*(TP+FP)*(TN+FN))). Yields a number
|
|---|
| 3966 | between -1 and 1, with 1 indicating a perfect prediction, 0 indicating
|
|---|
| 3967 | a random prediction. Values below 0 indicate a worse than random
|
|---|
| 3968 | prediction.
|
|---|
| 3969 |
|
|---|
| 3970 | * ppv - Positive predictive value. P(Y = + | Yhat = +). Estimated as:
|
|---|
| 3971 | TP/(TP+FP).
|
|---|
| 3972 |
|
|---|
| 3973 | * prec - Precision. Same as ppv.
|
|---|
| 3974 |
|
|---|
| 3975 | * rch - ROC convex hull. A ROC (=tpr vs fpr) curve with concavities
|
|---|
| 3976 | (which represent suboptimal choices of cutoff) removed (Fawcett
|
|---|
| 3977 | 2001). Since the result is already a parametric performance curve,
|
|---|
| 3978 | it cannot be used in combination with other measures (thus you may
|
|---|
| 3979 | not specify a second performance measure).
|
|---|
| 3980 |
|
|---|
| 3981 | * rec - Recall. Same as tpr.
|
|---|
| 3982 |
|
|---|
| 3983 | * rnp - Rate of negative predictions. P(Yhat = -). Estimated as:
|
|---|
| 3984 | (TN+FN)/(TP+FP+TN+FN).
|
|---|
| 3985 |
|
|---|
| 3986 | * rpp - Rate of positive predictions. P(Yhat = +). Estimated as:
|
|---|
| 3987 | (TP+FP)/(TP+FP+TN+FN).
|
|---|
| 3988 |
|
|---|
| 3989 | * sar - Score combinining performance measures of different
|
|---|
| 3990 | characteristics, in the attempt of creating a more "robust" measure
|
|---|
| 3991 | (cf. Caruana R., ROCAI2004): SAR = 1/3 * (Accuracy + Area under the
|
|---|
| 3992 | ROC curve + Root mean-squared error).
|
|---|
| 3993 |
|
|---|
| 3994 | * sens - Sensitivity. Same as tpr.
|
|---|
| 3995 |
|
|---|
| 3996 | * spec - Specificity. Same as tnr.
|
|---|
| 3997 |
|
|---|
| 3998 | * tnr - True negative rate. P(Yhat = - | Y = -). Estimated as: TN/N.
|
|---|
| 3999 |
|
|---|
| 4000 | * tpr - True positive rate. P(Yhat = + | Y = +). Estimated as: TP/P.
|
|---|
| 4001 | """),
|
|---|
| 4002 | arcGISDisplayName=_(u'First performance measure to plot'))
|
|---|
| 4003 |
|
|---|
| 4004 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'measure2',
|
|---|
| 4005 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, allowedValues=[u'acc', u'cal', u'chisq', u'err', u'f', u'fall', u'fnr', u'fpr', u'lift', u'mat', u'mi', u'miss', u'npv', u'odds', u'ppv', u'pcfall', u'pcmiss', u'phi', u'prec', u'rec', u'rnp', u'rpp', u'sar', u'sens', u'spec', u'tnr', u'tpr']),
|
|---|
| 4006 | description=_(
|
|---|
| 4007 | u"""The second performance measure to plot.
|
|---|
| 4008 |
|
|---|
| 4009 | If specified, this performance measure will be used as the X
|
|---|
| 4010 | coordinate of the plot instead of the model cutoff value. Please see
|
|---|
| 4011 | the documentation for the First Performance Measure for more
|
|---|
| 4012 | information."""),
|
|---|
| 4013 | arcGISDisplayName=_(u'Second performance measure to plot'))
|
|---|
| 4014 |
|
|---|
| 4015 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'summaryStats',
|
|---|
| 4016 | typeMetadata=ListTypeMetadata(elementType=UnicodeStringTypeMetadata(allowedValues=[u'auc', u'mxe', u'prbe', u'rmse']), canBeNone=True),
|
|---|
| 4017 | description=_(
|
|---|
| 4018 | u"""List of summary statistics to calculate for the model.
|
|---|
| 4019 |
|
|---|
| 4020 | The summary statistics are logged as information messages. If you
|
|---|
| 4021 | invoke this tool from ArcGIS, you will see them in the geoprocessing
|
|---|
| 4022 | output window. If you invoke this tool programmatically, the summary
|
|---|
| 4023 | statistics are returned in a list as well.
|
|---|
| 4024 |
|
|---|
| 4025 | The available statistics are:
|
|---|
| 4026 |
|
|---|
| 4027 | * auc - Area under the ROC curve. This is equal to the value of the
|
|---|
| 4028 | Wilcoxon-Mann-Whitney test statistic and also the probability that
|
|---|
| 4029 | the classifier will score are randomly drawn positive sample higher
|
|---|
| 4030 | than a randomly drawn negative sample.
|
|---|
| 4031 |
|
|---|
| 4032 | * mxe - Mean cross-entropy. Only for use with probabilistic response
|
|---|
| 4033 | variables (i.e. scores between 0 and 1). MXE := - 1/(P+N)
|
|---|
| 4034 | sum_{y_i=+} ln(yhat_i) + sum_{y_i=-} ln(1-yhat_i).
|
|---|
| 4035 |
|
|---|
| 4036 | * prbe - Precision-recall break-even point. The cutoff where
|
|---|
| 4037 | precision and recall are equal. At this point, positive and negative
|
|---|
| 4038 | predictions are made at the same rate as their prevalence in the
|
|---|
| 4039 | data. In the event that there are multiple break-even points, the
|
|---|
| 4040 | cutoff for the last one will be returned.
|
|---|
| 4041 |
|
|---|
| 4042 | * rmse - Root-mean-squared error. Only for use with numerical class
|
|---|
| 4043 | labels. RMSE := sqrt(1/(P+N) sum_i (y_i - yhat_i)^2).
|
|---|
| 4044 |
|
|---|
| 4045 | The summary statistics are calculated by the R ROCR package."""),
|
|---|
| 4046 | arcGISDisplayName=_(u'Summary statistics to calculate'))
|
|---|
| 4047 |
|
|---|
| 4048 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'title',
|
|---|
| 4049 | typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
|
|---|
| 4050 | description=_(
|
|---|
| 4051 | u"""Title for the plot. If no title is provided, the plot will not
|
|---|
| 4052 | have a title."""),
|
|---|
| 4053 | arcGISDisplayName=_(u'Plot title'))
|
|---|
| 4054 |
|
|---|
| 4055 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'evaluationDataTable',
|
|---|
| 4056 | typeMetadata=ArcGISTableViewTypeMetadata(canBeNone=True, mustExist=True),
|
|---|
| 4057 | description=_(
|
|---|
| 4058 | u"""ArcGIS table, table view, feature class, or feature layer
|
|---|
| 4059 | containing the data for evaluating the model. If an evaluation data
|
|---|
| 4060 | table is not provided, the model will be evaluated using the data that
|
|---|
| 4061 | was used to fit the model. (A copy of this data exists in the input
|
|---|
| 4062 | model file.)
|
|---|
| 4063 |
|
|---|
| 4064 | The evaluation data table must have the same fields as the table that
|
|---|
| 4065 | was used to fit the model. The model will be evaluated by reading
|
|---|
| 4066 | those values and computing the predicted response for each row and
|
|---|
| 4067 | comparing it to the actual response.
|
|---|
| 4068 |
|
|---|
| 4069 | When doing statistical modeling, it is considered good practice to
|
|---|
| 4070 | split your input data into a training data set and and an evaluation
|
|---|
| 4071 | data set, fit the model using the training data, and evaluate the
|
|---|
| 4072 | model using the evaluation data. Please consult the scientific
|
|---|
| 4073 | literature for advice on this procedure. The following article is a
|
|---|
| 4074 | good place to start.
|
|---|
| 4075 |
|
|---|
| 4076 | Guisan, A., and Zimmerman, N.E. 2000. Predictive habitat distribution
|
|---|
| 4077 | models in ecology. Ecological Modeling 135: 147-186."""),
|
|---|
| 4078 | arcGISDisplayName=_(u'Evaluation data table'),
|
|---|
| 4079 | arcGISCategory=_(u'Evaluation data options'),
|
|---|
| 4080 | dependencies=[ArcGISDependency(9, 1)])
|
|---|
| 4081 |
|
|---|
| 4082 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'where',
|
|---|
| 4083 | typeMetadata=SQLWhereClauseTypeMetadata(canBeNone=True),
|
|---|
| 4084 | description=ArcGIS91SelectCursor.__init__.__doc__.Obj.GetArgumentByName(u'where').Description,
|
|---|
| 4085 | arcGISParameterDependencies=[u'evaluationDataTable'],
|
|---|
| 4086 | arcGISDisplayName=_(u'Where clause'),
|
|---|
| 4087 | arcGISCategory=_(u'Evaluation data options'),
|
|---|
| 4088 | dependencies=[ArcGISDependency(9, 1)])
|
|---|
| 4089 |
|
|---|
| 4090 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'outputPlotFile',
|
|---|
| 4091 | typeMetadata=FileTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'inputModelFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
|
|---|
| 4092 | description=_(
|
|---|
| 4093 | u"""File to create for the plot. If this parameter is specified, the
|
|---|
| 4094 | plot will be written to the file rather than displayed on the screen.
|
|---|
| 4095 |
|
|---|
| 4096 | The file must have one of the following two extensions, which
|
|---|
| 4097 | determines the format that will be used:
|
|---|
| 4098 |
|
|---|
| 4099 | * .emf - Windows enhanced metafile (EMF) format. This is a vector
|
|---|
| 4100 | format that may be printed and resized without any pixelation and is
|
|---|
| 4101 | therefore suitable for use in printable documents that recognize
|
|---|
| 4102 | this format (e.g. Microsoft Word or Microsoft Visio).
|
|---|
| 4103 |
|
|---|
| 4104 | * .png - Portable network graphics (PNG) format. This is a compressed,
|
|---|
| 4105 | lossless, highly portable raster format suitable for use in web
|
|---|
| 4106 | pages or other locations where a raster format is desired. Most
|
|---|
| 4107 | scientific journals accept PNG; they typically request that files
|
|---|
| 4108 | have a resolution of at least 1000 DPI.
|
|---|
| 4109 | """),
|
|---|
| 4110 | direction=u'Output',
|
|---|
| 4111 | arcGISDisplayName=_(u'Output file'),
|
|---|
| 4112 | arcGISCategory=_(u'Output file options'))
|
|---|
| 4113 |
|
|---|
| 4114 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'res',
|
|---|
| 4115 | typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
|
|---|
| 4116 | description=_(
|
|---|
| 4117 | u"""PNG plot file resolution, in dots per inch (DPI). The default is
|
|---|
| 4118 | set to a high value (1000) because this is the minimum resolution
|
|---|
| 4119 | typically required by scientific journals that accept figures in PNG
|
|---|
| 4120 | format.
|
|---|
| 4121 |
|
|---|
| 4122 | This parameter is ignored for EMF format because it is a vector
|
|---|
| 4123 | format."""),
|
|---|
| 4124 | arcGISDisplayName=_(u'Plot resolution, in DPI'),
|
|---|
| 4125 | arcGISCategory=_(u'Output file options'))
|
|---|
| 4126 |
|
|---|
| 4127 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'width',
|
|---|
| 4128 | typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
|
|---|
| 4129 | description=_(
|
|---|
| 4130 | u"""Plot file width in thousandths of inches (for EMF format; e.g. the
|
|---|
| 4131 | value 3000 is 3 inches) or pixels (for PNG format)."""),
|
|---|
| 4132 | arcGISDisplayName=_(u'Plot width'),
|
|---|
| 4133 | arcGISCategory=_(u'Output file options'))
|
|---|
| 4134 |
|
|---|
| 4135 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'height',
|
|---|
| 4136 | typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
|
|---|
| 4137 | description=_(
|
|---|
| 4138 | u"""Plot file height in thousandths of inches (for EMF format; e.g. the
|
|---|
| 4139 | value 3000 is 3 inches) or pixels (for PNG format)."""),
|
|---|
| 4140 | arcGISDisplayName=_(u'Plot height'),
|
|---|
| 4141 | arcGISCategory=_(u'Output file options'))
|
|---|
| 4142 |
|
|---|
| 4143 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'pointSize',
|
|---|
| 4144 | typeMetadata=FloatTypeMetadata(minValue=1.0),
|
|---|
| 4145 | description=_(
|
|---|
| 4146 | u"""The default pointsize of plotted text."""),
|
|---|
| 4147 | arcGISDisplayName=_(u'Default pointsize of plotted text'),
|
|---|
| 4148 | arcGISCategory=_(u'Output file options'))
|
|---|
| 4149 |
|
|---|
| 4150 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'bg',
|
|---|
| 4151 | typeMetadata=UnicodeStringTypeMetadata(),
|
|---|
| 4152 | description=_(
|
|---|
| 4153 | u"""PNG plot file background color. The color must be a valid name in
|
|---|
| 4154 | R's color palette, or "transparent" if there is no background color.
|
|---|
| 4155 | This parameter is ignored if the plot format file is EMF."""),
|
|---|
| 4156 | arcGISDisplayName=_(u'Plot background color'),
|
|---|
| 4157 | arcGISCategory=_(u'Output file options'))
|
|---|
| 4158 |
|
|---|
| 4159 | AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'overwriteExisting',
|
|---|
| 4160 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 4161 | description=_(
|
|---|
| 4162 | u"""If True, the output file will be overwritten, if it exists. If
|
|---|
| 4163 | False, a ValueError will be raised if the output file exists."""),
|
|---|
| 4164 | initializeToArcGISGeoprocessorVariable=u'OverwriteOutput')
|
|---|
| 4165 |
|
|---|
| 4166 | AddResultMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'summaryStatValues',
|
|---|
| 4167 | typeMetadata=ListTypeMetadata(elementType=FloatTypeMetadata(), canBeNone=True),
|
|---|
| 4168 | description=_(
|
|---|
| 4169 | u"""The calculated summary statistics. This will be the same length as
|
|---|
| 4170 | the summaryStats parameter. If summaryStats was None, this will be
|
|---|
| 4171 | None."""))
|
|---|
| 4172 |
|
|---|
| 4173 | # Public method: ModelEvaluation.PlotROCOfBinaryClassificationModel
|
|---|
| 4174 |
|
|---|
| 4175 | AddMethodMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel,
|
|---|
| 4176 | shortDescription=_(u'Plots the receiver operating characteristic (ROC) curve of a binary classification model (a model where the response variable has two possible values) using the R ROCR package.'),
|
|---|
| 4177 | isExposedToPythonCallers=True,
|
|---|
| 4178 | isExposedByCOM=True,
|
|---|
| 4179 | isExposedAsArcGISTool=True,
|
|---|
| 4180 | arcGISDisplayName=_(u'Plot ROC of Binary Classification Model'),
|
|---|
| 4181 | arcGISToolCategory=_(u'Statistics\\Model Data\\Evaluate Model Performance'),
|
|---|
| 4182 | dependencies=[RDependency(2, 6, 0), RPackageDependency(u'ROCR')])
|
|---|
| 4183 |
|
|---|
| 4184 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'cls', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'cls')
|
|---|
| 4185 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'inputModelFile', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'inputModelFile')
|
|---|
| 4186 |
|
|---|
| 4187 | AddArgumentMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'cutoff',
|
|---|
| 4188 | typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'None', u'Automatic', u'Specified'], makeLowercase=True),
|
|---|
| 4189 | description=_(
|
|---|
| 4190 | u"""Cutoff to display in the plot and to use for the calculation of
|
|---|
| 4191 | summary statistics. The possible values are:
|
|---|
| 4192 |
|
|---|
| 4193 | * None - the plot will not display a cutoff and summary statistics
|
|---|
| 4194 | will not be calculated.
|
|---|
| 4195 |
|
|---|
| 4196 | * Automatic - the tool will automatically select the cutoff value for
|
|---|
| 4197 | the point on the curve that is closest to the point of perfect
|
|---|
| 4198 | classification (the upper-left corner of the plot).
|
|---|
| 4199 |
|
|---|
| 4200 | * Specified - the tool will use the cutoff value that you specify for
|
|---|
| 4201 | the Cutoff Value parameter.
|
|---|
| 4202 |
|
|---|
| 4203 | When doing binary classification, an important task is selecting a
|
|---|
| 4204 | cutoff. The cutoff is the value used to determine whether a given
|
|---|
| 4205 | predicted value of the response variable should be classified as
|
|---|
| 4206 | positive or negative. Because the model typically outputs predictions
|
|---|
| 4207 | along a continual range (e.g. between 0.0 and 1.0), you must determine
|
|---|
| 4208 | the portions of the range that represent positive and negative
|
|---|
| 4209 | responses. Response values less than the cutoff are classified as
|
|---|
| 4210 | negative and those greater than or equal to the cutoff are classified
|
|---|
| 4211 | as positive.
|
|---|
| 4212 |
|
|---|
| 4213 | In ROC analysis, you choose the cutoff value by contemplating the
|
|---|
| 4214 | tradeoff you will obtain between the true positive rate and false
|
|---|
| 4215 | positive rate of the model. A perfect model has a true positive rate
|
|---|
| 4216 | of 1 and a false positive rate of 0. This corresponds to the
|
|---|
| 4217 | upper-left corner of the ROC plot. Depending on your scenario, one
|
|---|
| 4218 | rate may be more important than another. In this case, you should run
|
|---|
| 4219 | this tool once with Cutoff set to Automatic, inspect the plot, choose
|
|---|
| 4220 | a cutoff that meets your goals, and run the tool a second time with
|
|---|
| 4221 | Cutoff set to Specified. If one rate is not more important than the
|
|---|
| 4222 | other, you might be satisfied with the Automatic option. When in
|
|---|
| 4223 | doubt, consult a statistician."""),
|
|---|
| 4224 | arcGISDisplayName=_(u'Cutoff'))
|
|---|
| 4225 |
|
|---|
| 4226 | AddArgumentMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'cutoffValue',
|
|---|
| 4227 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 4228 | description=_(
|
|---|
| 4229 | u"""Cutoff to value to use when the Cutoff parameter is set to
|
|---|
| 4230 | Specifed. Please see the documentation for that parameter for more
|
|---|
| 4231 | information."""),
|
|---|
| 4232 | arcGISDisplayName=_(u'Cutoff value'))
|
|---|
| 4233 |
|
|---|
| 4234 | AddArgumentMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'colorize',
|
|---|
| 4235 | typeMetadata=BooleanTypeMetadata(),
|
|---|
| 4236 | description=_(
|
|---|
| 4237 | u"""If True, the ROC curve will be colorized by cutoff value. If
|
|---|
| 4238 | False, the ROC curve will be black."""),
|
|---|
| 4239 | arcGISDisplayName=_(u'Colorize ROC curve'))
|
|---|
| 4240 |
|
|---|
| 4241 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'title', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'title')
|
|---|
| 4242 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'evaluationDataTable', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'evaluationDataTable')
|
|---|
| 4243 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'where', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'where')
|
|---|
| 4244 |
|
|---|
| 4245 | AddArgumentMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'outputSummaryFile',
|
|---|
| 4246 | typeMetadata=FileTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'inputModelFile', u'outputPlotFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
|
|---|
| 4247 | description=_(
|
|---|
| 4248 | u"""Text file to receive the model summary statistics that this tool
|
|---|
| 4249 | reports as log messages. If a cutoff is provided or calculated by this
|
|---|
| 4250 | tool, a contingency table and associated statistics will also be
|
|---|
| 4251 | written."""),
|
|---|
| 4252 | direction=u'Output',
|
|---|
| 4253 | arcGISDisplayName=_(u'Model summary statistics file'),
|
|---|
| 4254 | arcGISCategory=_(u'Output file options'))
|
|---|
| 4255 |
|
|---|
| 4256 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'outputPlotFile', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'outputPlotFile')
|
|---|
| 4257 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'res', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'res')
|
|---|
| 4258 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'width', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'width')
|
|---|
| 4259 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'height', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'height')
|
|---|
| 4260 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'pointSize', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'pointSize')
|
|---|
| 4261 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'bg', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'bg')
|
|---|
| 4262 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'overwriteExisting', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'overwriteExisting')
|
|---|
| 4263 |
|
|---|
| 4264 | AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'outputCutoff',
|
|---|
| 4265 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 4266 | description=_(
|
|---|
| 4267 | u"""Output cutoff value. The returned value depends on what was
|
|---|
| 4268 | specified for the Cutoff parameter:
|
|---|
| 4269 |
|
|---|
| 4270 | * None - no value is returned (in Python, None is returned, often
|
|---|
| 4271 | called NULL in other programming languages).
|
|---|
| 4272 |
|
|---|
| 4273 | * Automatic - the automatically calculated value is returned.
|
|---|
| 4274 |
|
|---|
| 4275 | * Specified - the value specified for the Cutoff Value parameter is
|
|---|
| 4276 | returned.
|
|---|
| 4277 | """),
|
|---|
| 4278 | arcGISDisplayName=_(u'Output cutoff'))
|
|---|
| 4279 |
|
|---|
| 4280 | AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'tp',
|
|---|
| 4281 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 4282 | description=_(
|
|---|
| 4283 | u"""The number of true positives obtained for the cutoff value that
|
|---|
| 4284 | was used. If no cutoff was used, no value is returned (in Python, None
|
|---|
| 4285 | is returned, often called NULL in other programming languages)."""))
|
|---|
| 4286 |
|
|---|
| 4287 | AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'fp',
|
|---|
| 4288 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 4289 | description=_(
|
|---|
| 4290 | u"""The number of false positives obtained for the cutoff value that
|
|---|
| 4291 | was used. If no cutoff was used, no value is returned (in Python, None
|
|---|
| 4292 | is returned, often called NULL in other programming languages)."""))
|
|---|
| 4293 |
|
|---|
| 4294 | AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'fn',
|
|---|
| 4295 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 4296 | description=_(
|
|---|
| 4297 | u"""The number of false negatives obtained for the cutoff value that
|
|---|
| 4298 | was used. If no cutoff was used, no value is returned (in Python, None
|
|---|
| 4299 | is returned, often called NULL in other programming languages)."""))
|
|---|
| 4300 |
|
|---|
| 4301 | AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'tn',
|
|---|
| 4302 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 4303 | description=_(
|
|---|
| 4304 | u"""The number of true negatives obtained for the cutoff value that
|
|---|
| 4305 | was used. If no cutoff was used, no value is returned (in Python, None
|
|---|
| 4306 | is returned, often called NULL in other programming languages)."""))
|
|---|
| 4307 |
|
|---|
| 4308 | AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'auc',
|
|---|
| 4309 | typeMetadata=FloatTypeMetadata(),
|
|---|
| 4310 | description=_(u"""The area under the ROC curve."""))
|
|---|
| 4311 |
|
|---|
| 4312 | AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'mxe',
|
|---|
| 4313 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 4314 | description=_(
|
|---|
| 4315 | u"""Mean cross-entropy for the model, if the model is probabilistic
|
|---|
| 4316 | (the respose ranges from 0 to 1), or None otherwise. MXE := - 1/(P+N)
|
|---|
| 4317 | sum_{y_i=+} ln(yhat_i) + sum_{y_i=-} ln(1-yhat_i)."""))
|
|---|
| 4318 |
|
|---|
| 4319 | AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'prbe',
|
|---|
| 4320 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 4321 | description=_(
|
|---|
| 4322 | u"""Precision-recall break-even point for the model, which is the
|
|---|
| 4323 | cutoff where precision and recall are equal. At this point, positive
|
|---|
| 4324 | and negative predictions are made at the same rate as their prevalence
|
|---|
| 4325 | in the data. In the event that there are multiple break-even points,
|
|---|
| 4326 | the cutoff for the last one will be returned. If there is no such
|
|---|
| 4327 | point, None will be returned. (I am not actually sure this is
|
|---|
| 4328 | possible)."""))
|
|---|
| 4329 |
|
|---|
| 4330 | AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'rmse',
|
|---|
| 4331 | typeMetadata=FloatTypeMetadata(canBeNone=True),
|
|---|
| 4332 | description=_(
|
|---|
| 4333 | u"""Root-mean-squared error, if the response variable is a numeric
|
|---|
| 4334 | value (rather than a categorical string), or None otherwise. RMSE :=
|
|---|
| 4335 | sqrt(1/(P+N) sum_i (y_i - yhat_i)^2)."""))
|
|---|
| 4336 |
|
|---|
| 4337 | # Public method: ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords
|
|---|
| 4338 |
|
|---|
| 4339 | AddMethodMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords,
|
|---|
| 4340 | shortDescription=_(u'Randomly designates the records of a table as either training records (for fitting a statistical model) or evaluation records (for evaluating the model).'),
|
|---|
| 4341 | isExposedToPythonCallers=True,
|
|---|
| 4342 | isExposedByCOM=True,
|
|---|
| 4343 | isExposedAsArcGISTool=True,
|
|---|
| 4344 | arcGISDisplayName=_(u'Randomly Split Table Into Training and Evaluation Records'),
|
|---|
| 4345 | arcGISToolCategory=_(u'Statistics\\Model Data\\Evaluate Model Performance'),
|
|---|
| 4346 | dependencies=[ArcGISDependency(9, 1)])
|
|---|
| 4347 |
|
|---|
| 4348 | CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'cls', ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'cls')
|
|---|
| 4349 |
|
|---|
| 4350 | AddArgumentMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'table',
|
|---|
| 4351 | typeMetadata=ArcGISTableViewTypeMetadata(mustExist=True),
|
|---|
| 4352 | description=_(
|
|---|
| 4353 | u"""ArcGIS table, table view, feature class, or feature layer."""),
|
|---|
| 4354 | arcGISDisplayName=_(u'Table'))
|
|---|
| 4355 |
|
|---|
| 4356 | AddArgumentMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'percentEvaluation',
|
|---|
| 4357 | typeMetadata=FloatTypeMetadata(minValue=0.0, maxValue=100.0),
|
|---|
| 4358 | description=_(
|
|---|
| 4359 | u"""Percent of the table's records to randomly designate evaluation records.
|
|---|
| 4360 | The remaining records will be designated training records.
|
|---|
| 4361 |
|
|---|
| 4362 | A popular ratio of training to evaluation is 2 to 1, but a different ratio
|
|---|
| 4363 | may be appropriate depending on the total number of records and the
|
|---|
| 4364 | objective of your study. Please consult the scientific literature for
|
|---|
| 4365 | advice. The following article is a good place to start.
|
|---|
| 4366 |
|
|---|
| 4367 | Guisan, A., and Zimmerman, N.E. 2000. Predictive habitat distribution
|
|---|
| 4368 | models in ecology. Ecological Modeling 135: 147-186."""),
|
|---|
| 4369 | arcGISDisplayName=_(u'Percentage of evaluation records'))
|
|---|
| 4370 |
|
|---|
| 4371 | AddArgumentMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'field',
|
|---|
| 4372 | typeMetadata=UnicodeStringTypeMetadata(),
|
|---|
| 4373 | description=_(
|
|---|
| 4374 | u"""Field to be randomly assigned the value 0 for training records and
|
|---|
| 4375 | 1 for evaluation records.
|
|---|
| 4376 |
|
|---|
| 4377 | If the field already exists, it must have the SHORT, LONG, FLOAT, or
|
|---|
| 4378 | DOUBLE data type. If it does not exist, it will be created with the
|
|---|
| 4379 | SHORT data type. If you do not provide a field name, the name
|
|---|
| 4380 | IsEvalData will be used."""),
|
|---|
| 4381 | arcGISDisplayName=_(u'Field to assign'))
|
|---|
| 4382 |
|
|---|
| 4383 | AddArgumentMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'where',
|
|---|
| 4384 | typeMetadata=SQLWhereClauseTypeMetadata(canBeNone=True),
|
|---|
| 4385 | description=ArcGIS91SelectCursor.__init__.__doc__.Obj.GetArgumentByName(u'where').Description,
|
|---|
| 4386 | arcGISParameterDependencies=[u'table'],
|
|---|
| 4387 | arcGISDisplayName=_(u'Where clause'))
|
|---|
| 4388 |
|
|---|
| 4389 | AddResultMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'updatedTable',
|
|---|
| 4390 | typeMetadata=ArcGISTableViewTypeMetadata(),
|
|---|
| 4391 | description=_(u'Updated table.'),
|
|---|
| 4392 | arcGISDisplayName=_(u'Updated table'),
|
|---|
| 4393 | arcGISParameterDependencies=[u'table'])
|
|---|
| 4394 |
|
|---|
| 4395 | ###############################################################################
|
|---|
| 4396 | # Names exported by this module
|
|---|
| 4397 | ###############################################################################
|
|---|
| 4398 |
|
|---|
| 4399 | __all__ = ['GLM', 'GAM', 'TreeModel', 'LinearMixedModel', 'ModelEvaluation']
|
|---|