root/MGET/Branches/Jason/PythonPackage/src/GeoEco/Statistics/Modeling.py @ 947

Revision 947, 232.8 KB (checked in by jjr8, 15 months ago)

Fixed #538: When emf is selected as the output format, Fit GLM, Fit GAM, Plot ROC, and other tools fail with ValueError?: The file C:\Temp\GeoEcoTemp?_Matt\tmphgchvj\output_term01.png, specified for the sourceFile parameter, does not exist. Please specify an existing file.

Line 
1# Modeling.py - Provides methods for modeling and prediction.
2#
3# Copyright (C) 2007 Jason J. Roberts and Ben D. Best
4#
5# This program is free software; you can redistribute it and/or
6# modify it under the terms of the GNU General Public License
7# as published by the Free Software Foundation; either version 2
8# of the License, or (at your option) any later version.
9#
10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13# GNU General Public License (available in the file LICENSE.TXT)
14# for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, write to the Free Software
18# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
19
20import copy
21import glob
22import os.path
23import random
24import sys
25import types
26
27from GeoEco.ArcGIS import GeoprocessorManager
28from GeoEco.DatabaseAccess.ArcGIS import ArcGIS91DatabaseConnection
29from GeoEco.DataManagement.ArcGISRasters import ArcGISRaster
30from GeoEco.DataManagement.Directories import TemporaryDirectory
31from GeoEco.DataManagement.Files import File
32from GeoEco.DynamicDocString import DynamicDocString
33from GeoEco.Internationalization import _
34from GeoEco.Logging import Logger
35from GeoEco.R import R, RPackageDependency
36
37# Public classes
38
39class GLM(object):
40    __doc__ = DynamicDocString()
41
42    @classmethod
43    def FitToArcGISTable(cls, inputTable, outputModelFile, formula,
44                         family=u'gaussian', where=None, link=None, variance=None,
45                         xColumnName=None, yColumnName=None, zColumnName=None, mColumnName=None,
46                         selectionMethod=None, logSelectionDetails=True,
47                         writeSummaryFile=True, writeDiagnosticPlots=True, numDiagLabels=3, diagLabelField=None, writeTermPlots=True, residuals=False, xAxis=True, commonScale=True, plotFileFormat=u'png', res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
48                         overwriteExisting=False):
49        cls.__doc__.Obj.ValidateMethodInvocation()
50
51        # If the caller requested emf format for the plot files,
52        # convert the width and height from thousands of an inch to
53        # inches.
54
55        if plotFileFormat == u'emf':
56            width = width / 1000
57            height = height / 1000
58
59        # Load the table into a temporary data frame.
60
61        if diagLabelField is not None:
62            extraFieldsToLoad = [diagLabelField]
63        else:
64            extraFieldsToLoad = None
65
66        dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, familyParam = _LoadDataFrameForModelFitting(inputTable, {_(u'formula') : formula}, {_(u'formula') : True}, family, link, variance, None, where, xColumnName, yColumnName, zColumnName, mColumnName, extraFieldsToLoad)
67
68        # Fit the GLM and create the output files in the temp
69        # directory.
70
71        r = R.GetInterpreter()
72       
73        try:
74            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'SummarizeModel.r'), False)
75            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'FitGLMForDataframe.r'), False)
76
77            tempDir = TemporaryDirectory()
78            tempOutputFile = os.path.join(tempDir.Path, u'output')
79
80            if selectionMethod is None:
81                selectionMethodParam = u'NULL'
82            else:
83                selectionMethodParam = u'"' + selectionMethod + u'"'
84
85            if diagLabelField is None:
86                diagLabelField = u'NULL'
87            else:
88                diagLabelField = u'"' + diagLabelField + u'"'
89
90            numTermPlots = r('FitGLMForDataframe(f=%s, d=%s, fam=%s, outputModelFile="%s", xVar=%s, yVar=%s, zVar=%s, mVar=%s, coordinateSystem=%s, selectionMethod=%s, logSelectionDetails=%s, writeSummaryFile=%s, writeDiagnosticPlots=%s, numDiagLabels=%i, diagLabelField=%s, writeTermPlots=%s, partial.resid=%s, xAxis=%s, commonScale=%s, plotFileFormat="%s", res=%f, width=%f, height=%f, pointSize=%f, bg="%s")' % (formula, dataFrameName, familyParam, tempOutputFile.replace('\\', '\\\\'), xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, selectionMethodParam, str(logSelectionDetails).upper(), str(writeSummaryFile).upper(), str(writeDiagnosticPlots).upper(), numDiagLabels, diagLabelField, str(writeTermPlots).upper(), str(residuals).upper(), str(xAxis).upper(), str(commonScale).upper(), plotFileFormat, res, width, height, pointSize, bg))
91
92            # Move the temporary output files to the requested
93            # location.
94           
95            File.MoveSilent(tempOutputFile, outputModelFile, overwriteExisting=overwriteExisting)
96
97            outputFilePrefix = os.path.splitext(outputModelFile)[0]
98           
99            if writeSummaryFile:
100                File.MoveSilent(os.path.join(tempDir.Path, u'output_summary.txt'), outputFilePrefix + u'_summary.txt', overwriteExisting=overwriteExisting)
101
102            if writeDiagnosticPlots and os.path.isfile(os.path.join(tempDir.Path, u'output_resid_fit.%s' % plotFileFormat)):
103                File.MoveSilent(os.path.join(tempDir.Path, u'output_resid_fit.%s' % plotFileFormat), outputFilePrefix + u'_resid_fit.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
104                File.MoveSilent(os.path.join(tempDir.Path, u'output_qq.%s' % plotFileFormat), outputFilePrefix + u'_qq.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
105                File.MoveSilent(os.path.join(tempDir.Path, u'output_scale_loc.%s' % plotFileFormat), outputFilePrefix + u'_scale_loc.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
106                File.MoveSilent(os.path.join(tempDir.Path, u'output_cooks.%s' % plotFileFormat), outputFilePrefix + u'_cooks.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
107                File.MoveSilent(os.path.join(tempDir.Path, u'output_resid_lev.%s' % plotFileFormat), outputFilePrefix + u'_resid_lev.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
108                File.MoveSilent(os.path.join(tempDir.Path, u'output_cooks_lev.%s' % plotFileFormat), outputFilePrefix + u'_cooks_lev.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
109
110            if writeTermPlots and numTermPlots > 0:
111                for i in range(int(numTermPlots)):
112                    File.MoveSilent(os.path.join(tempDir.Path, u'output_term%02i.%s' % (i+1, plotFileFormat)), outputFilePrefix + (u'_term%02i.%s' % (i+1, plotFileFormat)), overwriteExisting=overwriteExisting)
113
114        # Delete the data frame.
115       
116        finally:
117            r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
118
119    @classmethod
120    def PredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, outputErrorRaster=None, outputBinaryResponseRaster=None, cutoff=None, buildPyramids=False, overwriteExisting=False):
121        cls.__doc__.Obj.ValidateMethodInvocation()
122        _PredictFromArcGISRasters('glm', _(u'Fit GLM'), inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, outputErrorRaster, outputBinaryResponseRaster, cutoff, buildPyramids, overwriteExisting)
123
124
125class GAM(object):
126    __doc__ = DynamicDocString()
127
128    @classmethod
129    def FitToArcGISTable(cls, inputTable, outputModelFile, formula, family=u'gaussian', rPackage=u'mgcv',
130                         where=None, link=None, variance=None, theta=None, method=u'GCV.Cp', optimizer=u'outer', alternativeOptimizer=u'newton',
131                         xColumnName=None, yColumnName=None, zColumnName=None, mColumnName=None,
132                         selectionMethod=None, logSelectionDetails=True,
133                         writeSummaryFile=True, writeDiagnosticPlots=True, writeTermPlots=True, residuals=False, xAxis=True, commonScale=True, plotFileFormat=u'png', res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
134                         overwriteExisting=False):
135        cls.__doc__.Obj.ValidateMethodInvocation()
136
137        # Perform additional parameter validation.
138
139        if rPackage == u'mgcv' and selectionMethod is not None:
140            Logger.RaiseException(ValueError(_(u'Automated model selection is not currently supported when the GAM is fitted using the R mgcv package. To perform automated model selection, you must use the gam package. As an alternative, you can use the mgcv package and use splines with "shrinkage". See the documentation for the Automated Model Selection Method parameter for more information. ')))
141
142        if family == u'negbin':
143            if rPackage != u'mgcv':
144                Logger.RaiseException(ValueError(_(u'The "negbin" (negative binomial) model family is only available when the GAM is fitted using the R mgcv package.')))
145
146            if theta is None:
147                Logger.RaiseException(ValueError(_(u'The "negbin" (negative binomial) model family was specifed but the theta parameter was omitted. You must specify a value for the theta parameter when using the negbin model family.')))
148
149        # If the caller requested emf format for the plot files,
150        # convert the width and height from thousands of an inch to
151        # inches.
152
153        if plotFileFormat == u'emf':
154            width = width / 1000
155            height = height / 1000
156
157        # Load the R package.
158
159        dependency = RPackageDependency(rPackage)
160        dependency.Initialize()
161
162        # Load the table into a temporary data frame.
163
164        dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, familyParam = _LoadDataFrameForModelFitting(inputTable, {_(u'formula') : formula}, {_(u'formula') : True}, family, link, variance, theta, where, xColumnName, yColumnName, zColumnName, mColumnName)
165
166        # Fit the GLM and create the output files in the temp
167        # directory.
168
169        r = R.GetInterpreter()
170       
171        try:
172            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'SummarizeModel.r'), False)
173            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'FitGAMForDataframe.r'), False)
174
175            tempDir = TemporaryDirectory()
176            tempOutputFile = os.path.join(tempDir.Path, u'output')
177
178            if method is None:
179                methodParam = u'NULL'
180            else:
181                methodParam = u'"' + method + u'"'
182
183            if optimizer is None:
184                optimizerParam = u'NULL'
185            elif optimizer != u'outer':
186                optimizerParam = u'"' + optimizer + u'"'
187            else:
188                if alternativeOptimizer is None:
189                    alternativeOptimizer = u'newton'
190                optimizerParam = u'c("' + optimizer + u'", "' + alternativeOptimizer + u'")'
191
192            if selectionMethod is None:
193                selectionMethodParam = u'NULL'
194            else:
195                selectionMethodParam = u'"' + selectionMethod + u'"'
196
197            if rPackage != u'mgcv':
198                writeDiagnosticPlots = False
199
200            numTermPlots = r('FitGAMForDataframe(f=%s, d=%s, fam=%s, rPackage="%s", outputModelFile="%s", method=%s, optimizer=%s, xVar=%s, yVar=%s, zVar=%s, mVar=%s, coordinateSystem=%s, selectionMethod=%s, logSelectionDetails=%s, writeSummaryFile=%s, writeDiagnosticPlots=%s, writeTermPlots=%s, partial.resid=%s, xAxis=%s, commonScale=%s, plotFileFormat="%s", res=%f, width=%f, height=%f, pointSize=%f, bg="%s")' %
201                             (formula, dataFrameName, familyParam, rPackage, tempOutputFile.replace('\\', '\\\\'),
202                              methodParam, optimizerParam,
203                              xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam,
204                              selectionMethodParam, str(logSelectionDetails).upper(),
205                              str(writeSummaryFile).upper(), str(writeDiagnosticPlots).upper(), str(writeTermPlots).upper(), str(residuals).upper(), str(xAxis).upper(), str(commonScale).upper(), plotFileFormat, res, width, height, pointSize, bg))
206
207            # Move the temporary output files to the requested
208            # location.
209           
210            File.MoveSilent(tempOutputFile, outputModelFile, overwriteExisting=overwriteExisting)
211
212            outputFilePrefix = os.path.splitext(outputModelFile)[0]
213           
214            if writeSummaryFile:
215                File.MoveSilent(os.path.join(tempDir.Path, u'output_summary.txt'), outputFilePrefix + u'_summary.txt', overwriteExisting=overwriteExisting)
216
217            if writeDiagnosticPlots and os.path.isfile(os.path.join(tempDir.Path, u'output_diag.%s' % plotFileFormat)):
218                File.MoveSilent(os.path.join(tempDir.Path, u'output_diag.%s' % plotFileFormat), outputFilePrefix + u'_diag.%s' % plotFileFormat, overwriteExisting=overwriteExisting)
219
220            if writeTermPlots and numTermPlots > 0:
221                for i in range(int(numTermPlots)):
222                    File.MoveSilent(os.path.join(tempDir.Path, u'output_term%02i.%s' % (i+1, plotFileFormat)), outputFilePrefix + (u'_term%02i.%s' % (i+1, plotFileFormat)), overwriteExisting=overwriteExisting)
223
224        # Delete the data frame.
225       
226        finally:
227            r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
228
229    @classmethod
230    def PredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, outputErrorRaster=None, outputBinaryResponseRaster=None, cutoff=None, buildPyramids=False, overwriteExisting=False):
231        cls.__doc__.Obj.ValidateMethodInvocation()
232        _PredictFromArcGISRasters('gam', _(u'Fit GAM'), inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, outputErrorRaster, outputBinaryResponseRaster, cutoff, buildPyramids, overwriteExisting)
233
234    @classmethod
235    def BayesPredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, thresholds, outputProbabilityRasters, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, samples=1000, chunks=100, buildPyramids=False, overwriteExisting=False):
236        cls.__doc__.Obj.ValidateMethodInvocation()
237
238        # Perform additional parameter validation.
239
240        inputPredictorRastersLower = map(lambda s: unicode(s).lower(), inputPredictorRasters)
241        for i in range(len(outputProbabilityRasters)):
242            raster = outputProbabilityRasters[i].lower()
243            if raster in inputPredictorRastersLower:
244                Logger.RaiseException(ValueError(_(u'The output raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output raster or remove it from the list of input predictor rasters.') % {u'out': outputProbabilityRasters[i]}))
245
246        # Load the fitted model.
247
248        r = R.GetInterpreter()
249        gp = GeoprocessorManager.GetWrappedGeoprocessor()
250
251        r('load("%s")' % inputModelFile.replace('\\', '\\\\'))
252        try:
253            if not r('exists("model")'):
254                Logger.RaiseException(ValueError(_('The input model file %(file)s does not contain a variable called "model", indicating that it was not generated by the Fit GAM tool. Please provide a file that was generated by the Fit GAM tool.') % {u'file': inputModelFile}))
255            if 'gam' not in r('class(model)'):
256                Logger.RaiseException(ValueError(_('The input model file %(file)s contains a variable called "model" but it is not a GAM, indicating that it was not generated by the Fit GAM tool. Please provide a file that was generated by the Fit GAM tool.') % {u'file': inputModelFile}))
257            if not r('exists("rPackage")') or r['rPackage'] != 'mgcv':
258                Logger.RaiseException(ValueError(_('The input model in file %(file)s was fitted by the R %(pkg)s package, but this tool requires that the model be fitted with the mgcv package. Please refit the model using the mgcv package and try again.') % {u'file': inputModelFile, u'pkg': r['rPackage']}))
259
260            Logger.Info(_(u'Loaded %(type)s model from %(file)s. The model was fitted with the R %(pkg)s package.') % {u'type': r('class(model)[1]').upper(), u'file': inputModelFile, u'pkg': r['rPackage']})
261
262            # Log a summery of the model, to remind the user what it is.
263
264            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'SummarizeModel.r'), False)
265
266            r('writeLines("")')
267            r('writeLines("MODEL SUMMARY:")')
268            r('writeLines("==============")')
269            r('writeLines(SummarizeModel(model))')
270            r('writeLines("")')
271
272            if r('model$family$family') != 'binomial' or r('model$family$link') != 'logit':
273                Logger.RaiseException(ValueError(_('The input model in the file %(file)s is of the %(fam)s family and uses the %(link)s link function. This tool requires the model to be of the binomial family and use the logit link function. Please refit the model and try again. If this is not appropriate for your modeling problem, please contact the author of this tool for assistance.') % {u'file': inputModelFile, u'pkg': r['rPackage'], u'fam': r('model$family$family'), u'link': r('model$family$link')}))
274           
275            # Prepare the input rasters for the prediction.
276
277            tempDir = TemporaryDirectory()
278
279            _PreparePredictorRasters(r, gp, tempDir, variableNames, inputPredictorRasters, templateRaster, resamplingTechniques)
280
281            # Do the prediction. The prediction rasters are
282            # returned as ArcInfo ASCII grids because rgdal cannot
283            # write rasters (ArcInfo binary grids).
284            #
285            # Force the logging system to log R Info messages as
286            # Debug so that the user is not spammed with "Closing
287            # GDAL dataset handle" messages. There appears to be
288            # no way to suppress these messages from within R.
289
290            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'BayesPredictGAMForArcGISRasters.r'), False)
291
292            oldLoggingLevel = r.LogInfoAsDebug
293            r.LogInfoAsDebug = True
294            try:
295                if chunks is not None:
296                    chunks = str(chunks)
297                else:
298                    chunks = 'NULL'
299               
300                for i in range(len(outputProbabilityRasters)):
301                    Logger.Info(_(u'Predicting for threshold %g...') % thresholds[i])
302                    r('BayesPredictGAMForArcGISRasters(model, rastersForPredictors, %g, "%s", %s, %i, %s)' % (thresholds[i], os.path.join(tempDir.Path, u'temp_output%i.txt' % i).replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper(), samples, chunks))
303            finally:
304                r.LogInfoAsDebug = oldLoggingLevel
305
306            # Convert the ArcInfo ASCII grids to rasters.
307
308            Logger.Info(_(u'Creating outputs...'))
309
310            for i in range(len(outputProbabilityRasters)):
311                tempOutputRaster = os.path.join(tempDir.Path, u'output%i' % i)
312                gp.ASCIIToRaster_Conversion(os.path.join(tempDir.Path, u'temp_output%i.txt' % i), tempOutputRaster, u'FLOAT')
313                gp.DefineProjection_management(tempOutputRaster, coordinateSystem)          # TODO: Fix this
314                if buildPyramids:
315                    gp.BuildPyramids_Management(tempOutputRaster)
316
317            # Copy the output rasters to the requested destinations.
318
319            for i in range(len(outputProbabilityRasters)):
320                ArcGISRaster.CopySilent(os.path.join(tempDir.Path, u'output%i' % i), outputProbabilityRasters[i], overwriteExisting=overwriteExisting)
321
322        # Delete R variables assigned by this function.
323       
324        finally:
325            r('if (exists("rastersForPredictors")) rm("rastersForPredictors")')
326            r('if (exists("model")) rm("model")')
327            r('if (exists("rPackage")) rm("rPackage")')
328            r('if (exists("xVar")) rm("xVar")')
329            r('if (exists("yVar")) rm("yVar")')
330            r('if (exists("zVar")) rm("zVar")')
331            r('if (exists("mVar")) rm("mVar")')
332            r('if (exists("coordinateSystem")) rm("coordinateSystem")')
333
334
335class TreeModel(object):
336    __doc__ = DynamicDocString()
337
338    @classmethod
339    def FitToArcGISTable(cls, inputTable, outputModelFile, formula, method,
340                         where=None, allowMissingCovariates=True, minSplit=20, minBucket=7, cp=0.01, maxCompete=4, maxSurrogate=5, useSurrogate=2, surrogateStyle=0, xval=1000, maxDepth=30, pruningMethod=None, pruningCP=None,
341                         xColumnName=None, yColumnName=None, zColumnName=None, mColumnName=None,
342                         writeSummaryFile=True, writeDiagnosticPlots=True, writeTreePlot=True, writePrunedTreePlot=True, plotFileFormat=u'png', res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
343                         treePlotType=0, extra=1, percentage=True, under=True, clipRightLabels=True, fallenLeaves=False, branchType=0, branch=0.2, uniform=True, digits=2, varlen=0, faclen=0, cex=None, tweak=1., compress=True, ycompress=True,
344                         overwriteExisting=False):
345        cls.__doc__.Obj.ValidateMethodInvocation()
346
347        # Perform additional parameter validation.
348
349        if pruningMethod == u'user specified cp' and pruningCP is None:
350            Logger.RaiseException(ValueError(_(u'The Pruning Method is \'User specified\' but no Complexity Parameter for Pruning was provided. Please provide a complexity parameter for pruning or select a different Pruning Method.')))
351
352        # If the caller requested emf format for the plot files,
353        # convert the width and height from thousands of an inch to
354        # inches.
355
356        if plotFileFormat == u'emf':
357            width = width / 1000
358            height = height / 1000
359
360        # Load the table into a temporary data frame.
361
362        dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam = _LoadDataFrameForModelFitting(inputTable, {_(u'formula') : formula}, {_(u'formula') : True}, None, None, None, None, where, xColumnName, yColumnName, zColumnName, mColumnName)[:-1]     # We do not need the familyParam
363
364        # Fit the tree model and create the output files in the temp
365        # directory.
366
367        r = R.GetInterpreter()
368       
369        try:
370            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'FitTreeModelForDataframe.r'), False)
371
372            tempDir = TemporaryDirectory()
373            tempOutputFile = os.path.join(tempDir.Path, u'output')
374
375            if pruningMethod is None:
376                pruningMethod = u'NULL'
377            else:
378                pruningMethod = u'"' + pruningMethod + u'"'
379
380            if pruningCP is None:
381                pruningCP = u'NULL'
382            else:
383                pruningCP = repr(pruningCP)
384
385            if cex is None:
386                cex = u'NULL'
387            else:
388                cex = repr(cex)
389
390            r('FitTreeModelForDataframe(f=%s, d=%s, outputModelFile="%s", method="%s", allowMissingCovariates=%s, minSplit=%i, minBucket=%i, cp=%f, maxCompete=%i, maxSurrogate=%i, useSurrogate=%i, surrogateStyle=%i, xval=%i, maxDepth=%i, pruningMethod=%s, pruningCP=%s, xVar=%s, yVar=%s, zVar=%s, mVar=%s, coordinateSystem=%s, writeSummaryFile=%s, writeDiagnosticPlots=%s, writeTreePlot=%s, writePrunedTreePlot=%s, plotFileFormat="%s", res=%f, width=%f, height=%f, pointSize=%f, bg="%s", treePlotType=%i, extra=%i, percentage=%s, under=%s, clipRightLabels=%s, fallenLeaves=%s, branchType=%i, branch=%f, uniform=%s, digits=%i, varlen=%i, faclen=%i, cex=%s, tweak=%f, compress=%s, ycompress=%s)' %
391              (formula, dataFrameName, tempOutputFile.replace('\\', '\\\\'), method,
392               str(allowMissingCovariates).upper(), minSplit, minBucket, cp, maxCompete, maxSurrogate, useSurrogate, surrogateStyle, xval, maxDepth, pruningMethod, pruningCP,
393               xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam,
394               str(writeSummaryFile).upper(), str(writeDiagnosticPlots).upper(), str(writeTreePlot).upper(), str(writePrunedTreePlot).upper(), plotFileFormat, res, width, height, pointSize, bg,
395               treePlotType, extra, str(percentage).upper(), str(under).upper(), str(clipRightLabels).upper(), str(fallenLeaves).upper(), branchType, branch, str(uniform).upper(), digits, varlen, faclen, cex, tweak, str(compress).upper(), str(ycompress).upper()))
396
397            # Move the temporary output files to the requested
398            # location.
399           
400            File.MoveSilent(tempOutputFile, outputModelFile, overwriteExisting=overwriteExisting)
401
402            outputFilePrefix = os.path.splitext(outputModelFile)[0]
403           
404            if writeSummaryFile:
405                File.MoveSilent(os.path.join(tempDir.Path, u'output_summary.txt'), outputFilePrefix + u'_summary.txt', overwriteExisting=overwriteExisting)
406
407            if writeDiagnosticPlots:
408                if os.path.isfile(os.path.join(tempDir.Path, u'output_cp.' + plotFileFormat)):
409                    File.MoveSilent(os.path.join(tempDir.Path, u'output_cp.' + plotFileFormat), outputFilePrefix + u'_cp.' + plotFileFormat, overwriteExisting=overwriteExisting)
410
411                if os.path.isfile(os.path.join(tempDir.Path, u'output_rsquare.' + plotFileFormat)):
412                    File.MoveSilent(os.path.join(tempDir.Path, u'output_rsquare.' + plotFileFormat), outputFilePrefix + u'_rsquare.' + plotFileFormat, overwriteExisting=overwriteExisting)
413
414                if os.path.isfile(os.path.join(tempDir.Path, u'output_residuals.' + plotFileFormat)):
415                    File.MoveSilent(os.path.join(tempDir.Path, u'output_residuals.' + plotFileFormat), outputFilePrefix + u'_residuals.' + plotFileFormat, overwriteExisting=overwriteExisting)
416
417                if os.path.isfile(os.path.join(tempDir.Path, u'output_pruned_residuals.' + plotFileFormat)):
418                    File.MoveSilent(os.path.join(tempDir.Path, u'output_pruned_residuals.' + plotFileFormat), outputFilePrefix + u'_pruned_residuals.' + plotFileFormat, overwriteExisting=overwriteExisting)
419
420            if writeTreePlot and os.path.isfile(os.path.join(tempDir.Path, u'output_unpruned_tree.' + plotFileFormat)):
421                File.MoveSilent(os.path.join(tempDir.Path, u'output_unpruned_tree.' + plotFileFormat), outputFilePrefix + u'_unpruned_tree.' + plotFileFormat, overwriteExisting=overwriteExisting)
422
423            if pruningMethod != u'NULL' and writePrunedTreePlot and os.path.isfile(os.path.join(tempDir.Path, u'output_pruned_tree.' + plotFileFormat)):
424                File.MoveSilent(os.path.join(tempDir.Path, u'output_pruned_tree.' + plotFileFormat), outputFilePrefix + u'_pruned_tree.' + plotFileFormat, overwriteExisting=overwriteExisting)
425
426        # Delete the data frame.
427       
428        finally:
429            r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
430
431    @classmethod
432    def PredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, buildPyramids=False, overwriteExisting=False):
433        cls.__doc__.Obj.ValidateMethodInvocation()
434        _PredictFromArcGISRasters('rpart', _(u'Fit Tree Model'), inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, None, None, None, buildPyramids, overwriteExisting)
435
436
437class LinearMixedModel(object):
438    __doc__ = DynamicDocString()
439
440    @classmethod
441    def FitToArcGISTable(cls, inputTable, outputModelFile, fixedFormula, randomFormula,
442                         where=None, method=u'REML', correlationStructure=None, correlationFormula=None, range_=None, nugget=None, metric=None, fixed=False,
443                         xColumnName=None, yColumnName=None, zColumnName=None, mColumnName=None,
444                         writeSummaryFile=True, writeDiagnosticPlots=True, plotFileFormat=u'png', res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
445                         overwriteExisting=False):
446        cls.__doc__.Obj.ValidateMethodInvocation()
447
448        # Perform additional validation.
449
450        if correlationStructure is not None and correlationFormula is None or correlationStructure is None and correlationFormula is not None:
451            Logger.RaiseException(ValueError(_('To fit a model that includes for within-group correlation, you must specify both the a correlation structure and a correlation formula. If you do not wish to include within-group correlation, you must omit both of those parameters.')))
452
453        if correlationStructure is not None and range_ is None and nugget is not None:
454            Logger.RaiseException(ValueError(_('If the Nugget parameter is specified, the Range parameter must also be specified.')))
455
456        # If the caller requested emf format for the plot files,
457        # convert the width and height from thousands of an inch to
458        # inches.
459
460        if plotFileFormat == u'emf':
461            width = width / 1000
462            height = height / 1000
463
464        # Load the table into a temporary data frame.
465
466        dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, familyParam = _LoadDataFrameForModelFitting(inputTable, {_(u'fixed effects formula') : fixedFormula, _(u'random effects formula') : randomFormula, _(u'correlation formula') : correlationFormula}, {_(u'fixed effects formula') : True, _(u'random effects formula') : False, _(u'correlation formula') : False}, None, None, None, None, where, xColumnName, yColumnName, zColumnName, mColumnName, None)
467
468        # Fit the model with the nlme package and create the output
469        # files in the temp directory.
470
471        r = R.GetInterpreter()
472       
473        try:
474            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'SummarizeModel.r'), False)
475            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'FitLMEForDataframe.r'), False)
476
477            tempDir = TemporaryDirectory()
478            tempOutputFile = os.path.join(tempDir.Path, u'output')
479
480            if randomFormula is None:
481                randomFormula = u'NULL'
482
483            if correlationStructure is not None:
484                if correlationStructure == u'exponential':
485                    correlation = u'corExp('
486                elif correlationStructure == u'gaussian':
487                    correlation = u'corGaus('
488                elif correlationStructure == u'linear':
489                    correlation = u'corLin('
490                elif correlationStructure == u'rational quadratic':
491                    correlation = u'corRatio('
492                elif correlationStructure == u'spherical':
493                    correlation = u'corSpher('
494                else:
495                    Logger.RaiseException(RuntimeError(_('Unknown correlationStructure value \'%(cs)s\'. Please contact the author of this tool for assistance.') % {u'cs': correlationStructure}))
496
497                if range_ is not None:
498                    if nugget is None:
499                        correlation += repr(range_) + ', '
500                    else:
501                        correlation += u'c(' + repr(range_) + u', ' + repr(nugget) + u'), '
502
503                correlation += u'form=' + correlationFormula + u', nugget=' + repr(nugget is not None).upper()
504
505                if metric is not None:
506                    correlation += u', metric="' + metric + u'"'
507
508                correlation += u', fixed=' + repr(fixed).upper() + u')'
509
510            else:
511                correlation = u'NULL'
512
513            r('library(nlme)')      # Required here so we can instantiate correlation classes
514
515            r('FitLMEForDataframe(fixed=%s, data=%s, outputModelFile="%s", random=%s, correlation=%s, method="%s", xVar=%s, yVar=%s, zVar=%s, mVar=%s, coordinateSystem=%s, writeSummaryFile=%s, writeDiagnosticPlots=%s, plotFileFormat="%s", res=%f, width=%f, height=%f, pointSize=%f, bg="%s")' %
516              (fixedFormula, dataFrameName, tempOutputFile.replace('\\', '\\\\'), randomFormula, correlation, method,
517               xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam,
518               str(writeSummaryFile).upper(), str(writeDiagnosticPlots).upper(), plotFileFormat, res, width, height, pointSize, bg))
519
520            # Move the temporary output files to the requested
521            # location.
522           
523            File.MoveSilent(tempOutputFile, outputModelFile, overwriteExisting=overwriteExisting)
524
525            outputFilePrefix = os.path.splitext(outputModelFile)[0]
526           
527            if writeSummaryFile:
528                File.MoveSilent(os.path.join(tempDir.Path, u'output_summary.txt'), outputFilePrefix + u'_summary.txt', overwriteExisting=overwriteExisting)
529
530            if writeDiagnosticPlots:
531                files = glob.glob(os.path.join(tempDir.Path, u'output_*.' + plotFileFormat))
532                for f in files:
533                    File.MoveSilent(f, outputFilePrefix + '_' + os.path.basename(f).split('_', 1)[1], overwriteExisting=overwriteExisting)
534
535        # Delete the data frame.
536       
537        finally:
538            r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
539
540    @classmethod
541    def PredictFromArcGISRasters(cls, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster=None, resamplingTechniques=None, ignoreOutOfRangeValues=True, buildPyramids=False, overwriteExisting=False):
542        cls.__doc__.Obj.ValidateMethodInvocation()
543        _PredictFromArcGISRasters('lme', _(u'Fit Linear Mixed Model'), inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, None, None, None, buildPyramids, overwriteExisting)
544
545
546class ModelEvaluation(object):
547    __doc__ = DynamicDocString()
548
549    @classmethod
550    def PlotPerformanceOfBinaryClassificationModel(cls, inputModelFile, measure1, measure2=None, summaryStats=None, title=None,
551                                                   evaluationDataTable=None, where=None,
552                                                   outputPlotFile=None, res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
553                                                   overwriteExisting=False):
554        cls.__doc__.Obj.ValidateMethodInvocation()
555
556        # If the caller requested emf format for the plot file,
557        # convert the width and height from thousands of an inch to
558        # inches.
559
560        if outputPlotFile is not None and outputPlotFile.lower().endswith(u'.emf'):
561            width = width / 1000
562            height = height / 1000
563
564        # Load the fitted model.
565
566        r = R.GetInterpreter()
567        dataFrameName = None
568        newPredictionsName = None
569
570        r('load("%s")' % inputModelFile.replace('\\', '\\\\'))
571        try:
572            if not r('exists("model")'):
573                Logger.RaiseException(ValueError(_('The input model file %(file)s does not contain a variable called "model", indicating that it was not generated by the one of the MGET model fitting tools, such as Fit GLM. Please provide a file that was generated by the one of the MGET model fitting tools.') % {u'file': inputModelFile}))
574
575            # If the model required an R package, load it.
576
577            if r('exists("rPackage")'):
578                dependency = RPackageDependency(unicode(r['rPackage']))
579                dependency.Initialize()
580
581            # If the caller specified a evaluation data table, load it
582            # and create new predictions.
583
584            if r('exists("rPackage")') and r['rPackage'] == 'rpart':
585                if r('model$method').lower() != 'class':
586                    Logger.RaiseException(ValueError(_('The input model file %(file)s contains a tree model that was fitted with the "%(method)s" splitting method. Models fitted with that method are not supported by this tool. This tool only supports models fitted with the "class" method.') % {u'file': inputModelFile, u'method': r('model$method')}))
587                if r('length(attr(model, "ylevels"))') != 2:
588                    Logger.RaiseException(ValueError(_('The classification tree model in %(file)s contains %(classes)i classes. This tool only supports models with two classes.') % {u'file': inputModelFile, u'classes': r('length(attr(model, "ylevels"))')}))
589               
590                if evaluationDataTable is None:
591                    predictedValues = 'as.vector(predict(model, type="prob")[,2])'
592                    actualValues = 'as.vector(model$y) - 1'
593                else:
594                    dataFrameName, newPredictionsName = cls._PredictionsForArcGISTable(inputModelFile, 'model', evaluationDataTable, where)
595                    predictedValues = newPredictionsName
596                    actualValues = 'as.vector(unclass(factor(%s$%s, levels=attr(model, "ylevels")))) - 1' % (dataFrameName, r('all.vars(attr(model$terms, "variables")[[2]])[[1]]'))
597
598            else:
599                if evaluationDataTable is None:
600                    predictedValues = 'model$fitted'
601                    actualValues = 'model$y'
602                else:
603                    dataFrameName, newPredictionsName = cls._PredictionsForArcGISTable(inputModelFile, 'model', evaluationDataTable, where)
604                    predictedValues = newPredictionsName
605                    actualValues = 'na.omit(%s)$%s' % (dataFrameName, r('all.vars(attr(terms(model$formula), "variables")[[2]])[[1]]'))
606
607            # Create the plot.
608       
609            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'PerfPlotForBinaryModel.r'), False)
610
611            if measure2 is None:
612                measure2 = u'cutoff'
613
614            if title is not None:
615                title = '"' + title.replace('"', '\\"') + '"'
616            else:
617                title = 'NULL'
618
619            r['summaryStats'] = summaryStats
620           
621            if outputPlotFile is None:
622                summaryStatValues = r('PerfPlotForBinaryModel(predictedValues=%s, actualValues=%s, measure1="%s", measure2="%s", summaryStats=summaryStats, .title=%s)' % (predictedValues, actualValues, measure1, measure2, title))
623            else:
624                tempDir = TemporaryDirectory()
625                tempOutputFile = os.path.join(tempDir.Path, u'plot' + os.path.splitext(outputPlotFile)[1])
626                Logger.Info(_(u'Writing plot to %(file)s...') % {u'file': outputPlotFile})
627                summaryStatValues = r('PerfPlotForBinaryModelToFile("%s", predictedValues=%s, actualValues=%s, measure1="%s", measure2="%s", summaryStats=summaryStats, .title=%s, res=%f, width=%f, height=%f, pointsize=%f, bg="%s")' % (tempOutputFile.replace('\\', '\\\\'), predictedValues, actualValues, measure1, measure2, title, res, width, height, pointSize, bg))
628                File.MoveSilent(tempOutputFile, outputPlotFile, overwriteExisting=overwriteExisting)
629
630        # Delete R variables assigned by this function.
631       
632        finally:
633            r('if (exists("summaryStats")) rm("summaryStats")')
634            r('if (exists("model")) rm("model")')
635            r('if (exists("rPackage")) rm("rPackage")')
636            r('if (exists("xVar")) rm("xVar")')
637            r('if (exists("yVar")) rm("yVar")')
638            r('if (exists("zVar")) rm("zVar")')
639            r('if (exists("mVar")) rm("mVar")')
640            r('if (exists("coordinateSystem")) rm("coordinateSystem")')
641            r('if (exists("f20985982305")) rm("f20985982305", pos=globalenv())')
642            r('if (exists("data20985982305")) rm("data20985982305", pos=globalenv())')
643            r('if (exists("fam20985982305")) rm("fam20985982305", pos=globalenv())')
644            if dataFrameName is not None:
645                r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
646            if newPredictionsName is not None:
647                r('if (exists("%s")) rm("%s")' % (newPredictionsName, newPredictionsName))
648
649        # Return the summary statistics' values.
650
651        return summaryStatValues       
652
653    @classmethod
654    def PlotROCOfBinaryClassificationModel(cls, inputModelFile, cutoff=u'Automatic', cutoffValue=None, colorize=True, title=None,
655                                           evaluationDataTable=None, where=None,
656                                           outputSummaryFile=None, outputPlotFile=None, res=1000., width=3000., height=3000., pointSize=10.0, bg=u'white',
657                                           overwriteExisting=False):
658        cls.__doc__.Obj.ValidateMethodInvocation()
659
660        # Perform additional parameter validation.
661
662        if cutoff == u'specified' and cutoffValue is None:
663            Logger.RaiseException(ValueError(_(u'When you provide the value "Specified" for the Cutoff parameter, you must also provide a value for the Cutoff Value parameter.')))
664
665        # If the caller requested emf format for the plot file,
666        # convert the width and height from thousands of an inch to
667        # inches.
668
669        if outputPlotFile is not None and outputPlotFile.lower().endswith(u'.emf'):
670            width = width / 1000
671            height = height / 1000
672
673        # Load the fitted model.
674
675        r = R.GetInterpreter()
676        dataFrameName = None
677        newPredictionsName = None
678       
679        r('load("%s")' % inputModelFile.replace('\\', '\\\\'))
680        try:
681            if not r('exists("model")'):
682                Logger.RaiseException(ValueError(_('The input model file %(file)s does not contain a variable called "model", indicating that it was not generated by the one of the MGET model fitting tools, such as Fit GLM. Please provide a file that was generated by the one of the MGET model fitting tools.') % {u'file': inputModelFile}))
683
684            # If the model required an R package, load it.
685
686            if r('exists("rPackage")'):
687                dependency = RPackageDependency(unicode(r['rPackage']))
688                dependency.Initialize()
689
690            # If the caller specified a evaluation data table, load it
691            # and create new predictions.
692
693            if r('exists("rPackage")') and r['rPackage'] == 'rpart':
694                if r('model$method').lower() != 'class':
695                    Logger.RaiseException(ValueError(_('The input model file %(file)s contains a tree model that was fitted with the "%(method)s" splitting method. Models fitted with that method are not supported by this tool. This tool only supports models fitted with the "class" method.') % {u'file': inputModelFile, u'method': r('model$method')}))
696                if r('length(attr(model, "ylevels"))') != 2:
697                    Logger.RaiseException(ValueError(_('The classification tree model in %(file)s contains %(classes)i classes. This tool only supports models with two classes.') % {u'file': inputModelFile, u'classes': r('length(attr(model, "ylevels"))')}))
698
699                if evaluationDataTable is None:
700                    predictedValues = 'as.vector(predict(model, type="prob")[,2])'
701                    actualValues = 'as.vector(model$y) - 1'
702                else:
703                    dataFrameName, newPredictionsName = cls._PredictionsForArcGISTable(inputModelFile, 'model', evaluationDataTable, where)
704                    predictedValues = newPredictionsName
705                    actualValues = 'as.vector(unclass(factor(%s$%s, levels=attr(model, "ylevels")))) - 1' % (dataFrameName, r('all.vars(attr(model$terms, "variables")[[2]])[[1]]'))
706
707            else:
708                if evaluationDataTable is None:
709                    predictedValues = 'model$fitted'
710                    actualValues = 'model$y'
711                else:
712                    dataFrameName, newPredictionsName = cls._PredictionsForArcGISTable(inputModelFile, 'model', evaluationDataTable, where)
713                    predictedValues = newPredictionsName
714                    actualValues = 'na.omit(%s)$%s' % (dataFrameName, r('all.vars(attr(terms(model$formula), "variables")[[2]])[[1]]'))
715
716            # Create the plot.
717       
718            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[cls.__module__].__file__), 'ROCPlotForBinaryModel.r'), False)
719
720            if cutoffValue is None:
721                cutoffValue = 0.0
722
723            if title is not None:
724                title = '"' + title.replace('"', '\\"') + '"'
725            else:
726                title = 'NULL'
727
728            if outputSummaryFile is not None:
729                outputSummaryFile = '"' + outputSummaryFile.replace('\\', '\\\\') + '"'
730            else:
731                outputSummaryFile = 'NULL'
732           
733            if outputPlotFile is None:
734                outputCutoff, tp, fp, fn, tn, auc, mxe, prbe, rmse = r('ROCPlotForBinaryModel(predictedValues=%s, actualValues=%s, cutoff="%s", cutoffValue=%f, colorize=%s, .title=%s, summaryFile=%s)' % (predictedValues, actualValues, cutoff, cutoffValue, str(colorize).upper(), title, outputSummaryFile))
735            else:
736                tempDir = TemporaryDirectory()
737                tempOutputFile = os.path.join(tempDir.Path, u'plot' + os.path.splitext(outputPlotFile)[1])
738                Logger.Info(_(u'Writing plot to %(file)s...') % {u'file': outputPlotFile})
739                outputCutoff, tp, fp, fn, tn, auc, mxe, prbe, rmse = r('ROCPlotForBinaryModelToFile("%s", predictedValues=%s, actualValues=%s, cutoff="%s", cutoffValue=%f, colorize=%s, .title=%s, summaryFile=%s, res=%f, width=%f, height=%f, pointsize=%f, bg="%s")' % (tempOutputFile.replace('\\', '\\\\'), predictedValues, actualValues, cutoff, cutoffValue, str(colorize).upper(), title, outputSummaryFile, res, width, height, pointSize, bg))
740                File.MoveSilent(tempOutputFile, outputPlotFile, overwriteExisting=overwriteExisting)
741
742        # Delete R variables assigned by this function.
743       
744        finally:
745            r('if (exists("model")) rm("model")')
746            r('if (exists("rPackage")) rm("rPackage")')
747            r('if (exists("xVar")) rm("xVar")')
748            r('if (exists("yVar")) rm("yVar")')
749            r('if (exists("zVar")) rm("zVar")')
750            r('if (exists("mVar")) rm("mVar")')
751            r('if (exists("coordinateSystem")) rm("coordinateSystem")')
752            r('if (exists("f20985982305")) rm("f20985982305", pos=globalenv())')
753            r('if (exists("data20985982305")) rm("data20985982305", pos=globalenv())')
754            r('if (exists("fam20985982305")) rm("fam20985982305", pos=globalenv())')
755            if dataFrameName is not None:
756                r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
757            if newPredictionsName is not None:
758                r('if (exists("%s")) rm("%s")' % (newPredictionsName, newPredictionsName))
759
760        # Return successfully.
761
762        return outputCutoff, tp, fp, fn, tn, auc, mxe, prbe, rmse
763
764    @classmethod
765    def _PredictionsForArcGISTable(cls, inputModelFile, modelName, evaluationDataTable, where):
766
767        # First verify that the table contains all of the fields used
768        # to fit the model.
769
770        r = R.GetInterpreter()
771        gp = GeoprocessorManager.GetWrappedGeoprocessor()
772
773        fields = []
774        xColumnName = None
775        yColumnName = None
776        zColumnName = None
777        mColumnName = None
778        d = gp.Describe(evaluationDataTable)
779        gotPointFeatures = hasattr(d, u'ShapeType') and isinstance(d.ShapeType, basestring) and d.ShapeType.lower() == u'point'
780
781        if r('exists("rPackage")') and r['rPackage'] == 'rpart':
782            allVars = r('all.vars(%s$terms)' % modelName)
783        else:
784            allVars = r('all.vars(%s$formula)' % modelName)
785        if isinstance(allVars, basestring):
786            allVars = [allVars]
787       
788        for var in allVars:
789            conn = ArcGIS91DatabaseConnection()
790            if conn.FieldExists(evaluationDataTable, var):
791                fields.append(var)
792            else:
793                if var == r['xVar']:
794                    if gotPointFeatures:
795                        xColumnName = var
796                    else:
797                        Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point X coordinate was used as the variable "%(var)s" in the model, but the evaluation data table %(table)s does not contain point features, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
798                elif var == r['yVar']:
799                    if gotPointFeatures:
800                        yColumnName = var
801                    else:
802                        Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point Y coordinate was used as the variable "%(var)s" in the model, but the evaluation data table %(table)s does not contain point features, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
803                elif var == r['zVar']:
804                    if gotPointFeatures and d.HasZ:
805                        zColumnName = var
806                    elif gotPointFeatures:
807                        Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point Z coordinate was used as the variable "%(var)s" in the model, but the point feature class %(table)s provided as the evaluation data table does not have Z coordinates, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class that has Z values or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
808                    else:
809                        Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point Z coordinate was used as the variable "%(var)s" in the model, but the evaluation data table %(table)s does not contain point features, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class that has Z values or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
810                elif var == r['mVar']:
811                    if gotPointFeatures and d.HasM:
812                        mColumnName = var
813                    elif gotPointFeatures:
814                        Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point M value was used as the variable "%(var)s" in the model, but the point feature class %(table)s provided as the evaluation data table does not have M values, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class that has M values or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
815                    else:
816                        Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a point feature class and the point M value was used as the variable "%(var)s" in the model, but the evaluation data table %(table)s does not contain point features, nor does it contain a field named "%(var)s". For the evaluation data, you must provide either a point feature class that has M values or a table that contains a field named "%(var)s".') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
817                else:
818                    Logger.RaiseException(ValueError(_('The input model in %(file)s was fitted to a table containing a field named "%(var)s" but the evaluation data table %(table)s does not contain this field. You must provide a table that contains this field.') % {u'file': inputModelFile, u'table': evaluationDataTable, u'var': var}))
819
820        # Load the table into a temporary data frame.
821       
822        dataFrameName = R.GetUniqueVariableName()
823        R.LoadDataFrameFromArcGISTable(evaluationDataTable, dataFrameName, fields=fields, where=where, xColumnName=xColumnName, yColumnName=yColumnName, zColumnName=zColumnName, mColumnName=mColumnName)
824       
825        if r('length(%s)' % dataFrameName) <= 0:
826            if where is not None:
827                Logger.RaiseException(ValueError(_(u'The where clause "%(where)s" did not select any rows from the table %(table)s.') % {u'table': evaluationDataTable, u'where': where}))
828            else:
829                Logger.RaiseException(ValueError(_(u'The table %(table)s is empty.') % {u'table': evaluationDataTable}))
830
831        # Create predictions from the evaluation data.
832
833        try:
834            Logger.Info(_(u'Predicting the response for the evaluation data...'))
835            r('if (exists("rPackage")) library(rPackage, character.only=TRUE)')
836            newPredictionsName = R.GetUniqueVariableName()
837           
838            if r('exists("rPackage")') and r['rPackage'] == 'rpart':
839                r('%s <- as.vector(predict(%s, newdata=%s, type="prob")[,2])' % (newPredictionsName, modelName, dataFrameName))
840            else:
841                r('%s <- as.vector(predict(%s, newdata=na.omit(%s), type="response"))' % (newPredictionsName, modelName, dataFrameName))
842        except:
843            r('if (exists("%s")) rm("%s")' % (dataFrameName, dataFrameName))
844            r('if (exists("%s")) rm("%s")' % (newPredictionsName, newPredictionsName))
845            raise
846
847        # Return the names of the R variables we created for the data
848        # frame and the predictions.
849
850        return dataFrameName, newPredictionsName
851
852    @classmethod
853    def RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords(cls, table, percentEvaluation=33.33333, field=u'IsEvalData', where=None):
854        cls.__doc__.Obj.ValidateMethodInvocation()
855
856        # If the field already exists, validate that it is the correct
857        # type.
858
859        conn = ArcGIS91DatabaseConnection()
860        dataType = conn.GetFieldDataType(table, field)
861        if dataType is not None:
862            if dataType not in [u'SHORT', u'LONG', u'FLOAT', u'DOUBLE']:
863                Logger.RaiseException(ValueError(_(u'Existing field %(field)s of table %(table)s cannot be used because it has the data type %(dt)s. Please specify a field that has the data type SHORT, LONG, FLOAT, or DOUBLE.') % {u'field': field, u'table': table, u'dt': dataType}))
864
865        # If the field does not already exist, create it.
866
867        else:
868            conn.AddField(table, field, u'SHORT')
869            Logger.Info(_(u'Added field %(field)s to table %(table)s.') % {u'field': field, u'table': table})
870
871        # If the caller provided a where clause, create a table view.
872
873        gp = GeoprocessorManager.GetWrappedGeoprocessor()
874       
875        if where is None:
876            tableView = table
877        else:
878            tableView = GeoprocessorManager.GetUniqueLayerName()
879            gp.MakeTableView_management(table, tableView, where)
880        try:
881
882            # If there were no rows, warn the user.
883
884            totalRows = conn.GetRowCount(tableView)
885            if totalRows <= 0:
886                if where is not None:
887                    Logger.Warning(_(u'No rows of table %(table)s were changed because the where clause "%(where)s" did not select any rows from the table.') % {u'table': table, u'where': where})
888                else:
889                    Logger.Warning(_(u'Table %(table)s is empty.') % {u'table': table})
890
891            # Otherwise, compute the number of rows that should be
892            # designated training and evaluation, create a shuffled
893            # list of 0 and 1 values to draw from, and designate the
894            # rows.
895
896            else:
897                evaluationRows = int(round(totalRows * percentEvaluation / 100.0))
898                trainingRows = totalRows - evaluationRows
899                values = [0] * trainingRows + [1] * evaluationRows
900                random.shuffle(values)
901
902                Logger.Info(_(u'Updating table %(table)s: designating %(t)i rows as training records and %(e)i rows as evaluation records...') % {u'table': table, u't': trainingRows, u'e': evaluationRows})
903
904                i = 0
905                cur = conn.OpenUpdateCursor(tableView, rowCount=totalRows)
906                try:
907                    while cur.NextRow():
908                        cur.SetValue(field, values[i])
909                        cur.UpdateRow()
910                        i += 1
911                        if i >= len(values):        # This should never happen
912                            break
913                finally:
914                    del cur
915
916        # Delete the table view, if we created one.
917
918        finally:
919            try:
920                if tableView != table and GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMajorVersion() == 1:
921                    gp.Delete(tableView)
922            except:
923                pass
924
925        # Return successfully.
926       
927        return table
928
929
930# Private helper functions
931
932def _LoadDataFrameForModelFitting(inputTable, formulas, formulaIsRequired, family, link, variance, theta, where, xColumnName, yColumnName, zColumnName, mColumnName, extraFieldsToLoad=None):
933
934    # Determine if the input table is a point feature class.
935
936    gp = GeoprocessorManager.GetWrappedGeoprocessor()
937    d = gp.Describe(inputTable)
938    gotPointFeatures = hasattr(d, u'ShapeType') and isinstance(d.ShapeType, basestring) and d.ShapeType.lower() == u'point'
939
940    # Create a list of all of the fields in the formula.
941
942    r = R.GetInterpreter()
943    fields = set()
944
945    for name, formula in formulas.items():
946        if formula is not None:
947            terms = r('all.vars(%s)' % formula)
948            if isinstance(terms, types.ListType):
949                fields = fields.union(set(terms))
950            elif isinstance(terms, basestring):
951                fields = fields.union(set([terms]))
952            elif formulaIsRequired[name]:
953                Logger.RaiseException(ValueError(_(u'The %(name)s "%(formula)s" did not include any terms. Please provide a %(name)s that includes at least one term.') % {u'name': name, u'formula': formula}))
954
955    if extraFieldsToLoad is not None:
956        fields = fields.union(set(extraFieldsToLoad))
957
958    fields = list(fields)
959
960    # If the caller provided an X, Y, Z, or M column name and it
961    # appears in a formula, report an error if the input table is not
962    # a point feature class.
963
964    if xColumnName is not None and xColumnName in fields:
965        if gotPointFeatures:
966            if xColumnName == u'x' or xColumnName == u'X':
967                Logger.RaiseException(ValueError(_(u'The variable representing the X coordinates of point features cannot be named "x". We apologize for this inconvenient restriction. Please specify a different name.')))        # Imposed by rgdal
968            fields.remove(xColumnName)
969        else:
970            Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the X coordinate of point features, but the input table "%(table)s" does not contain point features. Please specify a table that includes point features or remove this variable from the model.') % {u'var': xColumnName, u'table': inputTable}))
971    else:
972        xColumnName = None
973   
974    if yColumnName is not None and yColumnName in fields:
975        if gotPointFeatures:
976            if yColumnName == u'y' or yColumnName == u'Y':
977                Logger.RaiseException(ValueError(_(u'The variable representing the Y coordinates of point features cannot be named "y". We apologize for this inconvenient restriction. Please specify a different name.')))        # Imposed by rgdal
978            fields.remove(yColumnName)
979        else:
980            Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the Y coordinate of point features, but the input table "%(table)s" does not contain point features. Please specify a table that includes point features or remove this variable from the model.') % {u'var': yColumnName, u'table': inputTable}))
981    else:
982        yColumnName = None
983   
984    if zColumnName is not None and zColumnName in fields:
985        if gotPointFeatures:
986            if d.HasZ:
987                fields.remove(zColumnName)
988            else:
989                Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the Z coordinate of point features, but the feature class "%(table)s" does not have Z coordinates. Please specify a feature class that includes Z coordinates or remove this variable from the model.') % {u'var': zColumnName, u'table': inputTable}))
990        else:
991            Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the X coordinate of point features, but the input table "%(table)s" does not contain point features. Please specify a table that includes point features or remove this variable from the model.') % {u'var': zColumnName, u'table': inputTable}))
992    else:
993        zColumnName = None
994   
995    if mColumnName is not None and mColumnName in fields:
996        if gotPointFeatures:
997            if d.HasZ:
998                fields.remove(mColumnName)
999            else:
1000                Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the M value of point features, but the feature class "%(table)s" does not have M values. Please specify a feature class that includes M values or remove this variable from the model.') % {u'var': mColumnName, u'table': inputTable}))
1001        else:
1002            Logger.RaiseException(ValueError(_(u'The variable "%(var)s" is designated as the M value of point features, but the input table "%(table)s" does not contain point features. Please specify a table that includes point features or remove this variable from the model.') % {u'var': mColumnName, u'table': inputTable}))
1003    else:
1004        mColumnName = None
1005
1006    if u'x' in fields or u'X' in fields or u'y' in fields or u'Y' in fields:
1007        Logger.RaiseException(ValueError(_(u'The model cannot reference a field named "x" or "y". We apologize for this inconvenient restriction. To use this field in the model, rename it to something other than "x" or "y".')))        # Imposed by rgdal
1008
1009    # Load the table into an R data frame.
1010   
1011    dataFrameName = R.GetUniqueVariableName()
1012    R.LoadDataFrameFromArcGISTable(inputTable, dataFrameName, fields=fields, where=where, xColumnName=xColumnName, yColumnName=yColumnName, zColumnName=zColumnName, mColumnName=mColumnName)
1013    try:
1014
1015        # Validate that the data frame contains at least one row.
1016       
1017        if r('length(%s)' % dataFrameName) <= 0:
1018            if where is not None:
1019                Logger.RaiseException(ValueError(_(u'The where clause "%(where)s" did not select any rows from the table %(table)s.') % {u'table': inputTable, u'where': where}))
1020            else:
1021                Logger.RaiseException(ValueError(_(u'The table %(table)s is empty.') % {u'table': inputTable}))
1022
1023        # Prepare the column names to be passed as parameters to the R
1024        # model-fitting function.
1025
1026        if xColumnName is not None:
1027            xColumnParam = u'"' + xColumnName + u'"'
1028        else:
1029            xColumnParam = u'NULL'
1030           
1031        if yColumnName is not None:
1032            yColumnParam = u'"' + yColumnName + u'"'
1033        else:
1034            yColumnParam = u'NULL'
1035           
1036        if zColumnName is not None:
1037            zColumnParam = u'"' + zColumnName + u'"'
1038        else:
1039            zColumnParam = u'NULL'
1040           
1041        if mColumnName is not None:
1042            mColumnParam = u'"' + mColumnName + u'"'
1043        else:
1044            mColumnParam = u'NULL'
1045
1046        # If the input table is a point feature class, prepare its
1047        # coordinate system string to be passed as a parameter to the
1048        # R model-fitting function.
1049           
1050        if gotPointFeatures:
1051            coordinateSystemParam = u'"' + gp.CreateSpatialReference_management(d.SpatialReference).split(u';')[0] + u'"'
1052        else:
1053            coordinateSystemParam = u'NULL'
1054
1055        # Create a family string to be passed as a parameter to the R
1056        # model-fitting function.
1057
1058        if family is not None:
1059            if link is None:
1060                if family == u'binomial':
1061                    link = u'logit'
1062                elif family == u'gaussian':
1063                    link = u'identity'
1064                elif family == u'Gamma':
1065                    link = u'inverse'
1066                elif family == u'inverse.gaussian':
1067                    link = u'1/mu^2'
1068                elif family == u'negbin':
1069                    link = u'log'
1070                elif family == u'poisson':
1071                    link = u'log'
1072                elif family == u'quasi':
1073                    link = u'identity'
1074                elif family == u'quasibinomial':
1075                    link = u'logit'
1076                elif family == u'quasipoisson':
1077                    link = u'log'
1078                else:
1079                    Logger.RaiseException(_(u'Programming error in this tool: unknown model family "%s". Please contact the author of this tool for assistance.') % family)
1080           
1081            if family == u'negbin':
1082                familyParam = '%s(theta=%s, link="%s")' % (family, theta, link)
1083
1084            elif family == u'quasi':
1085                if variance is None:
1086                    variance = u'constant'
1087                familyParam = '%s(link="%s", variance="%s")' % (family, link, variance)
1088
1089            else:
1090                familyParam = '%s(link="%s")' % (family, link)
1091
1092        else:
1093            familyParam = None
1094
1095    except:
1096        r('rm("%s")' % dataFrameName)
1097        raise
1098
1099    # Return successfully.
1100
1101    return dataFrameName, xColumnParam, yColumnParam, zColumnParam, mColumnParam, coordinateSystemParam, familyParam
1102
1103def _PredictFromArcGISRasters(modelType, fitToolName, inputModelFile, inputPredictorRasters, variableNames, outputResponseRaster, templateRaster, resamplingTechniques, ignoreOutOfRangeValues, outputErrorRaster, outputBinaryResponseRaster, cutoff, buildPyramids, overwriteExisting):
1104
1105    # Perform additional parameter validation.
1106
1107    gp = GeoprocessorManager.GetWrappedGeoprocessor()
1108
1109    for raster in inputPredictorRasters:
1110        d = gp.Describe(raster)
1111        if d.SpatialReference.Name is None or d.SpatialReference.Name.lower() == u'unknown':
1112            Logger.RaiseException(ValueError(_(u'The predictor raster %(raster)s does not have a coordinate system defined. Please define the coordinate system for this raster and try again. (Use the ArcGIS Define Projection tool.)') % {u'raster': raster}))
1113
1114    if outputResponseRaster.lower() in map(lambda s: unicode(s).lower(), inputPredictorRasters):
1115        Logger.RaiseException(ValueError(_(u'The output response raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output response raster or remove it from the list of input predictor rasters.') % {u'out': outputResponseRaster}))
1116
1117    if outputErrorRaster is not None and outputErrorRaster.lower() in map(lambda s: unicode(s).lower(), inputPredictorRasters):
1118        Logger.RaiseException(ValueError(_(u'The output standard error raster %(out)s also appears in the list of input predictor rasters. This is not allowed. Please specify a different output standard error raster or remove it from the list of input predictor rasters.') % {u'out': outputResponseRaster}))
1119
1120    if outputBinaryResponseRaster is not None and cutoff is None:
1121        Logger.RaiseException(ValueError(_(u'To create a binary response raster, you must also supply a value for the Cutoff parameter.')))
1122
1123    # Look up the coordinate system, cell size, and extent of the
1124    # template raster.
1125
1126    if templateRaster is None:
1127        templateRaster = inputPredictorRasters[0]
1128
1129    templateDescribe = gp.Describe(templateRaster)
1130    if templateDescribe.SpatialReference.Name is None or templateDescribe.SpatialReference.Name.lower() == u'unknown':
1131        Logger.RaiseException(ValueError(_(u'The template raster %(raster)s does not have a coordinate system defined. Please define the coordinate system for this raster and try again. (Use the ArcGIS Define Projection tool.)') % {u'raster': templateRaster}))
1132
1133    coordinateSystem = gp.CreateSpatialReference(templateDescribe.SpatialReference).split(';')[0]
1134    cellSize = templateDescribe.MeanCellWidth
1135    extent = templateDescribe.Extent
1136
1137    # Load the fitted model.
1138
1139    r = R.GetInterpreter()
1140
1141    r('load("%s")' % inputModelFile.replace('\\', '\\\\'))
1142    try:
1143        if not r('exists("model")'):
1144            Logger.RaiseException(ValueError(_('The input model file %(file)s does not contain a variable called "model", indicating that it was not generated by the Fit %(type)s tool. Please provide a file that was generated by the %(fitToolName)s tool.') % {u'file': inputModelFile, u'fitToolName': fitToolName}))
1145        if modelType.lower() not in r('class(model)'):
1146            Logger.RaiseException(ValueError(_('The input model file %(file)s contains a variable called "model" but it is not a %(type)s, indicating that it was not generated by the %(fitToolName)s tool. Please provide a file that was generated by the %(fitToolName)s tool.') % {u'file': inputModelFile, u'type': modelType, u'fitToolName': fitToolName}))
1147
1148        if r('exists("rPackage")'):
1149            Logger.Info(_(u'Loaded %(type)s from %(file)s. The model was fitted with the R %(pkg)s package.') % {u'type': r('class(model)[1]').upper(), u'file': inputModelFile, u'pkg': r['rPackage']})
1150        else:
1151            Logger.Info(_(u'Loaded %(type)s from %(file)s.') % {u'type': r('class(model)[1]').upper(), u'file': inputModelFile, })
1152
1153        # If the model required an R package, load it.
1154
1155        if r('exists("rPackage")'):
1156            dependency = RPackageDependency(unicode(r['rPackage']))
1157            dependency.Initialize()
1158
1159        # If it is a GAM fitted by the R gam package, report an error
1160        # if the caller specified an outputErrorRaster, because that
1161        # package's predict.gam function cannot generate predictions
1162        # from the newdata parameter.
1163
1164        if outputErrorRaster is not None and modelType.lower() == 'gam' and r['rPackage'] == 'gam':
1165            Logger.RaiseException(ValueError(_('The input model from file %(file)s is a GAM that was fitted using the R gam package. Because the gam package cannot return standard errors for responses predicted from data other than those used to fit the model, a standard error raster cannot be produced. Please do not specify a standard error raster, or, if you must have one, re-fit the GAM using the R mgcv package.') % {u'file': inputModelFile}))
1166
1167        # If it is a GAM fitted by the R gam package, we have to work
1168        # around a bug in predict.gam. The situation is described in
1169        # more detail in a message to the R help mailing list titled
1170        # "use of step.gam (from package 'gam') and superassignment
1171        # inside functions" dated 28-Feb-2008 (see
1172        # https://stat.ethz.ch/pipermail/r-help/2008-February/155586.html).
1173        # Define uniquely-named global variables for the formula, data
1174        # and family parameters. This is very hacky but it is the only
1175        # way I could get it working. Note the use of the <<-
1176        # operator. Hopefully these names are unique.
1177       
1178        if modelType.lower() == 'gam' and r['rPackage'] == 'gam':
1179            r('f20985982305 <<- model$formula')
1180            r('data20985982305 <<- model$data')
1181            r('fam20985982305 <<- model$family')
1182
1183        # Log a summery of the model, to remind the user what it is.
1184
1185        if r('exists("rPackage")') and r['rPackage'] != 'rpart':
1186            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'SummarizeModel.r'), False)
1187
1188            r('writeLines("")')
1189            r('writeLines("MODEL SUMMARY:")')
1190            r('writeLines("==============")')
1191            r('writeLines(SummarizeModel(model))')
1192            r('writeLines("")')
1193
1194        # If the caller provided a non-rpart, non-lme model without any
1195        # predictor variables, then the predicted response and
1196        # standard errors will be constant values. Report a warning
1197        # and create constant rasters having these values.
1198
1199        if (not r('exists("rPackage")') or r['rPackage'] not in ['rpart', 'nlme']) and isinstance(r('all.vars(model$formula)'), basestring):        # If only one term is in the model (i.e. the response variable), rpy will return it as a string, rather than a list with one string in it.
1200            if outputErrorRaster is not None:
1201                Logger.Warning(_(u'The model\'s formula does not contain any predictor variables. Because of this, the output response and standard error rasters will contain constant values.'))
1202            else:
1203                Logger.Warning(_(u'The model\'s formula does not contain any predictor variables. Because of this, the output response raster will contain a constant value.'))
1204
1205            responseValue = r('predict(model, newdata=data.frame(0), type="response")')
1206            gp.CreateRandomRaster_management(os.path.dirname(outputResponseRaster), os.path.basename(outputResponseRaster), u'UNIFORM %g %g' % (responseValue, responseValue), extent, cellSize)
1207            gp.DefineProjection_management(outputResponseRaster, coordinateSystem)
1208            if buildPyramids:
1209                gp.BuildPyramids_Management(outputResponseRaster)
1210
1211            if outputErrorRaster is not None:
1212                errorValue = r('predict(model, newdata=data.frame(0), type="response", se.fit=TRUE)$se.fit')
1213                gp.CreateRandomRaster_management(os.path.dirname(outputErrorRaster), os.path.basename(outputErrorRaster), u'UNIFORM %g %g' % (errorValue, errorValue), extent, cellSize)
1214                gp.DefineProjection_management(outputErrorRaster, coordinateSystem)
1215                if buildPyramids:
1216                    gp.BuildPyramids_Management(outputResponseRaster)
1217
1218        # Otherwise, the model contains at least one predictor
1219        # variable, do the prediction.
1220
1221        else:
1222            tempDir = TemporaryDirectory()
1223           
1224            # First prepare the input rasters for the prediction.
1225
1226            _PreparePredictorRasters(r, gp, tempDir, variableNames, inputPredictorRasters, templateRaster, templateDescribe, coordinateSystem, cellSize, extent, resamplingTechniques)
1227
1228            # Do the prediction. This creates files in GDAL-compatible
1229            # .bil format.
1230            #
1231            # Force the logging system to log R Info messages as
1232            # Debug so that the user is not spammed with "Closing
1233            # GDAL dataset handle" messages. There appears to be
1234            # no way to suppress these messages from within R.
1235
1236            Logger.Info(_(u'Predicting...'))
1237
1238            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'Utils.r'), False)
1239            R.EvaluateFile(os.path.join(os.path.dirname(sys.modules[GLM.__module__].__file__), 'PredictLMForArcGISRasters.r'), False)
1240
1241            oldLoggingLevel = r.LogInfoAsDebug
1242            r.LogInfoAsDebug = True
1243            try:
1244                if r('exists("rPackage")'):
1245                    rPackage = '"' + r['rPackage'] + '"'
1246                else:
1247                    rPackage = 'NULL'
1248
1249                tempResponseFile = os.path.join(tempDir.Path, u'temp_response')
1250               
1251                if outputErrorRaster is None:
1252                    r('PredictLMForArcGISRasters(model, %s, rastersForPredictors, "%s", ignoreOutOfRangeValues=%s)' % (rPackage, tempResponseFile.replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper()))
1253                else:
1254                    tempErrorFile = os.path.join(tempDir.Path, u'temp_standarderror')
1255                    r('PredictLMForArcGISRasters(model, %s, rastersForPredictors, "%s", "%s", ignoreOutOfRangeValues=%s)' % (rPackage, tempResponseFile.replace('\\', '\\\\'), tempErrorFile.replace('\\', '\\\\'), str(ignoreOutOfRangeValues).upper()))
1256            finally:
1257                r.LogInfoAsDebug = oldLoggingLevel
1258
1259            # Copy the .bil files to the destination rasters.
1260
1261            Logger.Info(_(u'Creating outputs...'))
1262
1263            from GeoEco.Datasets import Dataset
1264            from GeoEco.Datasets.GDAL import GDALDataset
1265            from GeoEco.Datasets.ArcGIS import ArcGISWorkspace, ArcGISRaster
1266
1267            bilFile = GDALDataset(tempResponseFile + '.bil', lazyPropertyValues={'SpatialReference': Dataset.ConvertSpatialReference('arcgis', coordinateSystem, 'obj')})
1268            workspace = ArcGISWorkspace(os.path.dirname(outputResponseRaster), ArcGISRaster, pathCreationExpressions=[os.path.basename(outputResponseRaster)])
1269            try:
1270                workspace.ImportDatasets(bilFile.QueryDatasets(reportProgress=False), {False: 'Add', True: 'Replace'}[overwriteExisting], reportProgress=False, calculateStatistics=True, buildPyramids=buildPyramids)
1271            finally:
1272                del bilFile
1273                del workspace
1274
1275            if outputErrorRaster is not None:
1276                bilFile = GDALDataset(tempErrorFile + '.bil', lazyPropertyValues={'SpatialReference': Dataset.ConvertSpatialReference('arcgis', coordinateSystem, 'obj')})
1277                workspace = ArcGISWorkspace(os.path.dirname(outputErrorRaster), ArcGISRaster, pathCreationExpressions=[os.path.basename(outputErrorRaster)])
1278                try:
1279                    workspace.ImportDatasets(bilFile.QueryDatasets(reportProgress=False), {False: 'Add', True: 'Replace'}[overwriteExisting], reportProgress=False, calculateStatistics=True, buildPyramids=buildPyramids)
1280                finally:
1281                    del bilFile
1282                    del workspace
1283
1284        # If the caller requested a binary response raster,
1285        # create it.
1286       
1287        if outputBinaryResponseRaster is not None:
1288            gp.Con_sa(outputResponseRaster, 1, outputBinaryResponseRaster, 0, 'Value >= ' + str(cutoff))
1289            if buildPyramids:
1290                gp.BuildPyramids_Management(outputBinaryResponseRaster)
1291
1292    # Delete R variables assigned by this function.
1293   
1294    finally:
1295        r('if (exists("rastersForPredictors")) rm("rastersForPredictors")')
1296        r('if (exists("model")) rm("model")')
1297        r('if (exists("rPackage")) rm("rPackage")')
1298        r('if (exists("xVar")) rm("xVar")')
1299        r('if (exists("yVar")) rm("yVar")')
1300        r('if (exists("zVar")) rm("zVar")')
1301        r('if (exists("mVar")) rm("mVar")')
1302        r('if (exists("coordinateSystem")) rm("coordinateSystem")')
1303        r('if (exists("f20985982305")) rm("f20985982305", pos=globalenv())')
1304        r('if (exists("data20985982305")) rm("data20985982305", pos=globalenv())')
1305        r('if (exists("fam20985982305")) rm("fam20985982305", pos=globalenv())')
1306
1307def _PreparePredictorRasters(r, gp, tempDir, variableNames, inputPredictorRasters, templateRaster, templateDescribe, coordinateSystem, cellSize, extent, resamplingTechniques):
1308
1309    # Build a dictionary that maps predictor variable names in the
1310    # model to rasters provided by the caller.
1311
1312    rastersForPredictors = {}
1313    createXRaster = False
1314    createYRaster = False
1315
1316    if r('exists("rPackage")') and r['rPackage'] == 'rpart':
1317        allVars = r('all.vars(model$terms)')[1:]
1318        if isinstance(allVars, basestring):
1319            allVars = [allVars]
1320
1321    elif r('exists("rPackage")') and r['rPackage'] == 'nlme':
1322        fixed = r('all.vars(fixed)')[1:]
1323        if isinstance(fixed, basestring):
1324            fixed = [fixed]
1325        random = r('all.vars(random)')
1326        if isinstance(random, basestring):
1327            random = [random]
1328        allVars = list(set(fixed).union(set(random)))
1329
1330    else:
1331        allVars = r('all.vars(model$formula)')[1:]
1332        if isinstance(allVars, basestring):
1333            allVars = [allVars]
1334   
1335    for variable in allVars:
1336        if variable not in variableNames:
1337            if variable == r['xVar']:
1338                createXRaster = True
1339            elif variable == r['yVar']:
1340                createYRaster = True
1341            else:
1342                Logger.RaiseException(ValueError(_('The predictor variable %(var)s appears in the model\'s formula but does not appear in the list of model variable names for predictor rasters provided to this tool. Add that predictor variable to the list and try again.') % {u'var': variable}))
1343        else:
1344            rastersForPredictors[variable] = inputPredictorRasters[variableNames.index(variable)]
1345
1346    # Generate the X and Y coordinate rasters, if needed and possible.
1347
1348    if createXRaster:
1349        if r['coordinateSystem'] != coordinateSystem:
1350            Logger.RaiseException(ValueError(_('The model formula includes the X coordinate as predictor variable %(var)s, but the list of predictor rasters does not include this variable. Because the model was fitted to points that used a different coordinate system than the one you want to use for the predictions, this tool cannot automatically generate a raster for the X coordinates. Please generate one yourself and add it to the list of predictor rasters') % {u'var': r['xVar']}))
1351
1352        xRaster = os.path.join(tempDir.Path, u'xvar')
1353        oldLogInfoAsDebug = Logger.LogInfoAndSetInfoToDebug(_(u'Creating a predictor raster for the variable "%(var)s" representing the X coordinate...') % {u'var': r['xVar']})
1354        try:
1355            ArcGISRaster.CreateXRaster(xRaster, extent, cellSize, u'Center', coordinateSystem)
1356        finally:
1357            Logger.SetLogInfoAsDebug(oldLogInfoAsDebug)
1358        rastersForPredictors[r['xVar']] = xRaster
1359
1360    if createYRaster:
1361        if r['coordinateSystem'] != coordinateSystem:
1362            Logger.RaiseException(ValueError(_('The model formula includes the Y coordinate as predictor variable %(var)s, but the list of predictor rasters does not include this variable. Because the model was fitted to points that used a different coordinate system than the one you want to use for the predictions, this tool cannot automatically generate a raster for the Y coordinates. Please generate one yourself and add it to the list of predictor rasters') % {u'var': r['yVar']}))
1363
1364        yRaster = os.path.join(tempDir.Path, u'yvar')
1365        oldLogInfoAsDebug = Logger.LogInfoAndSetInfoToDebug(_(u'Creating a predictor raster for the variable "%(var)s" representing the Y coordinate...') % {u'var': r['yVar']})
1366        try:
1367            ArcGISRaster.CreateYRaster(yRaster, extent, cellSize, u'Center', coordinateSystem)
1368        finally:
1369            Logger.SetLogInfoAsDebug(oldLogInfoAsDebug)
1370        rastersForPredictors[r['yVar']] = yRaster
1371
1372    # Check each predictor raster to see if it needs to be copied,
1373    # projected or clipped before we can use it.
1374
1375    Logger.Info(_('Checking coordinate systems, extents, and cell sizes of predictor rasters and reprojecting and clipping as needed to make them conform to the template raster...'))
1376
1377    from GeoEco.Types import EnvelopeTypeMetadata
1378    [templateLeft, templateBottom, templateRight, templateTop] = EnvelopeTypeMetadata.ParseFromArcGISString(extent)
1379
1380    vars = copy.deepcopy(rastersForPredictors.keys())           # I'm going to modify the dictionary in the loop below; not sure if deepcopy is needed, but playing it safe.
1381    rasters = copy.deepcopy(rastersForPredictors.values())
1382
1383    for i in range(len(vars)):
1384        needToCopy = False
1385        needToProject = False
1386        needToClip = False
1387
1388        # If this is predictor raster is a raster layer, we need to
1389        # make a copy of it in the file system so that it can be read
1390        # by the R code using the rgdal package.
1391       
1392        d = gp.Describe(rasters[i])
1393        if d.DataType.lower() == u'rasterlayer':
1394            needToCopy = True
1395
1396        # If this is not the template raster, check to see whether it
1397        # needs to be projected or clipped.
1398
1399        if rasters[i] != templateRaster:
1400            cs = gp.CreateSpatialReference_management(d.SpatialReference).split(u';')[0]
1401
1402            if cs.lower() != coordinateSystem.lower():
1403                Logger.Info(_(u'Projecting and clipping %(raster)s. Its coordinate system does not match that of the template raster.') % {u'raster': rasters[i]})
1404                needToProject = True
1405
1406            elif abs(1.0 - cellSize / d.MeanCellWidth) > 0.00001:
1407                Logger.Info(_(u'Projecting and clipping %(raster)s. Its cell size (%(cs1)s) does not match the cell size of the template raster (%(cs2)s).') % {u'raster': rasters[i], u'cs1': repr(d.MeanCellWidth), u'cs2': repr(cellSize)})
1408                needToProject = True
1409
1410            else:
1411                [rasterLeft, rasterBottom, rasterRight, rasterTop] = EnvelopeTypeMetadata.ParseFromArcGISString(d.Extent)
1412                   
1413                deltaLeft = (templateLeft - rasterLeft) / cellSize          # Difference, in cells, between the left edge of the template and this raster
1414                deltaBottom = (templateBottom - rasterBottom) / cellSize    # Difference, in cells, between the bottom edge of the template and this raster
1415                deltaRight = (templateRight - rasterRight) / cellSize       # Difference, in cells, between the right edge of the template and this raster
1416                deltaTop = (templateTop - rasterTop) / cellSize             # Difference, in cells, between the top edge of the template and this raster
1417
1418                if deltaLeft < -0.01:
1419                    Logger.RaiseException(ValueError(_(u'The template raster is too large; its left edge falls (%(e1)s) outside the left edge of the predictor raster %(raster)s (%(e2)s). Please reduce the extent of the template raster such that it does not exceed the extents of any of the predictor rasters.') % {u'raster': rasters[i], u'e1': repr(templateLeft), u'e2': repr(rasterLeft)}))
1420                if deltaRight > 0.01:
1421                    Logger.RaiseException(ValueError(_(u'The template raster is too large; its right edge falls (%(e1)s) outside the right edge of the predictor raster %(raster)s (%(e2)s). Please reduce the extent of the template raster such that it does not exceed the extents of any of the predictor rasters.') % {u'raster': rasters[i], u'e1': repr(templateRight), u'e2': repr(rasterRight)}))
1422                if deltaTop > 0.01:
1423                    Logger.RaiseException(ValueError(_(u'The template raster is too large; its top edge falls (%(e1)s) outside the top edge of the predictor raster %(raster)s (%(e2)s). Please reduce the extent of the template raster such that it does not exceed the extents of any of the predictor rasters.') % {u'raster': rasters[i], u'e1': repr(templateTop), u'e2': repr(rasterTop)}))
1424                if deltaBottom < -0.01:
1425                    Logger.RaiseException(ValueError(_(u'The template raster is too large; its bottom edge falls (%(e1)s) outside the bottom edge of the predictor raster %(raster)s (%(e2)s). Please reduce the extent of the template raster such that it does not exceed the extents of any of the predictor rasters.') % {u'raster': rasters[i], u'e1': repr(templateBottom), u'e2': repr(rasterBottom)}))
1426
1427                if abs(deltaLeft) < 0.01 and abs(deltaBottom) < 0.01 and abs(deltaRight) < 0.01 and abs(deltaTop) < 0.01 and d.Width == templateDescribe.Width and d.Height == templateDescribe.Height:
1428                    Logger.Debug(_(u'%(raster)s has the same coordinate system, cell size, and extent as the template raster.') % {u'raster': rasters[i]})
1429
1430                elif abs(deltaLeft - round(deltaLeft)) < 0.01 and abs(deltaBottom - round(deltaBottom)) < 0.01 and abs(deltaRight - round(deltaRight)) < 0.01 and abs(deltaTop - round(deltaTop)) < 0.01:
1431                    Logger.Info(_(u'Clipping %(raster)s. Although it has the same coordinate system and cell size as the template raster and is aligned on the same grid coordinates, it has a different extent.') % {u'raster': rasters[i]})
1432                    needToClip = True
1433
1434                else:
1435                    Logger.Info(_(u'Projecting and clipping %(raster)s. Although it has the same coordinate system and cell size as the template raster, it is aligned on different grid coordinates and has a different extent.') % {u'raster': rasters[i]})
1436                    needToProject = True
1437
1438        # If this raster does not need to be copied, projected, or
1439        # clipped, go on to the next one.
1440
1441        if not (needToCopy or needToProject or needToClip):
1442            continue
1443
1444        # If we need to project, do it now. This will also take care
1445        # of copying and clipping the raster, if needed.
1446
1447        if needToProject:
1448
1449            # Use the caller's resampling technique, if one was
1450            # provided. Otherwise use BILINEAR if it is a
1451            # floating-point raster, or NEAREST if it is not.
1452           
1453            if resamplingTechniques is not None and vars[i] in variableNames and variableNames.index(vars[i]) < len(resamplingTechniques):
1454                resamplingTechnique = resamplingTechniques[variableNames.index(vars[i])]
1455            elif d.PixelType[0] in ['F', 'f']:
1456                resamplingTechnique = u'BILINEAR'
1457            else:
1458                resamplingTechnique = u'NEAREST'
1459
1460            # Set the geoprocessor's Extent environment
1461            # variable to the output extent. This will
1462            # force ProjectRaster to clip to the desired
1463            # extent.
1464           
1465            oldGPExtent = gp.Extent
1466            if oldGPExtent is None:
1467                oldGPExtent = u''
1468            gp.Extent = extent
1469
1470            try:
1471                # If this is ArcGIS 9.3, set the SnapRaster
1472                # environment variable to the template raster.
1473               
1474                if GeoprocessorManager.GetArcGISMajorVersion() > 9 or GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() >= 3:
1475                    oldSnapRaster = gp.SnapRaster
1476                    if oldSnapRaster is None:
1477                        oldSnapRaster = u''
1478                    gp.SnapRaster = templateRaster
1479
1480                # Project the raster. If this is ArcGIS 9.3 or later,
1481                # the SnapRaster and Extent environment variables will
1482                # ensure the projected cells are snapped and clipped
1483                # as requested by the caller. If it is 9.2, use the
1484                # Registration_Point parameter instead of SnapRaster,
1485                # which did not exist in 9.2 (there was something like
1486                # it, but it did not work correctly).
1487
1488                try:
1489                    tempRaster = os.path.join(tempDir.Path, u'projected%i' % i)
1490                    if GeoprocessorManager.GetArcGISMajorVersion() > 9 or GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() >= 3:
1491                        gp.ProjectRaster_management(rasters[i], tempRaster, coordinateSystem, resamplingTechnique, cellSize)
1492                    else:
1493                        gp.ProjectRaster_management(rasters[i], tempRaster, coordinateSystem, resamplingTechnique, cellSize, None, extent.rsplit(' ', 2)[0])
1494
1495                    # Even though we provided an extent and cell size
1496                    # of an existing raster (the template raster),
1497                    # using values we obtained through ArcGIS APIs,
1498                    # the ProjectRaster tool will not necessarily
1499                    # create a raster that has exactly that extent. It
1500                    # may be one cell larger or smaller in any of the
1501                    # four directions. Different versions of ArcGIS
1502                    # seem to work differently in this respect. It is
1503                    # probably due to whether it uses 32-bit or 64-bit
1504                    # floats, and other various floating-point
1505                    # rounding issues.
1506                    #
1507                    # The prediction code in R requires that all
1508                    # predictor rasters have the same extent (really,
1509                    # the same number of rows and columns). If ArcGIS
1510                    # produced a raster that was too small or large in
1511                    # any dimension, try ProjectRaster again with
1512                    # values that will hopefully yield what we want.
1513
1514                    describeTemp = gp.Describe(tempRaster)
1515                    newEdges = _CheckResultingEdges(tempRaster, describeTemp.Extent, templateLeft, templateBottom, templateRight, templateTop, cellSize)
1516
1517                    if newEdges is not None:
1518                        Logger.Debug(_(u'Reprojecting because the first attempt produced a raster with an extent (%(e1)s) that did not match the extent of the template raster (%(e2)s).') % {u'e1': describeTemp.Extent, u'e2': extent})
1519                        gp.Extent = u' '.join(map(unicode, newEdges))
1520                        tempRaster = os.path.join(tempDir.Path, u'projected%ia' % i)
1521                        if GeoprocessorManager.GetArcGISMajorVersion() > 9 or GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() >= 3:
1522                            gp.ProjectRaster_management(rasters[i], tempRaster, coordinateSystem, resamplingTechnique, cellSize)
1523                        else:
1524                            gp.ProjectRaster_management(rasters[i], tempRaster, coordinateSystem, resamplingTechnique, cellSize, None, extent.rsplit(' ', 2)[0])
1525
1526                        # Check the extent of the new attempt. If we
1527                        # still did not get it, report an error.
1528
1529                        describeTemp = gp.Describe(tempRaster)
1530                        [projectedLeft, projectedBottom, projectedRight, projectedTop] = EnvelopeTypeMetadata.ParseFromArcGISString(describeTemp.Extent)
1531
1532                        if projectedLeft != templateLeft or projectedBottom != templateBottom or projectedRight != templateRight or projectedTop != templateTop:
1533                            Logger.RaiseException(RuntimeError(_(u'This tool was unable to project %(raster)s to the coordinate system of the template raster and obtain a projected raster with an extent that matched that of the template raster. The extent of the projected raster was %(e1)s, while the extent of the template raster is %(e2)s. (The extent values are ordered LEFT, BOTTOM, RIGHT, TOP.) If the extent of the template is larger than the projected raster, it may mean that the template is too large (i.e. that the predictor raster does not cover the entire extent of the template). In that case, try reducing the extent of the template. If that is not the problem, try projecting the predictor raster yourself prior to providing it to this tool, such that its coordinate system, cell size, and extent match that of the template.') % {u'raster': rasters[i], u'e1': describeTemp.Extent, u'e2': extent}))
1534
1535                # If this is ArcGIS 9.3, reset the SnapRaster ArcGIS
1536                # environment variable to what it was before.
1537               
1538                finally:
1539                    try:
1540                        if GeoprocessorManager.GetArcGISMajorVersion() > 9 or GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() >= 3:
1541                            gp.SnapRaster = oldSnapRaster
1542                    except:
1543                        pass
1544
1545            # Reset the Extent ArcGIS environment variable to what it
1546            # was before.
1547           
1548            finally:
1549                try:
1550                    gp.Extent = oldGPExtent
1551                except:
1552                    pass
1553
1554        # If we need to clip (but not project), do it now. This will
1555        # also take care of copying the raster, if needed.
1556               
1557        elif needToClip:
1558
1559            # Clip the raster. If this is ArcGIS 9.2, pass rasters[i]
1560            # for the in_template_dataset parameter, to work around
1561            # the 9.2 bug described in
1562            # http://forums.esri.com/Thread.asp?c=93&f=1729&t=230897&mc=11.
1563           
1564            tempRaster = os.path.join(tempDir.Path, u'clipped%i' % i)
1565            if GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() == 2:
1566                gp.Clip_management(rasters[i], extent, tempRaster, rasters[i])
1567            else:
1568                gp.Clip_management(rasters[i], extent, tempRaster)
1569
1570            # Even though we provided an extent of an existing raster
1571            # (the template raster) that has the same cell size and is
1572            # aligned on the same coordinates, using values we
1573            # obtained through ArcGIS APIs, the Clip tool will not
1574            # necessarily create a raster that has exactly that
1575            # extent. It may be one cell larger or smaller in any of
1576            # the four directions. Different versions of ArcGIS seem
1577            # to work differently in this respect. It is probably due
1578            # to whether it uses 32-bit or 64-bit floats, and other
1579            # various floating-point rounding issues.
1580            #
1581            # The prediction code in R requires that all predictor
1582            # rasters have the same extent (really, the same number of
1583            # rows and columns). If ArcGIS produced a raster that was
1584            # too small or large in any dimension, try Clip again with
1585            # values that will hopefully yield what we want.
1586
1587            describeTemp = gp.Describe(tempRaster)
1588            newEdges = _CheckResultingEdges(tempRaster, describeTemp.Extent, templateLeft, templateBottom, templateRight, templateTop, cellSize)
1589
1590            if newEdges is not None:
1591                Logger.Debug(_(u'Reclipping because the first attempt produced a raster with an extent (%(e1)s) that did not match the extent of the template raster (%(e2)s).') % {u'e1': describeTemp.Extent, u'e2': extent})
1592                tempRaster = os.path.join(tempDir.Path, u'clipped%ia' % i)
1593                if GeoprocessorManager.GetArcGISMajorVersion() == 9 and GeoprocessorManager.GetArcGISMinorVersion() == 2:
1594                    gp.Clip_management(rasters[i], u' '.join(map(unicode, newEdges)), tempRaster, rasters[i])
1595                else:
1596                    gp.Clip_management(rasters[i], u' '.join(map(unicode, newEdges)), tempRaster)
1597
1598        # If we got here but did not need to project or clip, we
1599        # needed to copy. Create a copy of the raster in ArcInfo
1600        # binary grid format.
1601
1602        else:
1603            tempRaster = os.path.join(tempDir.Path, u'copied%i' % i)
1604            ArcGISRaster.CopySilent(rasters[i], tempRaster)
1605
1606        # If we had to project or clip, make absolutely sure that the
1607        # projected or clipped raster has the expected number of rows
1608        # and columns.
1609
1610        if needToProject or needToClip:
1611            d = gp.Describe(tempRaster)
1612            if d.Height != templateDescribe.Height or d.Width != templateDescribe.Width:
1613                Logger.RaiseException(RuntimeError(_(u'Internal error in this tool. Please contact the MGET development team for assistance. Error details: after %(raster)s was projected and/or clipped, the resulting raster had %(cols1)i columns and %(rows1)i rows, while the template raster has %(cols2)i columns and %(rows2)i rows. This tool expected that the resulting raster would have the same number of rows and columns as the template raster. You can work around this problem by projecting and/or clipping the raster yourself, so it has the same coordinate system, cell size, and extent as the template raster.') % {u'raster': rasters[i], u'rows1': d.Height, u'cols1': d.Width, u'rows2': templateDescribe.Height, u'cols2': templateDescribe.Width}))
1614
1615        # Update our dictionary to use the projected/clipped/copied
1616        # raster.
1617
1618        rastersForPredictors[vars[i]] = tempRaster
1619
1620    # Pass the dictionary that maps predictor variables to rasters to
1621    # R, so the R code can read it.
1622
1623    r['rastersForPredictors'] = rastersForPredictors
1624
1625def _CheckResultingEdges(resultingRaster, resultingExtent, templateLeft, templateBottom, templateRight, templateTop, cellSize):
1626    from GeoEco.Types import EnvelopeTypeMetadata
1627
1628    [resultingLeft, resultingBottom, resultingRight, resultingTop] = EnvelopeTypeMetadata.ParseFromArcGISString(resultingExtent)
1629    needToTryAgain = False
1630    newEdges = [None, None, None, None]
1631   
1632    if resultingLeft < templateLeft - cellSize * 0.01:
1633        needToTryAgain = True
1634        newEdges[0] = templateLeft + cellSize * 0.5
1635    elif resultingLeft > templateLeft + cellSize * 0.01:
1636        needToTryAgain = True
1637        newEdges[0] = templateLeft - cellSize * 0.5
1638    else:
1639        newEdges[0] = templateLeft
1640   
1641    if resultingBottom < templateBottom - cellSize * 0.01:
1642        needToTryAgain = True
1643        newEdges[1] = templateBottom + cellSize * 0.5
1644    elif resultingBottom > templateBottom + cellSize * 0.01:
1645        needToTryAgain = True
1646        newEdges[1] = templateBottom - cellSize * 0.5
1647    else:
1648        newEdges[1] = templateBottom
1649   
1650    if resultingRight < templateRight - cellSize * 0.01:
1651        needToTryAgain = True
1652        newEdges[2] = templateRight + cellSize * 0.5
1653    elif resultingRight > templateRight + cellSize * 0.01:
1654        needToTryAgain = True
1655        newEdges[2] = templateRight - cellSize * 0.5
1656    else:
1657        newEdges[2] = templateRight
1658   
1659    if resultingTop < templateTop - cellSize * 0.01:
1660        needToTryAgain = True
1661        newEdges[3] = templateTop + cellSize * 0.5
1662    elif resultingTop > templateTop + cellSize * 0.01:
1663        needToTryAgain = True
1664        newEdges[3] = templateTop - cellSize * 0.5
1665    else:
1666        newEdges[3] = templateTop
1667
1668    if needToTryAgain:
1669        return newEdges
1670    return None
1671
1672
1673###############################################################################
1674# Metadata: module
1675###############################################################################
1676
1677from GeoEco.ArcGIS import ArcGISDependency, ArcGISExtensionDependency
1678from GeoEco.DatabaseAccess.ArcGIS import ArcGIS91SelectCursor
1679from GeoEco.Dependencies import PythonModuleDependency
1680from GeoEco.R import RDependency, RPackageDependency
1681from GeoEco.Metadata import *
1682from GeoEco.Types import *
1683
1684AddModuleMetadata(shortDescription=_(u'Provides methods for modeling and prediction.'))
1685
1686###############################################################################
1687# Metadata: GLM class
1688###############################################################################
1689
1690AddClassMetadata(GLM,
1691    shortDescription=_(u'Provides methods for modeling and prediction using Generalized Linear Models (GLMs).'),
1692    isExposedAsCOMServer=True,
1693    comIID=u'{4A349150-2A25-4B35-851C-A9BFFADF53F3}',
1694    comCLSID=u'{16411D8A-C8CA-40C6-A9D3-F34EA8CD8D6A}')
1695
1696# Public method: GLM.FitToArcGISTable
1697
1698AddMethodMetadata(GLM.FitToArcGISTable,
1699    shortDescription=_(u'Fits a generalized linear model (GLM) to data in an ArcGIS table using the R glm function.'),
1700    isExposedToPythonCallers=True,
1701    isExposedByCOM=True,
1702    isExposedAsArcGISTool=True,
1703    arcGISDisplayName=_(u'Fit GLM'),
1704    arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Linear Models'),
1705    dependencies=[ArcGISDependency(9, 1), RDependency(2, 6, 0)])
1706
1707AddArgumentMetadata(GLM.FitToArcGISTable, u'cls',
1708    typeMetadata=ClassOrClassInstanceTypeMetadata(cls=GLM),
1709    description=_(u'%s class or an instance of it.') % GLM.__name__)
1710
1711AddArgumentMetadata(GLM.FitToArcGISTable, u'inputTable',
1712    typeMetadata=ArcGISTableViewTypeMetadata(mustExist=True),
1713    description=_(
1714u"""ArcGIS table, table view, feature class, or feature layer
1715containing the data for which the model should be fitted."""),
1716    arcGISDisplayName=_(u'Input table'))
1717
1718AddArgumentMetadata(GLM.FitToArcGISTable, u'outputModelFile',
1719    typeMetadata=FileTypeMetadata(mustBeDifferentThanArguments=[u'inputTable'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
1720    description=_(
1721u"""Output file to receive the fitted model. The file will not be in a
1722user-readable format. After the model is fitted, you can provide the
1723file to other tools that perform further analysis or visualization of
1724the fitted model.
1725
1726It is suggested, but not required, that you give the file an .Rdata
1727extension."""),
1728    direction=u'Output',
1729    arcGISDisplayName=_(u'Output model file'))
1730
1731_FormulaDescription = _(
1732u"""The formula must be in the format expected by the R glm function::
1733
1734    response ~ term1 + term2 + ... + termN
1735
1736response is the table field that will be modeled as the response
1737variable and the terms are the table fields that will serve as the
1738predictor variables. The field names are case sensitive. If any field
1739used in the formula is NULL for a given row, that row will not be used
1740in fitting the model.
1741
1742For example, if you have a field Presence that indicates the presence
1743or absence of a species (1 or 0) and you want to model it in terms of
1744sampled environmental covariates stored in the SST, ChlDensity, and
1745Depth fields, you would use the formula::
1746
1747    Presence ~ SST + ChlDensity + Depth
1748
1749By default, all terms are treated as continuous variables. To indicate
1750that a term should be treated as a categorical variable, use the
1751factor function. For example, if SubstrateType is an integer code that
1752should be treated as categorical::
1753
1754    Presence ~ SST + ChlDensity + Depth + factor(SubstrateType)
1755
1756The model terms may also use these operators:
1757
1758* The : operator denotes the interaction of variables a and b. For
1759  example: a:b.
1760
1761* The * operator denotes "crossing". For example, a*b is identical to
1762  a+b+a:b.
1763
1764* The ^ operator denotes crossing to the Nth degree. For example,
1765  (a+b+c)^2 is identical to (a+b+c)*(a+b+c) which in turn expands to a
1766  formula containing the main effects for a, b and c together with
1767  their second-order interactions.
1768
1769* The %in% operator indicates that the terms on its left are nested
1770  within those on the right. For example a + b %in% a expands to the
1771  formula a + a:b.
1772
1773* The - operator (minus) removes the specified terms, so that
1774  (a+b+c)^2 - a:b is identical to a + b + c + b:c + a:c. It can also
1775  used to remove the intercept term: y ~ x - 1 is a line through the
1776  origin. A model with no intercept can be also specified as y ~ x + 0
1777  or y ~ 0 + x.
1778
1779While formulae usually involve just variable names, they can also
1780involve arithmetic expressions. The formula log(y) ~ a + log(x) is
1781quite legal. When such arithmetic expressions involve operators which
1782are also used symbolically in model formulae, there can be confusion
1783between arithmetic and symbolic operator use.
1784
1785To avoid this confusion, the function I() can be used to bracket those
1786portions of a model formula where the operators are used in their
1787arithmetic sense. For example, in the formula y ~ a + I(b+c), the term
1788b+c is to be interpreted as the sum of b and c.
1789
1790Please see the topics "glm" and "formula" in the R documentation for
1791more information.""")
1792
1793AddArgumentMetadata(GLM.FitToArcGISTable, u'formula',
1794    typeMetadata=UnicodeStringTypeMetadata(),
1795    description=_(
1796u"""Formula that specifies the table field that is the response
1797variable and the table fields that are the terms of the model.
1798
1799""" + _FormulaDescription),
1800    arcGISDisplayName=_(u'Formula'))
1801
1802AddArgumentMetadata(GLM.FitToArcGISTable, u'family',
1803    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'binomial', u'gaussian', u'Gamma', u'inverse.gaussian', u'poisson', u'quasi', u'quasibinomial', u'quasipoisson']),
1804    description=_(
1805u"""Name of the family for the model. The family, together with the
1806link function and variance parameters, describes the error
1807distribution that will be used by the model. The family name is case
1808sensitive."""),
1809    arcGISDisplayName=_(u'Family'))
1810
1811AddArgumentMetadata(GLM.FitToArcGISTable, u'where',
1812    typeMetadata=SQLWhereClauseTypeMetadata(canBeNone=True),
1813    description=ArcGIS91SelectCursor.__init__.__doc__.Obj.GetArgumentByName(u'where').Description,
1814    arcGISParameterDependencies=[u'inputTable'],
1815    arcGISDisplayName=_(u'Where clause'),
1816    arcGISCategory=_(u'Model options'))
1817
1818AddArgumentMetadata(GLM.FitToArcGISTable, u'link',
1819    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'1/mu^2', u'cauchit', u'cloglog', u'identity', u'inverse', u'log', u'logit', u'probit', u'sqrt'], canBeNone=True),
1820    description=_(
1821u"""Name of the link function for the model. The allowed link
1822functions depend on the model family:
1823
1824* binomial family - cauchit, cloglog, log, logit, and probit
1825
1826* gaussian family - identity, inverse, and log
1827
1828* Gamma family - identity, inverse, and log
1829
1830* inverse.gaussian family - 1/mu^2, inverse, identity, and log
1831
1832* poisson family - identity, log, and sqrt
1833
1834* quasi family - 1/mu^2, cloglog, identity, inverse, log, logit, probit, and sqrt
1835
1836* quasibinomial family - cauchit, cloglog, log, logit, and probit
1837
1838* quasipoisson family - identity, log, and sqrt
1839
1840If a link function is not specified, the one that is used by default
1841also depends on the model family:
1842
1843* binomial family - logit
1844
1845* gaussian family - identity
1846
1847* Gamma family - inverse
1848
1849* inverse.gaussian family - 1/mu^2
1850
1851* poisson family - log
1852
1853* quasi family - identity
1854
1855* quasibinomial family - logit
1856
1857* quasipoisson family - log
1858"""),
1859    arcGISDisplayName=_(u'Link function'),
1860    arcGISCategory=_(u'Model options'))
1861
1862AddArgumentMetadata(GLM.FitToArcGISTable, u'variance',
1863    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'constant', u'mu(1-mu)', u'mu', u'mu^2', u'mu^3'], canBeNone=True),
1864    description=_(
1865u"""Variance function to use when the "quasi" family is used. For all
1866families other than quasi, this parameter is ignored because the
1867variance function is determined by the family. If the quasi family is
1868used but a variance function is not specified, the "constant" variance
1869function will be used by default."""),
1870    arcGISDisplayName=_(u'Variance function for quasi family'),
1871    arcGISCategory=_(u'Model options'))
1872
1873AddArgumentMetadata(GLM.FitToArcGISTable, u'xColumnName',
1874    typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True),
1875    description=_(
1876u"""Name to use in the formula for the X coordinates of point
1877features. If the input table is a point feature class or layer, the X
1878coordinates will be extracted from the points and be accessible in the
1879formula using the name provided for this parameter."""),
1880    arcGISDisplayName=_(u'Name to use for X coordinates of points'),
1881    arcGISCategory=_(u'Point feature options'))
1882
1883AddArgumentMetadata(GLM.FitToArcGISTable, u'yColumnName',
1884    typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'xColumnName']),
1885    description=_(
1886u"""Name to use in the formula for the Y coordinates of point
1887features. If the input table is a point feature class or layer, the Y
1888coordinates will be extracted from the points and be accessible in the
1889formula using the name provided for this parameter."""),
1890    arcGISDisplayName=_(u'Name to use for Y coordinates of points'),
1891    arcGISCategory=_(u'Point feature options'))
1892
1893AddArgumentMetadata(GLM.FitToArcGISTable, u'zColumnName',
1894    typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'xColumnName', u'yColumnName']),
1895    description=_(
1896u"""Name to use in the formula for the Z coordinates of point
1897features. If the input table is a point feature class or layer that
1898has Z coordinates, the Z coordinates will be extracted from the points
1899and be accessible in the formula using the name provided for this
1900parameter."""),
1901    arcGISDisplayName=_(u'Name to use for Z coordinates of points'),
1902    arcGISCategory=_(u'Point feature options'))
1903
1904AddArgumentMetadata(GLM.FitToArcGISTable, u'mColumnName',
1905    typeMetadata=ArcGISFieldTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'xColumnName', u'yColumnName', u'zColumnName']),
1906    description=_(
1907u"""Name to use in the formula for the measure values of point
1908features. If the input table is a point feature class or layer that
1909has measure values, the measure values will be extracted from the
1910points and be accessible in the formula using the name provided for this
1911parameter."""),
1912    arcGISDisplayName=_(u'Name to use for M values of points'),
1913    arcGISCategory=_(u'Point feature options'))
1914
1915AddArgumentMetadata(GLM.FitToArcGISTable, u'selectionMethod',
1916    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, allowedValues=[u'Stepwise backward'], makeLowercase=True),
1917    description=_(
1918u"""Automated model selection method to execute after fitting the
1919model using the original formula you specified. If automated model
1920selection is performed, the output model file will contain the final
1921model that was selected, and the output summary and plots will be for
1922this model.
1923
1924The selection methods currently available are:
1925
1926* Stepwise backward - backward stepwise model selection by AIC.
1927  The basic idea of this method is to keep dropping model terms from
1928  the original formula so long as a better model fit can be achieved.
1929  Model selection occurs in a loop. First, the Akaike Information
1930  Criterion (AIC) is computed for the current model. Then, N candidate
1931  model are generated, where N equals the number of terms in the
1932  current model. Each candidate model drops a different single term
1933  from the model. The AIC is computed for each candidate model, and
1934  the candidate model that provides the greatest reduction in AIC
1935  becomes the new current model. The loop proceeds until the AIC can
1936  no longer be reduced by dropping terms.
1937
1938Additional methods may be implemented in a future release of this
1939tool.
1940
1941Automated model selection will increase the run time and possibly the
1942memory utilization of this tool. The amount of increase depends on the
1943complexity of your model and the amount of data in your table.
1944
1945Model selection can be a difficult task. You should never blindly rely
1946on an automated selection method that you do not fully understand. For
1947the best results, always consult a statistician."""),
1948    arcGISDisplayName=_(u'Automated model selection method'),
1949    arcGISCategory=_(u'Automated model selection options'))
1950
1951AddArgumentMetadata(GLM.FitToArcGISTable, u'logSelectionDetails',
1952    typeMetadata=BooleanTypeMetadata(),
1953    description=_(
1954u"""If True, detailed information will be logged as automated model
1955selection proceeds. You can use this to monitor the progress of
1956automated model selection and diagnose how the selection method
1957arrived at the final model.
1958
1959If False, no detailed information will be logged during automated
1960model selection."""),
1961    arcGISDisplayName=_(u'Log model selection details'),
1962    arcGISCategory=_(u'Automated model selection options'))
1963
1964AddArgumentMetadata(GLM.FitToArcGISTable, u'writeSummaryFile',
1965    typeMetadata=BooleanTypeMetadata(),
1966    description=_(
1967u"""If True, this tool will write summary information about the fitted
1968model to a text file. (This is the same information that the tool
1969outputs as log messages.) If automated model selection is performed,
1970the output text file will contain summary information for both the
1971initial model and the final model.
1972
1973The file will have the name X_summary.txt, where X is the name of the
1974output model file, minus any extension."""),
1975    arcGISDisplayName=_(u'Write model summary file'),
1976    arcGISCategory=_(u'Additional output options'))
1977
1978AddArgumentMetadata(GLM.FitToArcGISTable, u'writeDiagnosticPlots',
1979    typeMetadata=BooleanTypeMetadata(),
1980    description=_(
1981u"""If True, this tool will write six diagnostic plots for the fitted
1982model. If automated model selection is performed, plots will be
1983generated only for the final model.
1984
1985Each plot will be written to a PNG file:
1986
1987* X_resid_fit.png - residuals vs. fitted values
1988
1989* X_qq.png - normal Q-Q plot
1990
1991* X_scale_loc.png - scale-location plot of sqrt(abs(residuals)) vs.
1992  fitted values
1993
1994* X_cooks.png - Cook's distances vs. row labels
1995
1996* X_resid_lev.png - residuals vs. leverages
1997
1998* X_cooks_lev.png - Cook's distances vs. leverage/(1-leverage)
1999
2000In the file names above, X is the name of the output model file, minus
2001any extension. Please see the R documentation for the plot.lm function
2002for more information about the plots."""),
2003    arcGISDisplayName=_(u'Write diagnostic plots'),
2004    arcGISCategory=_(u'Additional output options'))
2005
2006AddArgumentMetadata(GLM.FitToArcGISTable, u'numDiagLabels',
2007    typeMetadata=IntegerTypeMetadata(minValue=0),
2008    description=_(
2009u"""Number of extreme points to label in diagnostic plots.
2010
2011This parameter has no effect when diagnostic plots are not
2012written."""),
2013    arcGISParameterDependencies=[u'inputTable'],
2014    arcGISDisplayName=_(u'Number of extreme points to label in diagnostic plots'),
2015    arcGISCategory=_(u'Additional output options'))
2016
2017AddArgumentMetadata(GLM.FitToArcGISTable, u'diagLabelField',
2018    typeMetadata=ArcGISFieldTypeMetadata(mustExist=True, canBeNone=True),
2019    description=_(
2020u"""Table field to for labeling extreme points in diagnostic plots.
2021
2022Usually, you choose a field that uniquely identifies the table row. If
2023you do not specify a field, R will assign labels for you, and it may
2024be difficult to determine which row a given point corresponds to.
2025
2026This parameter has no effect when diagnostic plots are not
2027written."""),
2028    arcGISParameterDependencies=[u'inputTable'],
2029    arcGISDisplayName=_(u'Field for labeling extreme points in diagnostic plots'),
2030    arcGISCategory=_(u'Additional output options'))
2031
2032AddArgumentMetadata(GLM.FitToArcGISTable, u'writeTermPlots',
2033    typeMetadata=BooleanTypeMetadata(),
2034    description=_(
2035u"""If True, this tool will write a partial plot for each term in the
2036fitted model's formula. If automated model selection is performed,
2037plots will be generated only for the final model.
2038
2039Each plot will be written to a PNG file named X_termY.png, where X is
2040the name of the output model file, minus any extension, and Y is the
2041term number in the model (minimum of two digits). For example, if the
2042model had this formula::
2043
2044    Presence ~ SST + log(ChlDensity) + Depth
2045
2046The output files would be X_term01.png, X_term02.png, and
2047X_term03.png, which correspond to the SST, log(ChlDensity), and Depth
2048terms respectively."""),
2049    arcGISDisplayName=_(u'Write plots of model terms'),
2050    arcGISCategory=_(u'Additional output options'))
2051
2052AddArgumentMetadata(GLM.FitToArcGISTable, u'residuals',
2053    typeMetadata=BooleanTypeMetadata(),
2054    description=_(
2055u"""If True, residuals will be plotted in model term plots of
2056continuous terms (i.e. terms that are not factors).
2057
2058Residuals are calculated using the method employed by the mgcv
2059package. That is, they are "the working residuals from the IRLS
2060iteration weighted by the IRLS weights. [These] are the residuals that
2061would be obtained by dropping the term concerned from the model, while
2062leaving all other estimates fixed (i.e. the estimates for the term
2063plus the residuals)." """),
2064    arcGISDisplayName=_(u'Plot residuals'),
2065    arcGISCategory=_(u'Additional output options'))
2066
2067AddArgumentMetadata(GLM.FitToArcGISTable, u'xAxis',
2068    typeMetadata=BooleanTypeMetadata(),
2069    description=_(
2070u"""If True, a straight line will be drawn at y=0 in model term plots
2071of continuous terms (i.e. terms that are not factors)."""),
2072    arcGISDisplayName=_(u'Plot x-axis'),
2073    arcGISCategory=_(u'Additional output options'))
2074
2075AddArgumentMetadata(GLM.FitToArcGISTable, u'commonScale',
2076    typeMetadata=BooleanTypeMetadata(),
2077    description=_(
2078u"""If True, model term plots of continuous terms (i.e. terms that are
2079not factors) will have the same y-axis scale, allowing the relative
2080importance of different terms to be ascertained by comparing the plots
2081side by side."""),
2082    arcGISDisplayName=_(u'Use common y-axis scale'),
2083    arcGISCategory=_(u'Additional output options'))
2084
2085AddArgumentMetadata(GLM.FitToArcGISTable, u'plotFileFormat',
2086    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'emf', u'png'], makeLowercase=True),
2087    description=_(
2088u"""Plot file format, one of:
2089
2090* emf - Windows enhanced metafile (EMF) format. This is a vector
2091  format that may be printed and resized without any pixelation and is
2092  therefore suitable for use in printable documents that recognize
2093  this format (e.g. Microsoft Word or Microsoft Visio).
2094
2095* png - Portable network graphics (PNG) format. This is a compressed,
2096  lossless, highly portable raster format suitable for use in web
2097  pages or other locations where a raster format is desired. Most
2098  scientific journals accept PNG; they typically request that files
2099  have a resolution of at least 1000 DPI.
2100"""),
2101    arcGISDisplayName=_(u'Plot file format'),
2102    arcGISCategory=_(u'Additional output options'))
2103
2104AddArgumentMetadata(GLM.FitToArcGISTable, u'res',
2105    typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
2106    description=_(
2107u"""PNG plot file resolution, in dots per inch (DPI). The default is
2108set to a high value (1000) because this is the minimum resolution
2109typically required by scientific journals that accept figures in PNG
2110format.
2111
2112This parameter is ignored for EMF format because it is a vector
2113format."""),
2114    arcGISDisplayName=_(u'Plot resolution, in DPI'),
2115    arcGISCategory=_(u'Additional output options'))
2116
2117AddArgumentMetadata(GLM.FitToArcGISTable, u'width',
2118    typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
2119    description=_(
2120u"""Plot file width in thousandths of inches (for EMF format; e.g. the
2121value 3000 is 3 inches) or pixels (for PNG format)."""),
2122    arcGISDisplayName=_(u'Plot width'),
2123    arcGISCategory=_(u'Additional output options'))
2124
2125AddArgumentMetadata(GLM.FitToArcGISTable, u'height',
2126    typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
2127    description=_(
2128u"""Plot file height in thousandths of inches (for EMF format; e.g. the
2129value 3000 is 3 inches) or pixels (for PNG format)."""),
2130    arcGISDisplayName=_(u'Plot height'),
2131    arcGISCategory=_(u'Additional output options'))
2132
2133AddArgumentMetadata(GLM.FitToArcGISTable, u'pointSize',
2134    typeMetadata=FloatTypeMetadata(minValue=1.0),
2135    description=_(
2136u"""The default pointsize of plotted text."""),
2137    arcGISDisplayName=_(u'Default pointsize of plotted text'),
2138    arcGISCategory=_(u'Additional output options'))
2139
2140AddArgumentMetadata(GLM.FitToArcGISTable, u'bg',
2141    typeMetadata=UnicodeStringTypeMetadata(),
2142    description=_(
2143u"""PNG plot file background color. The color must be a valid name in
2144R's color palette, or "transparent" if there is no background color.
2145This parameter is ignored if the plot format file is EMF."""),
2146    arcGISDisplayName=_(u'Plot background color'),
2147    arcGISCategory=_(u'Additional output options'))
2148
2149AddArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting',
2150    typeMetadata=BooleanTypeMetadata(),
2151    description=_(
2152u"""If True, the output files will be overwritten, if it exists. If
2153False, a ValueError will be raised if an output file exists."""),
2154    initializeToArcGISGeoprocessorVariable=u'OverwriteOutput')
2155
2156# Public method: GLM.PredictFromArcGISRasters
2157
2158AddMethodMetadata(GLM.PredictFromArcGISRasters,
2159    shortDescription=_(u'Using a fitted generalized linear model (GLM), this tool creates a raster representing the response variable predicted from ArcGIS rasters representing the predictor variables.'),
2160    isExposedToPythonCallers=True,
2161    isExposedByCOM=True,
2162    isExposedAsArcGISTool=True,
2163    arcGISDisplayName=_(u'Predict GLM From Rasters'),
2164    arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Linear Models'),
2165    dependencies=[ArcGISDependency(9, 2), RDependency(2, 6, 0), RPackageDependency(u'rgdal')])
2166
2167CopyArgumentMetadata(GLM.FitToArcGISTable, u'cls', GLM.PredictFromArcGISRasters, u'cls')
2168
2169AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputModelFile',
2170    typeMetadata=FileTypeMetadata(mustExist=True),
2171    description=_(
2172u"""File that contains the fitted model, generated by the Fit GLM
2173tool."""),
2174    arcGISDisplayName=_(u'Input model file'))
2175
2176AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputPredictorRasters',
2177    typeMetadata=ListTypeMetadata(ArcGISRasterLayerTypeMetadata(mustExist=True), minLength=1),
2178    description=_(
2179u"""Rasters that represent the predictor variables used in the model.
2180
2181This list must include one raster for each predictor variable that
2182appears in the model's formula. The list provided for the Model
2183Variable Names for Predictor Rasters parameter specifies the predictor
2184variable that each raster represents.
2185
2186For example, if your model used the formula::
2187
2188    Presence ~ SST + Chl + Depth
2189
2190You must specify three rasters, one for each predictor variable. If
2191you want to predict Presence for the month of January 1999, you might
2192use a static bathymetry raster and SST and chlorophyll rasters for
2193that month::
2194
2195    C:\\Data\\etopo2v2
2196    C:\\Data\\SST\\sst199901
2197    C:\\Data\\Chlorophyll\\chl199901
2198
2199Then, for the Model Variable Names for Predictor Rasters parameter,
2200you would specify the predictor variable that each raster represents::
2201
2202    Depth
2203    SST
2204    Chl
2205
2206You can specify extra predictors that do not appear in the formula.
2207They will be ignored. In the example above, you might include the
2208variable SSH (sea surface height) and the raster
2209C:\\\\Data\\\\SSH\\\\ssh199901. Because this variable does not appear
2210in the model formula, it will be ignored.
2211
2212All of the predictor rasters must have a coordinate system defined.
2213They must all share the same datum. If you also specify the Template
2214Raster For Outputs parameter, they must have the datum of the template
2215raster. If you do not specify that parameter, the first predictor
2216raster will be used as the template raster.
2217
2218The predictor rasters need not have the same coordinate system,
2219extent, or cell size as the template raster. If these characteristics
2220differ from the template raster, this tool will automatically project
2221and clip the predictor rasters to conform to the template raster. The
2222predictor rasters must be able to be projected to the template
2223raster's coordinate system without requiring the specification of a
2224geographic transformation. An error will be reported if a geographic
2225transformation must be specified. In this case, you must project the
2226predictor rasters manually before providing them to this tool.
2227
2228By default, floating point predictor rasters will be projected using
2229bilinear interpolation and integer predictor rasters will be projected
2230using nearest neighbor assignment. Bilinear interpolation is
2231appropriate for continuous variables such as sea surface temperature,
2232while nearest neighbor is appropriate for categorical variables such
2233as bottom substrate type. If these defaults are not appropriate for
2234your predictors, specify different algorithms using the Resampling
2235Techniques for Predictor Rasters parameter."""),
2236    arcGISDisplayName=_(u'Input predictor rasters'))
2237
2238AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'variableNames',
2239    typeMetadata=ListTypeMetadata(UnicodeStringTypeMetadata(), mustBeSameLengthAsArgument=u'inputPredictorRasters'),
2240    description=_(
2241u"""Model variable names for the input predictor rasters. The variable
2242names are case sensitive. Please see the documentation for the Input
2243Predictor Rasters parameter for more information."""),
2244    arcGISDisplayName=_(u'Model variable names for predictor rasters'))
2245
2246AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputResponseRaster',
2247    typeMetadata=ArcGISRasterTypeMetadata(mustBeDifferentThanArguments=[u'inputModelFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
2248    description=_(
2249u"""Output raster representing the response predicted by the model
2250from the input predictor rasters.
2251
2252If any pixel is NoData in any predictor raster (after it has been
2253projected and clipped as needed), the pixel will be NoData in the
2254output raster as well. This is because the response variable cannot be
2255predicted by the model if any of the predictor variables are
2256missing."""),
2257    direction=u'Output',
2258    arcGISDisplayName=_(u'Output response raster'))
2259
2260AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster',
2261    typeMetadata=ArcGISRasterLayerTypeMetadata(mustExist=True, mustBeDifferentThanArguments=[u'inputModelFile', u'outputResponseRaster'], canBeNone=True),
2262    description=_(
2263u"""Template raster that defines the coordinate system, extent, and
2264cell size of the output rasters produced by this tool.
2265
2266If you do not specify a template raster, the first predictor raster
2267will be used instead.
2268
2269All predictor rasters must share the same datum as the template
2270raster. The predictor rasters need not have the same coordinate
2271system, extent, or cell size as the template raster. If these
2272characteristics differ from the template raster, this tool will
2273automatically project and clip the predictor rasters to conform to the
2274template raster. Please see the documentation for the Input Predictor
2275Rasters parameter for more information."""),
2276    arcGISDisplayName=_(u'Template raster for outputs'))
2277
2278AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques',
2279    typeMetadata=ListTypeMetadata(elementType=UnicodeStringTypeMetadata(allowedValues=[u'NEAREST', u'BILINEAR', u'CUBIC'], makeLowercase=True), canBeNone=True),
2280    description=_(
2281u"""Resampling technique to be used for each input predictor raster if
2282that raster needs to be automatically projected, as described in the
2283documentation for the Input Predictor Rasters parameter.
2284
2285The available resampling techniques are:
2286
2287* NEAREST - Nearest neighbor assignment
2288
2289* BILINEAR - Bilinear interpolation
2290
2291* CUBIC - Cubic convolution
2292
2293If you do not specify a resampling technique for a given input
2294predictor raster, BILINEAR will be used.
2295
2296The ArcGIS 9.2 documentation provides the following information about the
2297resampling techniques:
2298
2299* The NEAREST option, which performs a nearest neighbor assignment, is
2300  the fastest of the interpolation methods. It is primarily used for
2301  categorical data, such as a land use classification, because it will
2302  not change the cell values. Do not use NEAREST for continuous data,
2303  such as elevation surfaces.
2304
2305* The BILINEAR option, bilinear interpolation, determines the new
2306  value of a cell based on a weighted distance average of surrounding
2307  cells. The CUBIC option, cubic convolution, determines the new cell
2308  value by fitting a smooth curve through the surrounding points.
2309  These are most appropriate for continuous data and may cause some
2310  smoothing; also, cubic convolution may result in the output raster
2311  containing values outside the range of the input raster. It is not
2312  recommended that BILINEAR or CUBIC be used with categorical data
2313  because the cell values may be altered.
2314"""),
2315    arcGISDisplayName=_(u'Resampling techniques'),
2316    arcGISCategory=_(u'Prediction options'))
2317
2318AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues',
2319    typeMetadata=BooleanTypeMetadata(),
2320    description=_(
2321u"""If True, predictions will not be made where the values of
2322predictor rasters fall outside of the range of values used to fit the
2323model. These cells will appear as NoData in the output rasters.
2324
2325If False, predictions will be attempted regardless of what the
2326predictor values are.
2327
2328This parameter is set to True by default because many believe that it
2329is a bad practice to extrapolate a model beyond the range of values
2330used to fit it. But if your model provides a very strong fit, or you
2331have some other reason to believe it is very robust, you can set this
2332parameter to False to perform the extrapolation."""),
2333    arcGISDisplayName=_(u'Ignore raster values outside the modeled range'),
2334    arcGISCategory=_(u'Prediction options'))
2335
2336AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputErrorRaster',
2337    typeMetadata=ArcGISRasterTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'inputModelFile', u'outputResponseRaster'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
2338    description=_(
2339u"""Output raster representing the standard errors of the predicted
2340response.
2341
2342If any pixel is NoData in any predictor raster (after it has been
2343projected and clipped as needed), it will be NoData in the output
2344raster as well. This is because the response variable cannot be
2345predicted by the model if any of the predictor variables are
2346missing."""),
2347    direction=u'Output',
2348    arcGISDisplayName=_(u'Output standard error raster'),
2349    arcGISCategory=_(u'Additional output options'))
2350
2351AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputBinaryResponseRaster',
2352    typeMetadata=ArcGISRasterTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'inputModelFile', u'outputResponseRaster', u'outputErrorRaster'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
2353    description=_(
2354u"""Output raster with the predicted response classified into two
2355possible values, 0 or 1, according to the cutoff parameter (which you
2356must also provide). Predicted response values less than the cutoff
2357will be classified as 0; values greater than or equal to the cutoff
2358will be classified as 1.
2359
2360If any pixel is NoData in any predictor raster (after it has been
2361projected and clipped as needed), it will be NoData in the output
2362raster as well. This is because the response variable cannot be
2363predicted by the model if any of the predictor variables are
2364missing.
2365
2366You can examine the performance of your model for different cutoff
2367values using the Plot ROC of Binary Classification Model tool or the
2368Plot Performance of Binary Classification Model tool."""),
2369    direction=u'Output',
2370    arcGISDisplayName=_(u'Output binary response raster'),
2371    arcGISCategory=_(u'Additional output options'),
2372    dependencies=[ArcGISExtensionDependency(u'spatial')])
2373
2374AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'cutoff',
2375    typeMetadata=FloatTypeMetadata(canBeNone=True),
2376    description=_(
2377u"""Cutoff for classifying the continous predicted response into a
2378binary response. See the documentation for the Output Binary Response
2379Raster for more information."""),
2380    arcGISDisplayName=_(u'Cutoff'),
2381    arcGISCategory=_(u'Additional output options'))
2382
2383AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'buildPyramids',
2384    typeMetadata=BooleanTypeMetadata(),
2385    description=_(
2386u"""If True, pyramids will be built for the output rasters, which will
2387improve their display speed in the ArcGIS user interface."""),
2388    arcGISDisplayName=_(u'Build pyramids'),
2389    arcGISCategory=_(u'Additional output options'))
2390
2391AddArgumentMetadata(GLM.PredictFromArcGISRasters, u'overwriteExisting',
2392    typeMetadata=BooleanTypeMetadata(),
2393    description=_(
2394u"""If True, the output rasters will be overwritten, if they exist.
2395If False, a ValueError will be raised if either output raster
2396exists."""),
2397    initializeToArcGISGeoprocessorVariable=u'OverwriteOutput')
2398
2399###############################################################################
2400# Metadata: GAM class
2401###############################################################################
2402
2403AddClassMetadata(GAM,
2404    shortDescription=_(u'Provides methods for modeling and prediction using Generalized Additive Models (GAMs).'),
2405    isExposedAsCOMServer=True,
2406    comIID=u'{515D570A-CF61-463A-8250-C65B67BEE0C7}',
2407    comCLSID=u'{D875D98A-6E4D-486B-B681-225FF40A0D7E}')
2408
2409# Public method: GAM.FitToArcGISTable
2410
2411AddMethodMetadata(GAM.FitToArcGISTable,
2412    shortDescription=_(u'Fits a generalized additive model (GAM) to data in an ArcGIS table.'),
2413    isExposedToPythonCallers=True,
2414    isExposedByCOM=True,
2415    isExposedAsArcGISTool=True,
2416    arcGISDisplayName=_(u'Fit GAM'),
2417    arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Additive Models'),
2418    dependencies=[ArcGISDependency(9, 1), RDependency(2, 6, 0)])
2419
2420AddArgumentMetadata(GAM.FitToArcGISTable, u'cls',
2421    typeMetadata=ClassOrClassInstanceTypeMetadata(cls=GAM),
2422    description=_(u'%s class or an instance of it.') % GAM.__name__)
2423
2424CopyArgumentMetadata(GLM.FitToArcGISTable, u'inputTable', GAM.FitToArcGISTable, u'inputTable')
2425CopyArgumentMetadata(GLM.FitToArcGISTable, u'outputModelFile', GAM.FitToArcGISTable, u'outputModelFile')
2426
2427AddArgumentMetadata(GAM.FitToArcGISTable, u'formula',
2428    typeMetadata=GLM.FitToArcGISTable.__doc__.Obj.GetArgumentByName(u'formula').Type,
2429    description=GLM.FitToArcGISTable.__doc__.Obj.GetArgumentByName(u'formula').Description + _(
2430u"""
2431
2432A principal advantage of GAMs over GLMs is the ability to fit a model
2433using smoothing functions. The most common smoothing function is
2434called s::
2435
2436    Presence ~ SST + s(ChlDensity) + Depth
2437
2438The available smoothing functions and their meanings and parameters
2439depend on which R package is used to fit the GAM:
2440
2441* The R gam package provides two smoothing functions: s for spline
2442  smooths and lo for loess smooths. For more information, please see
2443  the documentation for these functions in the
2444  `gam package documentation <http://cran.r-project.org/web/packages/gam/gam.pdf>`_.
2445
2446* The R mgcv package also provides two smoothing functions. The s
2447  function fits a thin plate regression spline. The te function fits a
2448  tensor product smooth using a cubic regression spline. Both
2449  functions allow you to specify different spline types, such as cubic
2450  regression splines, cubic regression splines with shrinkage, cyclic
2451  cubic regression splines, and thin plate regression splines with
2452  shrinkage. For more information, please see the documentation for
2453  these functions in the
2454  `mgcv package documentation <http://cran.r-project.org/web/packages/mgcv/mgcv.pdf>`_."""),
2455    arcGISDisplayName=GLM.FitToArcGISTable.__doc__.Obj.GetArgumentByName(u'formula').ArcGISDisplayName)
2456
2457AddArgumentMetadata(GAM.FitToArcGISTable, u'family',
2458    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'binomial', u'gaussian', u'Gamma', u'inverse.gaussian', u'poisson', u'negbin', u'quasi', u'quasibinomial', u'quasipoisson']),
2459    description=_(
2460u"""Name of the family for the model. The family, together with the
2461link function and variance parameters, describes the error
2462distribution that will be used by the model. The family name is case
2463sensitive.
2464
2465The negbin family is only available when the mgcv R package is used to
2466fit the model. When negbin is used, the theta parameter must also be
2467specified (under Model Options)."""),
2468    arcGISDisplayName=_(u'Family'))
2469
2470AddArgumentMetadata(GAM.FitToArcGISTable, u'rPackage',
2471    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'gam', u'mgcv']),
2472    description=_(
2473u"""R package to use when fitting the GAM.
2474
2475* mgcv - the GAM package written by Simon Wood, an expert in
2476  statistical modeling with smooth functions. This package allows
2477  terms to be smoothed with a variety of spline types. Stepwise model
2478  selection is not used with these models because terms can be fitted
2479  with "shrinkage" splines, allowing them to drop out of the model
2480  during the fitting process. Because this package is under active
2481  development and maintenance and provides more features than the gam
2482  package (below), it is the default.
2483
2484* gam - the GAM package written by Trevor Hastie, one of the inventors
2485  of GAMs. This package allows terms to be smoothed with splines or
2486  loess functions and models to be optimized using stepwise model
2487  selection. One disadvantage is that the Predict GAM From ArcGIS
2488  Rasters tool cannot produce standard error rasters for models fitted
2489  with this package.
2490
2491For more information on the two packages, please see the
2492`gam package documentation <http://cran.r-project.org/web/packages/gam/gam.pdf>`_
2493and the
2494`mgcv package documentation <http://cran.r-project.org/web/packages/mgcv/mgcv.pdf>`_."""),
2495    arcGISDisplayName=_(u'R package to use'))
2496
2497CopyArgumentMetadata(GLM.FitToArcGISTable, u'where', GAM.FitToArcGISTable, u'where')
2498
2499AddArgumentMetadata(GAM.FitToArcGISTable, u'link',
2500    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'1/mu^2', u'cauchit', u'cloglog', u'identity', u'inverse', u'log', u'logit', u'probit', u'sqrt'], canBeNone=True),
2501    description=_(
2502u"""Name of the link function for the model. The allowed link
2503functions depend on the model family:
2504
2505* binomial family - cauchit, cloglog, log, logit, and probit
2506
2507* gaussian family - identity, inverse, and log
2508
2509* Gamma family - identity, inverse, and log
2510
2511* inverse.gaussian family - 1/mu^2, inverse, identity, and log
2512
2513* negbin family - identity, log, and sqrt
2514
2515* poisson family - identity, log, and sqrt
2516
2517* quasi family - 1/mu^2, cloglog, identity, inverse, log, logit, probit, and sqrt
2518
2519* quasibinomial family - cauchit, cloglog, log, logit, and probit
2520
2521* quasipoisson family - identity, log, and sqrt
2522
2523If a link function is not specified, the one that is used by default
2524also depends on the model family:
2525
2526* binomial family - logit
2527
2528* gaussian family - identity
2529
2530* Gamma family - inverse
2531
2532* inverse.gaussian family - 1/mu^2
2533
2534* negbin family - log
2535
2536* poisson family - log
2537
2538* quasi family - identity
2539
2540* quasibinomial family - logit
2541
2542* quasipoisson family - log
2543"""),
2544    arcGISDisplayName=_(u'Link function'),
2545    arcGISCategory=_(u'Model options'))
2546
2547CopyArgumentMetadata(GLM.FitToArcGISTable, u'variance', GAM.FitToArcGISTable, u'variance')
2548
2549AddArgumentMetadata(GAM.FitToArcGISTable, u'theta',
2550    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
2551    description=_(
2552u"""Theta parameter for the negbin model family.
2553
2554Theta specifies the amount of dispersion in the data and is required
2555when fitting a negbin model (it is ignored for all other model
2556families). It may be specified in several different ways:
2557
2558* A single number - this number defines the known fixed value of
2559  theta. Use this alternative when you have determined theta yourself.
2560
2561* Two numbers - these numbers define the range over which mgcv
2562  will search for the optimal theta. Use this alternative when you
2563  have not determined theta yourself. Use the following syntax,
2564  replacing A and B with the two numbers::
2565
2566      c(A,B)
2567
2568* A list of three or more numbers - these numbers define the possible
2569  values of theta; mgcv will select the optimal value from the list.
2570  Use this alternative when you only want mgcv to consider specific
2571  values. Use the following syntax, replacing, A, B, and so on with
2572  your numbers::
2573
2574      c(A,B,...)
2575
2576Please see the article titled "GAM negative binomial family" in the
2577`mgcv package documentation <http://cran.r-project.org/web/packages/mgcv/mgcv.pdf>`_.
2578for more information about how to use this parameter."""),
2579    arcGISDisplayName=_(u'Theta parameter for negbin family'),
2580    arcGISCategory=_(u'Model options'))
2581
2582AddArgumentMetadata(GAM.FitToArcGISTable, u'method',
2583    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
2584    description=_(
2585u"""Smoothing parameter estimation method to use, when fitting the GAM
2586with the R mgcv package (this parameter is ignored when using the R
2587gam package).
2588
2589The methods available depend on the version of mgcv you have
2590installed. In mgcv version 1.7-5, the following methods were
2591available:
2592
2593* GCV.Cp - GCV for Poisson and binomial models and Mallows'
2594  Cp/UBRE/AIC for others. (This is based on the fact that this tool
2595  does not provide a scale parameter when it calls mgcv. The default
2596  value of scale is 0, indicating that the scale should be 1 for
2597  Poisson and binomial models and unknown otherwise.)
2598
2599* GACV.Cp - equivalent to GCV.Cp, but uses GACV in place of GCV.
2600
2601* REML - REML estimation.
2602
2603* P-REML - REML estimation, but using a Pearson estimate of the scale.
2604
2605* ML - maximum likelihood estimation.
2606
2607* P-ML - maximum likelihood estimation, but using a Pearson estimate
2608  of the scale.
2609
2610The default is GCV.Cp."""),
2611    arcGISDisplayName=_(u'Smoothing parameter estimation method'),
2612    arcGISCategory=_(u'Model options'))
2613
2614AddArgumentMetadata(GAM.FitToArcGISTable, u'optimizer',
2615    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
2616    description=_(
2617u"""Numerical optimization method to use to optimize the smoothing
2618parameter estimation criterion, when fitting the GAM with the R mgcv
2619package (this parameter is ignored when using the R gam package).
2620
2621The optimizers available depend on the version of mgcv you have
2622installed. In mgcv version 1.7-5, the following optimizers were
2623available:
2624
2625* perf - performance iteration.
2626
2627* outer - a more stable direct approach. This is the default. If this
2628  method is selected, you may also specify an alternative optimizer.
2629
2630Please consult the mgcv documentation and authors for more information
2631about these optimizers."""),
2632    arcGISDisplayName=_(u'Optimizer'),
2633    arcGISCategory=_(u'Model options'))
2634
2635AddArgumentMetadata(GAM.FitToArcGISTable, u'alternativeOptimizer',
2636    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
2637    description=_(
2638u"""Alternative mgcv optimizer to use when the "outer" optimizer is
2639selected for the previous parameter.
2640
2641The optimizers available depend on the version of mgcv you have
2642installed. In mgcv version 1.7-5, the following optimizers were
2643available:
2644
2645* newton - the default.
2646
2647* bfgs
2648
2649* optim
2650
2651* nlm
2652
2653* nlm.fd - the mgcv documentation says this method "is based entirely
2654  on finite differenced derivatives and is very slow".
2655
2656Please consult the mgcv documentation and authors for more information
2657about these optimizers."""),
2658    arcGISDisplayName=_(u'Alternative optimizer'),
2659    arcGISCategory=_(u'Model options'))
2660
2661CopyArgumentMetadata(GLM.FitToArcGISTable, u'xColumnName', GAM.FitToArcGISTable, u'xColumnName')
2662CopyArgumentMetadata(GLM.FitToArcGISTable, u'yColumnName', GAM.FitToArcGISTable, u'yColumnName')
2663CopyArgumentMetadata(GLM.FitToArcGISTable, u'zColumnName', GAM.FitToArcGISTable, u'zColumnName')
2664CopyArgumentMetadata(GLM.FitToArcGISTable, u'mColumnName', GAM.FitToArcGISTable, u'mColumnName')
2665
2666AddArgumentMetadata(GAM.FitToArcGISTable, u'selectionMethod',
2667    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, allowedValues=[u'Stepwise backward'], makeLowercase=True),
2668    description=_(
2669u"""Automated model selection method to execute after fitting the
2670model using the original formula you specified. If automated model
2671selection is performed, the output model file will contain the final
2672model that was selected, and the output summary and plots will be for
2673this model.
2674
2675The selection methods currently available are:
2676
2677* Stepwise backward - backward stepwise model selection by AIC. This
2678  method is only available if the gam was fitted using the R gam
2679  package. The basic idea of this method is to keep dropping model
2680  terms from the original formula so long as a better model fit can be
2681  achieved. Model selection occurs in a loop. First, the Akaike
2682  Information Criterion (AIC) is computed for the current model. Then,
2683  N candidate model are generated, where N equals the number of terms
2684  in the current model. Each candidate model drops a different single
2685  term from the model. The AIC is computed for each candidate model,
2686  and the candidate model that provides the greatest reduction in AIC
2687  becomes the new current model. The loop proceeds until the AIC can
2688  no longer be reduced by dropping terms.
2689
2690Additional methods may be implemented in a future release of this
2691tool.
2692
2693Automated model selection will increase the run time and possibly the
2694memory utilization of this tool. The amount of increase depends on the
2695complexity of your model and the amount of data in your table.
2696
2697For an alternative to automated model selection, you can fit the GAM
2698using the R mgcv package and use splines with "shrinkage" by setting
2699the bs parameter of the s function to "ts" for a thin plate regression
2700spline with shrinkage or "cs" for a cubic regression spline with
2701shrinkage. For example::
2702
2703    Presence ~ s(SST,bs="ts") + s(Chl,bs="ts") + s(Depth,bs="ts")
2704
2705After the model is fitted, if the model summary shows that a term has
2706an effective degrees of freedom (edf) close to zero, the term has been
2707"zeroed out" and the plot of the term will show a horizontal line at
2708the x axis. This indicates that the term is a poor predictor of the
2709response variable and should be removed from the model.
2710
2711Model selection can be a difficult task. You should never blindly rely
2712on an automated selection method that you do not fully understand. For
2713the best results, always consult a statistician."""),
2714    arcGISDisplayName=_(u'Automated model selection method'),
2715    arcGISCategory=_(u'Automated model selection options'))
2716
2717CopyArgumentMetadata(GLM.FitToArcGISTable, u'logSelectionDetails', GAM.FitToArcGISTable, u'logSelectionDetails')
2718CopyArgumentMetadata(GLM.FitToArcGISTable, u'writeSummaryFile', GAM.FitToArcGISTable, u'writeSummaryFile')
2719
2720AddArgumentMetadata(GAM.FitToArcGISTable, u'writeDiagnosticPlots',
2721    typeMetadata=BooleanTypeMetadata(),
2722    description=_(
2723u"""If True and the R mgcv package was used to fit the GAM, this tool
2724will write four diagnostic plots for the fitted model. If False or the
2725R gam package was used to fit the GAM, no plots will be written.
2726
2727The four plots are written to a single PNG file named X_diag.png,
2728where X is the name of the output model file, minus any extension. The
2729plots include a normal Q-Q plot, a plot of residuals vs. linear
2730predictors, a histogram of residuals, and a plot of the response vs.
2731the fitted values. For more information about these plots, please see
2732the documentation for the gam.check function in the
2733`mgcv package documentation <http://cran.r-project.org/web/packages/mgcv/mgcv.pdf>`_."""),
2734    arcGISDisplayName=_(u'Write diagnostic plots'),
2735    arcGISCategory=_(u'Additional output options'))
2736
2737CopyArgumentMetadata(GLM.FitToArcGISTable, u'writeTermPlots', GAM.FitToArcGISTable, u'writeTermPlots')
2738
2739AddArgumentMetadata(GAM.FitToArcGISTable, u'residuals',
2740    typeMetadata=BooleanTypeMetadata(),
2741    description=_(
2742u"""If True, residuals will be plotted in model term plots of
2743continuous terms (i.e. terms that are not factors).
2744
2745Regardless of what R package was used to fit the GAM, they are
2746calculated using the method employed by the mgcv package. That is,
2747they are "the working residuals from the IRLS iteration weighted by
2748the IRLS weights. Partial residuals for a smooth term [i.e. a term
2749fitted using the s, lo, or te function] are the residuals that would
2750be obtained by dropping the term concerned from the model, while
2751leaving all other estimates fixed (i.e. the estimates for the term
2752plus the residuals)." (The same method is used for linear terms, as
2753well.)"""),
2754    arcGISDisplayName=_(u'Plot residuals'),
2755    arcGISCategory=_(u'Additional output options'))
2756
2757CopyArgumentMetadata(GLM.FitToArcGISTable, u'xAxis', GAM.FitToArcGISTable, u'xAxis')
2758CopyArgumentMetadata(GLM.FitToArcGISTable, u'commonScale', GAM.FitToArcGISTable, u'commonScale')
2759CopyArgumentMetadata(GLM.FitToArcGISTable, u'plotFileFormat', GAM.FitToArcGISTable, u'plotFileFormat')
2760CopyArgumentMetadata(GLM.FitToArcGISTable, u'res', GAM.FitToArcGISTable, u'res')
2761CopyArgumentMetadata(GLM.FitToArcGISTable, u'width', GAM.FitToArcGISTable, u'width')
2762CopyArgumentMetadata(GLM.FitToArcGISTable, u'height', GAM.FitToArcGISTable, u'height')
2763CopyArgumentMetadata(GLM.FitToArcGISTable, u'pointSize', GAM.FitToArcGISTable, u'pointSize')
2764CopyArgumentMetadata(GLM.FitToArcGISTable, u'bg', GAM.FitToArcGISTable, u'bg')
2765CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', GAM.FitToArcGISTable, u'overwriteExisting')
2766
2767# Public method: GAM.PredictFromArcGISRasters
2768
2769AddMethodMetadata(GAM.PredictFromArcGISRasters,
2770    shortDescription=_(u'Using a fitted generalized additive model (GAM), this tool creates a raster representing the response variable predicted from ArcGIS rasters representing the predictor variables.'),
2771    isExposedToPythonCallers=True,
2772    isExposedByCOM=True,
2773    isExposedAsArcGISTool=True,
2774    arcGISDisplayName=_(u'Predict GAM From Rasters'),
2775    arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Additive Models'),
2776    dependencies=[ArcGISDependency(9, 2), RDependency(2, 6, 0), RPackageDependency(u'rgdal')])
2777
2778CopyArgumentMetadata(GAM.FitToArcGISTable, u'cls', GAM.PredictFromArcGISRasters, u'cls')
2779
2780AddArgumentMetadata(GAM.PredictFromArcGISRasters, u'inputModelFile',
2781    typeMetadata=FileTypeMetadata(mustExist=True),
2782    description=_(
2783u"""File that contains the fitted model, generated by the Fit GAM
2784tool."""),
2785    arcGISDisplayName=_(u'Input model file'))
2786
2787CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputPredictorRasters', GAM.PredictFromArcGISRasters, u'inputPredictorRasters')
2788CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'variableNames', GAM.PredictFromArcGISRasters, u'variableNames')
2789CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputResponseRaster', GAM.PredictFromArcGISRasters, u'outputResponseRaster')
2790CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster', GAM.PredictFromArcGISRasters, u'templateRaster')
2791CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques', GAM.PredictFromArcGISRasters, u'resamplingTechniques')
2792CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', GAM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues')
2793CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputErrorRaster', GAM.PredictFromArcGISRasters, u'outputErrorRaster')
2794CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'outputBinaryResponseRaster', GAM.PredictFromArcGISRasters, u'outputBinaryResponseRaster')
2795CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'cutoff', GAM.PredictFromArcGISRasters, u'cutoff')
2796CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'buildPyramids', GAM.PredictFromArcGISRasters, u'buildPyramids')
2797CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'overwriteExisting', GAM.PredictFromArcGISRasters, u'overwriteExisting')
2798
2799# Public method: GAM.BayesPredictFromArcGISRasters
2800
2801AddMethodMetadata(GAM.BayesPredictFromArcGISRasters,
2802    shortDescription=_(u'Using a binomial generalized additive model (GAM) fitted using the R mgcv package, this tool creates rasters representing the estimated probabilities that the response variable will equal or exceed specified thresholds, given ArcGIS rasters representing the predictor variables.'),
2803    isExposedToPythonCallers=True,
2804    isExposedByCOM=True,
2805    isExposedAsArcGISTool=True,
2806    arcGISDisplayName=_(u'Predict Bayesian Probabilities for GAM From Rasters'),
2807    arcGISToolCategory=_(u'Statistics\\Model Data\\Generalized Additive Models'),
2808    dependencies=[ArcGISDependency(9, 2), RDependency(2, 6, 0), RPackageDependency(u'mgcv'), RPackageDependency(u'MASS'), RPackageDependency(u'rgdal')])
2809
2810CopyArgumentMetadata(GAM.FitToArcGISTable, u'cls', GAM.BayesPredictFromArcGISRasters, u'cls')
2811
2812AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'inputModelFile',
2813    typeMetadata=FileTypeMetadata(mustExist=True),
2814    description=_(
2815u"""File that contains the fitted model, generated by the Fit GAM
2816tool. The model must be of the binomial family, use a logit link
2817function, and have been fitted using R mgcv package."""),
2818    arcGISDisplayName=_(u'Input model file'))
2819
2820CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'inputPredictorRasters', GAM.BayesPredictFromArcGISRasters, u'inputPredictorRasters')
2821CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'variableNames', GAM.BayesPredictFromArcGISRasters, u'variableNames')
2822
2823AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'thresholds',
2824    typeMetadata=ListTypeMetadata(elementType=FloatTypeMetadata(), minLength=1),
2825    description=_(
2826u"""List of thresholds for which probabilities should be estimated.
2827You must specify one output probability raster for each threshold. See
2828the documentation for that parameter for more information."""),
2829    arcGISDisplayName=_(u'Thresholds'))
2830
2831AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'outputProbabilityRasters',
2832    typeMetadata=ListTypeMetadata(elementType=ArcGISRasterTypeMetadata(mustBeDifferentThanArguments=[u'inputModelFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True), minLength=1, mustBeSameLengthAsArgument=u'thresholds'),
2833    description=_(
2834u"""List of output rasters representing the estimated probabilities
2835that the response variable will meet or exceed the thresholds you
2836specified, given the predictor rasters you specified. The
2837probabilities will range from 0 to 1.
2838
2839For example, if you specify the threshold 0.2, the output probability
2840raster represents the estimated probability that the response variable
2841will be in the range 0.2 - 1.0. If a given pixel of the output raster
2842is close to 0, it indicates high likelihood that the response will be
2843less than 0.2 at that location, while a value close to 1.0 indicates
2844high likelihood that the response will be greater than 0.2.
2845
2846The probability is estimated by comparing the predictor raster values
2847to samples drawn from the posterior distribution of these predictors
2848in the model. Please contact the author of this tool for more
2849information. (I will provide a better explanation once I have obtained
2850it from the statistician who wrote the algorithm.)
2851
2852If any pixel is NoData in any predictor raster (after it has been
2853projected and clipped as needed), the pixel will be NoData in the
2854output rasters as well. This is because the response variable cannot
2855be predicted by the model if any of the predictor variables are
2856missing."""),
2857    direction=u'Output',
2858    arcGISDisplayName=_(u'Output probability rasters'))
2859
2860CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'templateRaster', GAM.BayesPredictFromArcGISRasters, u'templateRaster')
2861CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'resamplingTechniques', GAM.BayesPredictFromArcGISRasters, u'resamplingTechniques')
2862CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', GAM.BayesPredictFromArcGISRasters, u'ignoreOutOfRangeValues')
2863
2864AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'samples',
2865    typeMetadata=IntegerTypeMetadata(minValue=1),
2866    description=_(
2867u"""Number of samples to draw from the posterior distribution when
2868estimating the probability that a given prediction meets or exceeds
2869the thresholds you specified.
2870
2871Large numbers increase statistical accuracy but require more memoryand
2872processing time than small numbers. The memory increase is linear. I
2873am not sure about the processing time.
2874
2875For quick, inaccurate results, use 100 samples. For publishable
2876results, use 1000 or 10,000. If the tool fails due to insufficient
2877memory, increase the value of the Split Prediction Into Chunk
2878parameter. For example, if you increase the number of samples by 10x,
2879you should also increase the number of chunks by 10x."""),
2880    arcGISDisplayName=_(u'Number of samples from the posterior distribution'),
2881    arcGISCategory=_(u'Prediction options'))
2882
2883AddArgumentMetadata(GAM.BayesPredictFromArcGISRasters, u'chunks',
2884    typeMetadata=IntegerTypeMetadata(canBeNone=True, minValue=1),
2885    description=_(
2886u"""Number of chunks to split the prediction into.
2887
2888Increase this parameter if you find that this tool fails because there
2889is insufficient memory to perform your prediction. That failure occurs
2890because the underlying R function that performs the prediction assumes
2891that infinite memory is available and does not attempt to control its
2892utilization when performing large predictions. When a value is
2893provided for this parameter, this tool splits up the prediction job
2894into the specified number of same-size chunks and invokes the R
2895function on each chunk. This requires some additional processing but
2896cuts the memory required for the prediction to that needed for just
2897one of the chunks.
2898
2899If you need to use this parameter we recommend that you start with a
2900value of 100. If that does not work, try 1000, 10000, and so on."""),
2901    arcGISDisplayName=_(u'Split prediction into chunks'),
2902    arcGISCategory=_(u'Prediction options'))
2903
2904CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'buildPyramids', GAM.BayesPredictFromArcGISRasters, u'buildPyramids')
2905CopyArgumentMetadata(GAM.PredictFromArcGISRasters, u'overwriteExisting', GAM.BayesPredictFromArcGISRasters, u'overwriteExisting')
2906
2907###############################################################################
2908# Metadata: TreeModel class
2909###############################################################################
2910
2911AddClassMetadata(TreeModel,
2912    shortDescription=_(u'Provides methods for modeling and prediction using tree models (a.k.a. CARTs).'),
2913    isExposedAsCOMServer=True,
2914    comIID=u'{737951E5-51F7-4D54-85E1-ED955485C9CD}',
2915    comCLSID=u'{F6F9A24E-C466-4C15-BF74-D26547854EA0}')
2916
2917# Public method: TreeModel.FitToArcGISTable
2918
2919AddMethodMetadata(TreeModel.FitToArcGISTable,
2920    shortDescription=_(u'Fits a tree model to data in an ArcGIS table.'),
2921    longDescription=_(
2922u"""Tree models were first introduced by Breiman et al. (1984) in the
2923classic Classification and Regression Tree (CART) software and are
2924frequently referenced by that name. Since that time, the original
2925methods have been reimplemented in R and many other statistical
2926programs. This tool fits tree models using the R rpart package by
2927Terry M. Therneau and Elizabeth J. Atkinson, and plots them using the
2928R rpart.plot package by Stephen Milborrow.
2929
2930**References**
2931
2932Breiman, L., Friedman, J.H., Olshen, R.A., Stone, C.J. (1984). Classification and regression trees. Chapman & Hall/CRC.
2933
2934`An Introduction to Recursive Partitioning Using the RPART Routines <http://www.mayo.edu/hsr/techrpt/61.pdf>`_
2935
2936`R rpart package documentation <http://cran.r-project.org/web/packages/rpart/rpart.pdf>`_"""),
2937    isExposedToPythonCallers=True,
2938    isExposedByCOM=True,
2939    isExposedAsArcGISTool=True,
2940    arcGISDisplayName=_(u'Fit Tree Model'),
2941    arcGISToolCategory=_(u'Statistics\\Model Data\\Tree Models'),
2942    dependencies=[ArcGISDependency(9, 1), RDependency(2, 12, 0), RPackageDependency(u'rpart', u'3.1-48'), RPackageDependency(u'rpart.plot', u'1.0-0')])
2943
2944AddArgumentMetadata(TreeModel.FitToArcGISTable, u'cls',
2945    typeMetadata=ClassOrClassInstanceTypeMetadata(cls=TreeModel),
2946    description=_(u'%s class or an instance of it.') % TreeModel.__name__)
2947
2948CopyArgumentMetadata(GLM.FitToArcGISTable, u'inputTable', TreeModel.FitToArcGISTable, u'inputTable')
2949CopyArgumentMetadata(GLM.FitToArcGISTable, u'outputModelFile', TreeModel.FitToArcGISTable, u'outputModelFile')
2950CopyArgumentMetadata(GLM.FitToArcGISTable, u'formula', TreeModel.FitToArcGISTable, u'formula')
2951
2952AddArgumentMetadata(TreeModel.FitToArcGISTable, u'method',
2953    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'ANOVA', u'Class', u'Exp', u'Poisson'], makeLowercase=True),
2954    description=_(
2955u"""Method to use for splitting the tree, one of:
2956
2957* ANOVA - Use this method to build a regression tree, i.e. when you
2958  are modeling a continuous response variable, such as the abundance
2959  of a species. With this method, the splits will be chosen to
2960  maximize the between-groups sum-of-squares in a simple analysis of
2961  variance.
2962
2963* Class - Use this method to build a classification tree, i.e. when
2964  you are modeling a categorical response variable, such as the
2965  presence or absence of a species. When this method is selected, the
2966  response variable is assumed to be categorical and the R factor
2967  function is automatically applied to it.
2968
2969* Exp - Use this method to build a regression tree using exponential
2970  scaling. For more information about this method, please see the
2971  references below.
2972
2973* Poisson - Use this method to build a regression tree using Poisson
2974  regression, which is appropriate for event rate data. For more
2975  information about this method, please see the references below.
2976
2977**References**
2978
2979`An Introduction to Recursive Partitioning Using the RPART Routines <http://www.mayo.edu/hsr/techrpt/61.pdf>`_
2980
2981`R rpart package documentation <http://cran.r-project.org/web/packages/rpart/rpart.pdf>`_"""),
2982    arcGISDisplayName=_(u'Splitting method'))
2983
2984AddArgumentMetadata(TreeModel.FitToArcGISTable, u'where',
2985    typeMetadata=SQLWhereClauseTypeMetadata(canBeNone=True),
2986    description=ArcGIS91SelectCursor.__init__.__doc__.Obj.GetArgumentByName(u'where').Description,
2987    arcGISParameterDependencies=[u'inputTable'],
2988    arcGISDisplayName=_(u'Where clause'),
2989    arcGISCategory=_(u'Tree growing options'))
2990
2991AddArgumentMetadata(TreeModel.FitToArcGISTable, u'allowMissingCovariates',
2992    typeMetadata=BooleanTypeMetadata(),
2993    description=_(
2994u"""If this option is enabled (the default), records will be included
2995in the model fitting process so long as they have a value for the
2996response variable and at least one predictor variable. If this option
2997is disabled, records must have values for the response variable and
2998all predictor variables in order to be included.
2999
3000The R rpart package that is used to fit the model has the novel
3001capability of allowing records that are missing some data to still
3002participate in the model fitting process. For more information about
3003how this works, please see
3004`An Introduction to Recursive Partitioning Using the RPART Routines <http://www.mayo.edu/hsr/techrpt/61.pdf>`_."""),
3005    arcGISDisplayName=_(u'Include records that are missing covariates'),
3006    arcGISCategory=_(u'Tree growing options'))
3007
3008AddArgumentMetadata(TreeModel.FitToArcGISTable, u'minSplit',
3009    typeMetadata=IntegerTypeMetadata(minValue=2),
3010    description=_(
3011u"""The minimum number of observations that must exist in a node of
3012the tree in order for a split of that node to be attempted. The
3013default value, 20, was taken from the R rpart package that is used to
3014fit the model."""),
3015    arcGISDisplayName=_(u'Minimum number of observations to attempt a split'),
3016    arcGISCategory=_(u'Tree growing options'))
3017
3018AddArgumentMetadata(TreeModel.FitToArcGISTable, u'minBucket',
3019    typeMetadata=IntegerTypeMetadata(minValue=1),
3020    description=_(
3021u"""The minimum number of observations that may be in any leaf node of
3022the tree. The default value, 7, was taken from the R rpart package
3023that is used to fit the model. By default, rpart recommends that this
3024parameter be set to one third of the previous parameter."""),
3025    arcGISDisplayName=_(u'Minimum number of observations in a leaf node'),
3026    arcGISCategory=_(u'Tree growing options'))
3027
3028AddArgumentMetadata(TreeModel.FitToArcGISTable, u'cp',
3029    typeMetadata=FloatTypeMetadata(minValue=0.),
3030    description=_(
3031u"""Any split that does not decrease the overall lack of fit by a
3032factor of this parameter will not be attempted. For instance, with
3033ANOVA splitting, this means that the overall Rsquare must increase by
3034this parameter at each step. The main role of this parameter is to
3035save computing time by pruning off splits that are obviously not
3036worthwhile. Essentially, you inform the tool that any split which does
3037not improve the fit by this parameter will likely be pruned off by
3038cross-validation, and that hence the tool need not pursue it.
3039
3040The default value, 0.01, was taken from the R rpart package that is
3041used to fit the model."""),
3042    arcGISDisplayName=_(u'Complexity parameter'),
3043    arcGISCategory=_(u'Tree growing options'))
3044
3045AddArgumentMetadata(TreeModel.FitToArcGISTable, u'maxCompete',
3046    typeMetadata=IntegerTypeMetadata(minValue=0),
3047    description=_(
3048u"""The number of competitor splits to retain in the output. It is
3049useful to know not just which split was chosen, but which variable
3050came in second, third, etc. The default value, 4, was taken from the R
3051rpart package that is used to fit the model."""),
3052    arcGISDisplayName=_(u'Number of competitor splits to retain'),
3053    arcGISCategory=_(u'Tree growing options'))
3054
3055AddArgumentMetadata(TreeModel.FitToArcGISTable, u'maxSurrogate',
3056    typeMetadata=IntegerTypeMetadata(minValue=0),
3057    description=_(
3058u"""The number of surrogate splits to retain in the output. If this is
3059set to zero the compute time will be shortened, since approximately
3060half of the computational time (other than setup) is used in the
3061search for surrogate splits. The default value, 5, was taken from the
3062R rpart package that is used to fit the model."""),
3063    arcGISDisplayName=_(u'Number of surrogate splits to retain'),
3064    arcGISCategory=_(u'Tree growing options'))
3065
3066AddArgumentMetadata(TreeModel.FitToArcGISTable, u'useSurrogate',
3067    typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1, 2]),
3068    description=_(
3069u"""The method for using surrogates in the splitting process, one of:
3070
3071* 0 - display only; an observation with a missing value for the
3072  primary split rule is not sent further down the tree.
3073
3074* 1 - use surrogates, in order, to split subjects missing the primary
3075  variable; if all surrogates are missing the observation is not
3076  split.
3077
3078* 2 - if all surrogates are missing, then send the observation in the
3079  majority direction. This is the recommendations of Breiman, et al.
3080
3081The default value, 2, was taken from the R rpart package that is used
3082to fit the model."""),
3083    arcGISDisplayName=_(u'Surrogate usage method'),
3084    arcGISCategory=_(u'Tree growing options'))
3085
3086AddArgumentMetadata(TreeModel.FitToArcGISTable, u'surrogateStyle',
3087    typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1]),
3088    description=_(
3089u"""The method used to select the best surrogate, one of:
3090
3091* 0 - the tool uses the total number of correct classifications for a
3092  potential surrogate variable.
3093
3094* 1 - the tool uses the percent correct, calculated over the
3095  non-missing values of the surrogate.
3096
3097The default value, 0, was taken from the R rpart package that is used
3098to fit the model. This value more severely penalizes covariates with a
3099large number of missing values."""),
3100    arcGISDisplayName=_(u'Surrogate selection method'),
3101    arcGISCategory=_(u'Tree growing options'))
3102
3103AddArgumentMetadata(TreeModel.FitToArcGISTable, u'xval',
3104    typeMetadata=IntegerTypeMetadata(minValue=1),
3105    description=_(
3106u"""The number of cross-validations to perform.
3107
3108The R rpart package that is used to fit the model uses a default of
310910, but we have found that so few iterations can cause the calculated
3110cross-validation error to differ substantially over several runs of
3111the tool using identical input data and parameter values. In one case,
3112we observed the cross-validation errors to vary by over 10%. Because
3113the cross-validation errors are often used to prune the tree, we
3114believe it is important to have accurate estimates of them, so we
3115increased the default to 1000. This will cause complicated models to
3116run substantially slower. If you find your model is too slow, decrease
3117the value."""),
3118    arcGISDisplayName=_(u'Number of cross-validations'),
3119    arcGISCategory=_(u'Tree growing options'))
3120
3121AddArgumentMetadata(TreeModel.FitToArcGISTable, u'maxDepth',
3122    typeMetadata=IntegerTypeMetadata(minValue=0, maxValue=30),
3123    description=_(
3124u"""The maximum depth of any node of the final tree, with the root
3125node counted as depth 0. The default value, 30, was taken from the R
3126rpart package that is used to fit the model."""),
3127    arcGISDisplayName=_(u'Maximum tree depth'),
3128    arcGISCategory=_(u'Tree growing options'))
3129
3130AddArgumentMetadata(TreeModel.FitToArcGISTable, u'pruningMethod',
3131    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, allowedValues=[u'Minimum error', u'1-SE rule', u'Interactive', u'User specified CP'], makeLowercase=True),
3132    description=_(
3133u"""Method to use for pruning the tree, one of:
3134
3135* Minimum error - The tool will prune the tree using the complexity
3136  parameter associated with the minimum cross-validation error.
3137
3138* 1-SE rule - The tool will prune the tree using the complexity
3139  parameter obtained using the 1-SE rule described by Therneau and
3140  Atkinson in chapter 4 of
3141  `An Introduction to Recursive Partitioning Using the RPART Routines <http://www.mayo.edu/hsr/techrpt/61.pdf>`_.
3142  A plot of the mean cross-validation errors versus candidate values
3143  of the complexity parameter often has an initial sharp drop followed
3144  by a relatively flat plateau and then a slow rise. (This tool
3145  produces that plot as an optional diagnostic output.) According to
3146  the 1-SE rule, any cross-validation error within one standard error
3147  of the minimum cross-validation error is considered equivalent to
3148  the minimum (i.e. considered to be part of the flat plateau). The
3149  1-SE rule chooses the largest complexity parameter that yields a
3150  cross-validation error equivalent ot the minimum. This results in an
3151  optimal tree, i.e. the tree with the fewest number of splits that
3152  yields a cross-validation error equivalent to the minimum.
3153
3154* Interactive - The tool will display the unpruned tree in a window,
3155  allowing you to prune it interactively with the mouse. If you click
3156  on a split it will be marked as deleted. If you click on an
3157  already-deleted split it will be undeleted (if its parent is not
3158  deleted). Information about the node is printed as you click. When
3159  you have finished pruning, click on the QUIT button.
3160
3161* User specified CP - The tool will prune the tree using the
3162  complexity parameter you specify below.
3163
3164If this parameter is omitted, the tree will not be pruned."""),
3165    arcGISDisplayName=_(u'Pruning method'),
3166    arcGISCategory=_(u'Tree pruning options'))
3167
3168AddArgumentMetadata(TreeModel.FitToArcGISTable, u'pruningCP',
3169    typeMetadata=FloatTypeMetadata(canBeNone=True, mustBeGreaterThan=0.),
3170    description=_(
3171u"""Complexity parameter for pruning the tree. This parameter is only
3172used when the Pruning Method is set to 'User specified'."""),
3173    arcGISDisplayName=_(u'Complexity parameter for pruning'),
3174    arcGISCategory=_(u'Tree pruning options'))
3175
3176CopyArgumentMetadata(GLM.FitToArcGISTable, u'xColumnName', TreeModel.FitToArcGISTable, u'xColumnName')
3177CopyArgumentMetadata(GLM.FitToArcGISTable, u'yColumnName', TreeModel.FitToArcGISTable, u'yColumnName')
3178CopyArgumentMetadata(GLM.FitToArcGISTable, u'zColumnName', TreeModel.FitToArcGISTable, u'zColumnName')
3179CopyArgumentMetadata(GLM.FitToArcGISTable, u'mColumnName', TreeModel.FitToArcGISTable, u'mColumnName')
3180
3181AddArgumentMetadata(TreeModel.FitToArcGISTable, u'writeSummaryFile',
3182    typeMetadata=BooleanTypeMetadata(),
3183    description=_(
3184u"""If True, this tool will write summary information about the fitted
3185model to a text file. (This is the same information that the tool
3186outputs as log messages.) The file will have the name X_summary.txt,
3187where X is the name of the output model file, minus any
3188extension."""),
3189    arcGISDisplayName=_(u'Write model summary file'),
3190    arcGISCategory=_(u'Additional output options'))
3191
3192AddArgumentMetadata(TreeModel.FitToArcGISTable, u'writeDiagnosticPlots',
3193    typeMetadata=BooleanTypeMetadata(),
3194    description=_(
3195u"""If True, this tool will write diagnostic plots:
3196
3197* X_cp.Y - visual representation of the cross-validation results for
3198  the unpruned tree, to assist you with choosing a Complexity
3199  Parameter for pruning the tree. The x-axis represents possible
3200  choices for the Complexity Parameter and the y-axis represents the
3201  means and standard deviations of the errors in the cross-validated
3202  prediction that would result. The dashed horizontal line is drawn 1
3203  standard error above the minimum of the curve. A good choice of the
3204  Complexity Parameter for pruning is the leftmost value for which the
3205  mean error lies below the line. This value will be chosen
3206  automatically if the Pruning Method parameter is set to '1-SE rule'.
3207
3208* X_rsquare.Y - two-panel plot only produced for the ANOVA splitting
3209  method. The first panel shows the r-square (both apparent and
3210  apparent from cross-validation) versus the number of splits. The
3211  second panel shows the mean error in the cross-validated prediction
3212  versus the number of splits (this is essentially the same plot as
3213  the X_cp.Y plot described above). Both panels are produced for the
3214  unpruned tree.
3215
3216* X_residuals.Y - plot of the residuals vs. the fitted values for the
3217  unpruned tree.
3218
3219* X_pruned_residuals.Y - plot of the residuals vs. the fitted values
3220  for the pruned tree. This plot will only be produced if the tree is
3221  pruned.
3222
3223In the file names above, X is the name of the output model file, minus
3224any extension, and Y is the extension of the selected output plot
3225format."""),
3226    arcGISDisplayName=_(u'Write diagnostic plots'),
3227    arcGISCategory=_(u'Additional output options'))
3228
3229AddArgumentMetadata(TreeModel.FitToArcGISTable, u'writeTreePlot',
3230    typeMetadata=BooleanTypeMetadata(),
3231    description=_(
3232u"""If True, this tool will write a plot of the unpruned tree to a
3233file having the name X_unpruned_tree.Y, where X is the name of the
3234output model file minus the extension and Y is the extension of the
3235selected output plot format."""),
3236    arcGISDisplayName=_(u'Write tree plot'),
3237    arcGISCategory=_(u'Additional output options'))
3238
3239AddArgumentMetadata(TreeModel.FitToArcGISTable, u'writePrunedTreePlot',
3240    typeMetadata=BooleanTypeMetadata(),
3241    description=_(
3242u"""If True, this tool will write a plot of the pruned tree to a file
3243having the name X_pruned_tree.Y, where X is the name of the output
3244model file minus the extension and Y is the extension of the selected
3245output plot format. This plot will only be produced if the tree is
3246pruned."""),
3247    arcGISDisplayName=_(u'Write pruned tree plot'),
3248    arcGISCategory=_(u'Additional output options'))
3249
3250CopyArgumentMetadata(GLM.FitToArcGISTable, u'plotFileFormat', TreeModel.FitToArcGISTable, u'plotFileFormat')
3251CopyArgumentMetadata(GLM.FitToArcGISTable, u'res', TreeModel.FitToArcGISTable, u'res')
3252CopyArgumentMetadata(GLM.FitToArcGISTable, u'width', TreeModel.FitToArcGISTable, u'width')
3253CopyArgumentMetadata(GLM.FitToArcGISTable, u'height', TreeModel.FitToArcGISTable, u'height')
3254
3255AddArgumentMetadata(TreeModel.FitToArcGISTable, u'pointSize',
3256    typeMetadata=FloatTypeMetadata(minValue=1.0),
3257    description=_(
3258u"""The default pointsize of text in diagnostic plots (the size of the
3259text in tree plots is controlled by a different parameter)."""),
3260    arcGISDisplayName=_(u'Default pointsize of text in diagnostic plots'),
3261    arcGISCategory=_(u'Additional output options'))
3262
3263CopyArgumentMetadata(GLM.FitToArcGISTable, u'bg', TreeModel.FitToArcGISTable, u'bg')
3264
3265AddArgumentMetadata(TreeModel.FitToArcGISTable, u'treePlotType',
3266    typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1, 2, 3, 4]),
3267    description=_(
3268u"""Type of tree plots to create, one of:
3269
3270* 0 - The default. Draw a split label at each split and a node label
3271  at each leaf.
3272
3273* 1 - Label all nodes, not just leaves.
3274
3275* 2 - Like 1 but draw the split labels below the node labels. Similar
3276  to the plots in the CART book.
3277
3278* 3 - Draw separate split labels for the left and right directions.
3279
3280* 4 - Like 3 but label all nodes, not just leaves.
3281"""),
3282    arcGISDisplayName=_(u'Plot type'),
3283    arcGISCategory=_(u'Tree plot options'))
3284
3285AddArgumentMetadata(TreeModel.FitToArcGISTable, u'extra',
3286    typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
3287    description=_(
3288u"""Extra information to display at the nodes, one of:
3289
3290* 0 - No extra information.
3291
3292* 1 - The default. Display the number of observations that fall in the
3293  node (per class for Class models; prefixed by the number of events
3294  for Poisson and Exp models).
3295
3296* 2 - Class models: display the classification rate at the node, expressed as the
3297  number of correct classifications and the number of observations in the node.
3298  Poisson and Exp models: display the number of events.
3299
3300* 3 - Class models only: misclassification rate at the node, expressed
3301  as the number of incorrect classifications and the number of
3302  observations in the node.
3303
3304* 4 - Class models only: probability per class of observations in the
3305  node (conditioned on the node, sum across a node is 1).
3306
3307* 5 - Class models only: like 4 but do not display the fitted class.
3308
3309* 6 - Class models only: the probability of the second class only.
3310  Useful for binary responses.
3311
3312* 7 - Class models only: like 6 but do not display the fitted class.
3313
3314* 8 - Class models only: the probability of the fitted class.
3315
3316* 9 - Class models only: the probabilities times the fraction of
3317  observations in the node (the probability relative to all
3318  observations, sum across all leaves is 1).
3319"""),
3320    arcGISDisplayName=_(u'Extra information'),
3321    arcGISCategory=_(u'Tree plot options'))
3322
3323AddArgumentMetadata(TreeModel.FitToArcGISTable, u'percentage',
3324    typeMetadata=BooleanTypeMetadata(),
3325    description=_(
3326u"""If True, the default, nodes will be labeled with the percentage of
3327observations in the node. The percentage will be displayed below the
3328"extra information" (if any is requested)."""),
3329    arcGISDisplayName=_(u'Display percentage of observations'),
3330    arcGISCategory=_(u'Tree plot options'))
3331
3332AddArgumentMetadata(TreeModel.FitToArcGISTable, u'under',
3333    typeMetadata=BooleanTypeMetadata(),
3334    description=_(
3335u"""If True, the default, extra information and percentage of
3336observations will be displayed below the nodes. If False, they will be
3337displayed within the nodes' boxes.
3338
3339This parameter is ignored if neither extra information nor percentage
3340of observations are requested."""),
3341    arcGISDisplayName=_(u'Display extra text under node boxes'),
3342    arcGISCategory=_(u'Tree plot options'))
3343
3344AddArgumentMetadata(TreeModel.FitToArcGISTable, u'clipRightLabels',
3345    typeMetadata=BooleanTypeMetadata(),
3346    description=_(
3347u"""If True, the default, the right-hand split labels on plots of type
33483 or 4 will not include "variable=". If False, the right-hand labels
3349will include "variable=", just like the left-hand labels.
3350
3351This parameter is ignored the plot type is not 3 or 4."""),
3352    arcGISDisplayName=_(u'Clip right-hand split labels'),
3353    arcGISCategory=_(u'Tree plot options'))
3354
3355AddArgumentMetadata(TreeModel.FitToArcGISTable, u'fallenLeaves',
3356    typeMetadata=BooleanTypeMetadata(),
3357    description=_(
3358u"""If True, all leaf nodes will be displayed at the bottom. If False,
3359the default, leaf nodes will be displayed where they would normally
3360appear."""),
3361    arcGISDisplayName=_(u'Display leaves at bottom'),
3362    arcGISCategory=_(u'Tree plot options'))
3363
3364AddArgumentMetadata(TreeModel.FitToArcGISTable, u'branchType',
3365    typeMetadata=IntegerTypeMetadata(allowedValues=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
3366    description=_(
3367u"""Type of branches to draw. If zero, the default, the tool will draw
3368conventional branches having a constant narrow width. If nonzero, the tool
3369will draw "wide branches", with branch widths proportional to the
3370specified parameter, one of:
3371
3372* 1 - Deviance
3373
3374* 2 - Square root of deviance
3375
3376* 3 - Deviance / number of observations
3377
3378* 4 - Square root of (deviance / number of observations)
3379
3380* 5 - Number of observations
3381
3382* 6 - Complexity parameter
3383
3384* 7 - Absolute value of the predicted value
3385
3386* 8 - Predicted value minus the minimum predicted value
3387
3388* 9 - Constant wide width, for checking the visual distortion that
3389  results when wide branches are drawn at different angles
3390"""),
3391    arcGISDisplayName=_(u'Branch type'),
3392    arcGISCategory=_(u'Tree plot options'))
3393
3394AddArgumentMetadata(TreeModel.FitToArcGISTable, u'branch',
3395    typeMetadata=FloatTypeMetadata(minValue=0., maxValue=1.),
3396    description=_(
3397u"""Controls the shape of the branches from parent to child nodes. Any
3398number from 0 to 1 is allowed. A value of 1 gives square shouldered
3399branches, a value of 0 give V shaped branches, with other values being
3400intermediate.
3401
3402Note that if the Branch Type parameter is nonzero, the Branch Shape
3403parameter will be rounded to 1 or 0 (e.g. a Branch Shape of 0.75 will
3404be rounded to 1)."""),
3405    arcGISDisplayName=_(u'Branch shape'),
3406    arcGISCategory=_(u'Tree plot options'))
3407
3408AddArgumentMetadata(TreeModel.FitToArcGISTable, u'uniform',
3409    typeMetadata=BooleanTypeMetadata(),
3410    description=_(
3411u"""If True, the default, the vertical spacing of the nodes will be
3412uniform. If False, the nodes will be spaced proportionally to the fit
3413(more precisely, to the difference between a node's deviance and the
3414sum of its children's deviances). Small spaces must be expanded to
3415leave room for the labels.
3416
3417Note: if this parameter is False and the Text Magnification Factor is
3418omitted (the default), very small text can sometimes result."""),
3419    arcGISDisplayName=_(u'Use uniform vertical spacing'),
3420    arcGISCategory=_(u'Tree plot options'))
3421
3422AddArgumentMetadata(TreeModel.FitToArcGISTable, u'digits',
3423    typeMetadata=IntegerTypeMetadata(minValue=0),
3424    description=_(
3425u"""Number of significant digits to display in floating-point numbers.
3426
3427Probabilities and percentages are treated specially. Probabilities are
3428displayed with the specified number of digits after the decimal point
3429(by default 2 digits). Percentages are displayed with the specified
3430number of digits minus 2 after the decimal point (by default no
3431digits)."""),
3432    arcGISDisplayName=_(u'Significant digits for labels'),
3433    arcGISCategory=_(u'Tree plot options'))
3434
3435AddArgumentMetadata(TreeModel.FitToArcGISTable, u'varlen',
3436    typeMetadata=IntegerTypeMetadata(),
3437    description=_(
3438u"""Length of variable names in text at the splits (and, for class
3439responses, the class displayed at the node). There are three
3440possibilities:
3441
3442* 0 - The default. Use full names.
3443
3444* >0 - Use an abbreviation algorithm to shorten the names to at
3445  least the specified number, such that they remain unique.
3446
3447* <0 - Truncate names to the shortest length where they are still
3448  unique, but never truncate to shorter than the specified number
3449  (e.g. the value -5 means never truncate to shorter than 5
3450  characters).
3451"""),
3452    arcGISDisplayName=_(u'Length of variable names at splits'),
3453    arcGISCategory=_(u'Tree plot options'))
3454
3455AddArgumentMetadata(TreeModel.FitToArcGISTable, u'faclen',
3456    typeMetadata=IntegerTypeMetadata(),
3457    description=_(
3458u"""Length of factor level names (i.e. categorical variable values) in
3459splits. There are four possibilities:
3460
3461* 0 - The default. Use full names.
3462
3463* 1 - Represent factor levels with alphabetic characters (a for the
3464  first level, b for the second, and so on).
3465
3466* >1 - Use an abbreviation algorithm to shorten the names to at
3467  least the specified number, such that they remain unique.
3468
3469* <0 - Truncate names to the shortest length where they are still
3470  unique, but never truncate to shorter than the specified number
3471  (e.g. the value -5 means never truncate to shorter than 5
3472  characters).
3473"""),
3474    arcGISDisplayName=_(u'Length of factor level names in splits'),
3475    arcGISCategory=_(u'Tree plot options'))
3476
3477AddArgumentMetadata(TreeModel.FitToArcGISTable, u'cex',
3478    typeMetadata=FloatTypeMetadata(canBeNone=True, mustBeGreaterThan=0.),
3479    description=_(
3480u"""A numerical value giving the amount by which text should be
3481magnified relative to the default. If omitted, the default, the text
3482size will be calculated automatically.
3483
3484The default automatic calculation means that this seemingly innocuous
3485argument has a far reaching effect. If necessary it will trigger the
3486node shifting engine to get a decent type size (see the Compress Tree
3487Vertically parameter)."""),
3488    arcGISDisplayName=_(u'Text magnification factor'),
3489    arcGISCategory=_(u'Tree plot options'))
3490
3491AddArgumentMetadata(TreeModel.FitToArcGISTable, u'tweak',
3492    typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
3493    description=_(
3494u"""Adjust the (possibly automatically calculated) Text Magnification
3495Factor. For example, use 1.1 to make the text 10% larger. The default
3496is 1, meaning no adjustment.
3497
3498Note that font sizes are discrete, so the Text Magnification Factor
3499you ask for may not be the one you get. And a small tweak may not
3500actually change the type size or change it more than you want."""),
3501    arcGISDisplayName=_(u'Tweak text magnification'),
3502    arcGISCategory=_(u'Tree plot options'))
3503
3504AddArgumentMetadata(TreeModel.FitToArcGISTable, u'compress',
3505    typeMetadata=BooleanTypeMetadata(),
3506    description=_(
3507u"""If True, the default, the tree will be compressed horizontally by
3508shifting nodes horizontally where space is available."""),
3509    arcGISDisplayName=_(u'Compress tree horizontally'),
3510    arcGISCategory=_(u'Tree plot options'))
3511
3512AddArgumentMetadata(TreeModel.FitToArcGISTable, u'ycompress',
3513    typeMetadata=BooleanTypeMetadata(),
3514    description=_(
3515u"""If True, the default, and the initial automatically calculated
3516Text Magnification Factor is less than 0.7, crowded labels will be
3517shifted vertically where space is available. This often allows
3518considerably larger text.
3519
3520Set this parameter to False if you fell the resulting plot is too
3521messy. The shifting algorithm may work a little better (allowing
3522larger text) for plot types 1, 2, and 3."""),
3523    arcGISDisplayName=_(u'Compress tree vertically'),
3524    arcGISCategory=_(u'Tree plot options'))
3525
3526CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', TreeModel.FitToArcGISTable, u'overwriteExisting')
3527
3528# Public method: TreeModel.PredictFromArcGISRasters
3529
3530AddMethodMetadata(TreeModel.PredictFromArcGISRasters,
3531    shortDescription=_(u'Using a fitted tree model, this tool creates a raster representing the response variable predicted from rasters representing the predictor variables.'),
3532    isExposedToPythonCallers=True,
3533    isExposedByCOM=True,
3534    isExposedAsArcGISTool=True,
3535    arcGISDisplayName=_(u'Predict Tree Model From Rasters'),
3536    arcGISToolCategory=_(u'Statistics\\Model Data\\Tree Models'),
3537    dependencies=[ArcGISDependency(9, 1), RDependency(2, 12, 0), RPackageDependency(u'rpart', u'3.1-48'), RPackageDependency(u'rgdal')])
3538
3539CopyArgumentMetadata(TreeModel.FitToArcGISTable, u'cls', TreeModel.PredictFromArcGISRasters, u'cls')
3540
3541AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'inputModelFile',
3542    typeMetadata=FileTypeMetadata(mustExist=True),
3543    description=_(
3544u"""File that contains the fitted model, generated by the Fit Tree
3545Model tool."""),
3546    arcGISDisplayName=_(u'Input model file'))
3547
3548CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputPredictorRasters', TreeModel.PredictFromArcGISRasters, u'inputPredictorRasters')
3549CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'variableNames', TreeModel.PredictFromArcGISRasters, u'variableNames')
3550
3551AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'outputResponseRaster',
3552    typeMetadata=ArcGISRasterTypeMetadata(mustBeDifferentThanArguments=[u'inputModelFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
3553    description=_(
3554u"""Output raster representing the response predicted by the model
3555from the input predictor rasters."""),
3556    direction=u'Output',
3557    arcGISDisplayName=_(u'Output response raster'))
3558
3559CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster', TreeModel.PredictFromArcGISRasters, u'templateRaster')
3560CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques', TreeModel.PredictFromArcGISRasters, u'resamplingTechniques')
3561CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', TreeModel.PredictFromArcGISRasters, u'ignoreOutOfRangeValues')
3562
3563AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'buildPyramids',
3564    typeMetadata=BooleanTypeMetadata(),
3565    description=_(
3566u"""If True, pyramids will be built for the output raster, which will
3567improve its display speed in the ArcGIS user interface."""),
3568    arcGISDisplayName=_(u'Build pyramids'),
3569    arcGISCategory=_(u'Additional output options'))
3570
3571AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'overwriteExisting',
3572    typeMetadata=BooleanTypeMetadata(),
3573    description=_(
3574u"""If True, the output raster will be overwritten, if it exists. If
3575False, a ValueError will be raised if the output raster exists."""),
3576    initializeToArcGISGeoprocessorVariable=u'OverwriteOutput')
3577
3578###############################################################################
3579# Metadata: LinearMixedModel class
3580###############################################################################
3581
3582AddClassMetadata(LinearMixedModel,
3583    shortDescription=_(u'Provides methods for fitting and predicting linear mixed-effects models.'),
3584    isExposedAsCOMServer=True,
3585    comIID=u'{4678EDA8-073A-496B-81B6-6DC8F47E4EC3}',
3586    comCLSID=u'{A65B2686-1240-44E8-8764-397EB58BFCD7}')
3587
3588# Public method: LinearMixedModel.FitToArcGISTable
3589
3590AddMethodMetadata(LinearMixedModel.FitToArcGISTable,
3591    shortDescription=_(u'Fits a linear mixed-effects model to data in an ArcGIS table.'),
3592    longDescription=_(
3593u"""This tool fits a linear mixed-effects model using the lme function
3594from the R `nlme package <http://cran.r-project.org/web/packages/nlme/nlme.pdf>`_
3595written by Jose Pinherio and Douglas Bates (2000). Please see the
3596documentation for that function for more information.
3597
3598**References**
3599
3600The R documentation for the nlme package states the following about
3601the lme function: The computational methods follow the general
3602framework of Lindstrom and Bates (1988). The model formulation is
3603described in Laird and Ware (1982). The variance-covariance
3604parametrizations are described in Pinheiro and Bates (1996). The
3605different correlation structures that may be used are described in
3606Box, Jenkins and Reinse (1994), Littel et al (1996), and Venables and
3607Ripley, (1997). The use of variance functions for linear and nonlinear
3608mixed effects models is presented in detail in Davidian and Giltinan
3609(1995).
3610
3611Box, G.E.P., Jenkins, G.M., and Reinsel G.C. (1994) "Time Series
3612Analysis: Forecasting and Control", 3rd Edition, Holden-Day.
3613
3614Davidian, M. and Giltinan, D.M. (1995) "Nonlinear Mixed Effects Models
3615for Repeated Measurement Data", Chapman and Hall.
3616
3617Laird, N.M. and Ware, J.H. (1982) "Random-Effects Models for
3618Longitudinal Data", Biometrics, 38, 963-974.
3619
3620Lindstrom, M.J. and Bates, D.M. (1988) "Newton-Raphson and EM
3621Algorithms for Linear Mixed-Effects Models for Repeated-Measures
3622Data", Journal of the American Statistical Association, 83, 1014-1022.
3623
3624Littel, R.C., Milliken, G.A., Stroup, W.W., and Wolfinger, R.D. (1996)
3625"SAS Systems for Mixed Models", SAS Institute.
3626
3627Pinheiro, J.C. and Bates., D.M. (1996) "Unconstrained Parametrizations
3628for Variance-Covariance Matrices", Statistics and Computing, 6,
3629289-296.
3630
3631Pinheiro, J.C., and Bates, D.M. (2000) "Mixed-Effects Models in S and
3632S-PLUS", Springer.
3633
3634Venables, W.N. and Ripley, B.D. (2002) "Modern Applied Statistics with
3635S", 4th Edition, Springer-Verlag.
3636
3637`R nlme package documentation <http://cran.r-project.org/web/packages/nlme/nlme.pdf>`_"""),
3638    isExposedToPythonCallers=True,
3639    isExposedByCOM=True,
3640    isExposedAsArcGISTool=True,
3641    arcGISDisplayName=_(u'Fit Linear Mixed Model'),
3642    arcGISToolCategory=_(u'Statistics\\Model Data\\Linear Mixed Models'),
3643    dependencies=[ArcGISDependency(9, 1), RDependency(2, 7, 0)])
3644
3645AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'cls',
3646    typeMetadata=ClassOrClassInstanceTypeMetadata(cls=LinearMixedModel),
3647    description=_(u'%s class or an instance of it.') % LinearMixedModel.__name__)
3648
3649CopyArgumentMetadata(GLM.FitToArcGISTable, u'inputTable', LinearMixedModel.FitToArcGISTable, u'inputTable')
3650CopyArgumentMetadata(GLM.FitToArcGISTable, u'outputModelFile', LinearMixedModel.FitToArcGISTable, u'outputModelFile')
3651
3652AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'fixedFormula',
3653    typeMetadata=UnicodeStringTypeMetadata(),
3654    description=_(
3655u"""Two-sided formula that specifies the fixed-effects part of the
3656model.
3657
3658""" + _FormulaDescription),
3659    arcGISDisplayName=_(u'Fixed effects formula'))
3660
3661AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'randomFormula',
3662    typeMetadata=UnicodeStringTypeMetadata(),
3663    description=_(
3664u"""One-sided formula that specifies the random-effects part of the
3665model. The formula has the form::
3666
3667    ~x1+...+xn | g1/.../gm
3668
3669with x1+...+xn specifying the model for the random effects and
3670g1/.../gm the grouping structure (m may be equal to 1, in which case
3671no / is required). The random effects formula will be repeated for all
3672levels of grouping, in the case of multiple levels of grouping."""),
3673    arcGISDisplayName=_(u'Random effects formula'))
3674
3675CopyArgumentMetadata(GLM.FitToArcGISTable, u'where', LinearMixedModel.FitToArcGISTable, u'where')
3676
3677AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'method',
3678    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'REML', u'ML']),
3679    description=_(
3680u"""Model fitting method, one of:
3681
3682* REML - the model will be fit by maximizing the restricted
3683  log-likelihood. This is the default.
3684
3685* ML - the model will be fit by maximizing the log-likelihood.
3686"""),
3687    arcGISDisplayName=_(u'Model fitting method'),
3688    arcGISCategory=_(u'Model options'))
3689
3690AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'correlationStructure',
3691    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, makeLowercase=True, allowedValues=[u'Exponential', u'Gaussian', u'Linear', u'Rational quadratic', u'Spherical']),
3692    description=_(
3693u"""TODO: Write documentation for this parameter."""),
3694    arcGISDisplayName=_(u'Correlation structure'),
3695    arcGISCategory=_(u'Within-group correlation structure'))
3696
3697AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'correlationFormula',
3698    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
3699    description=_(
3700u"""TODO: Write documentation for this parameter."""),
3701    arcGISDisplayName=_(u'Correlation formula'),
3702    arcGISCategory=_(u'Within-group correlation structure'))
3703
3704AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'range_',
3705    typeMetadata=FloatTypeMetadata(canBeNone=True, mustBeGreaterThan=0.),
3706    description=_(
3707u"""TODO: Write documentation for this parameter."""),
3708    arcGISDisplayName=_(u'Range'),
3709    arcGISCategory=_(u'Within-group correlation structure'))
3710
3711AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'nugget',
3712    typeMetadata=FloatTypeMetadata(canBeNone=True, minValue=0., maxValue=1.),
3713    description=_(
3714u"""TODO: Write documentation for this parameter."""),
3715    arcGISDisplayName=_(u'Nugget'),
3716    arcGISCategory=_(u'Within-group correlation structure'))
3717
3718AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'metric',
3719    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, makeLowercase=True, allowedValues=[u'Euclidean', u'Manhattan', u'Maximum']),
3720    description=_(
3721u"""TODO: Write documentation for this parameter."""),
3722    arcGISDisplayName=_(u'Distance metric'),
3723    arcGISCategory=_(u'Within-group correlation structure'))
3724
3725AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'fixed',
3726    typeMetadata=BooleanTypeMetadata(),
3727    description=_(
3728u"""TODO: Write documentation for this parameter."""),
3729    arcGISDisplayName=_(u'Keep coefficients fixed during model optimization'),
3730    arcGISCategory=_(u'Within-group correlation structure'))
3731
3732CopyArgumentMetadata(GLM.FitToArcGISTable, u'xColumnName', LinearMixedModel.FitToArcGISTable, u'xColumnName')
3733CopyArgumentMetadata(GLM.FitToArcGISTable, u'yColumnName', LinearMixedModel.FitToArcGISTable, u'yColumnName')
3734CopyArgumentMetadata(GLM.FitToArcGISTable, u'zColumnName', LinearMixedModel.FitToArcGISTable, u'zColumnName')
3735CopyArgumentMetadata(GLM.FitToArcGISTable, u'mColumnName', LinearMixedModel.FitToArcGISTable, u'mColumnName')
3736
3737AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'writeSummaryFile',
3738    typeMetadata=BooleanTypeMetadata(),
3739    description=_(
3740u"""If True, this tool will write summary information about the fitted
3741model to a text file. (This is the same information that the tool
3742outputs as log messages.) The file will have the name X_summary.txt,
3743where X is the name of the output model file, minus any
3744extension."""),
3745    arcGISDisplayName=_(u'Write model summary file'),
3746    arcGISCategory=_(u'Additional output options'))
3747
3748AddArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'writeDiagnosticPlots',
3749    typeMetadata=BooleanTypeMetadata(),
3750    description=_(
3751u"""If True, this tool will write diagnostic plots for the fitted
3752model.
3753
3754TODO: Write documentation for this parameter."""),
3755    arcGISDisplayName=_(u'Write diagnostic plots'),
3756    arcGISCategory=_(u'Additional output options'))
3757
3758CopyArgumentMetadata(GLM.FitToArcGISTable, u'plotFileFormat', LinearMixedModel.FitToArcGISTable, u'plotFileFormat')
3759CopyArgumentMetadata(GLM.FitToArcGISTable, u'res', LinearMixedModel.FitToArcGISTable, u'res')
3760CopyArgumentMetadata(GLM.FitToArcGISTable, u'width', LinearMixedModel.FitToArcGISTable, u'width')
3761CopyArgumentMetadata(GLM.FitToArcGISTable, u'height', LinearMixedModel.FitToArcGISTable, u'height')
3762CopyArgumentMetadata(GLM.FitToArcGISTable, u'pointSize', LinearMixedModel.FitToArcGISTable, u'pointSize')
3763CopyArgumentMetadata(GLM.FitToArcGISTable, u'bg', LinearMixedModel.FitToArcGISTable, u'bg')
3764
3765CopyArgumentMetadata(GLM.FitToArcGISTable, u'overwriteExisting', LinearMixedModel.FitToArcGISTable, u'overwriteExisting')
3766
3767# Public method: LinearMixedModel.PredictFromArcGISRasters
3768
3769AddMethodMetadata(LinearMixedModel.PredictFromArcGISRasters,
3770    shortDescription=_(u'Using a fitted linear mixed model, this tool creates a raster representing the response variable predicted from rasters representing the predictor variables.'),
3771    isExposedToPythonCallers=True,
3772    isExposedByCOM=True,
3773    isExposedAsArcGISTool=True,
3774    arcGISDisplayName=_(u'Predict Linear Mixed Model From Rasters'),
3775    arcGISToolCategory=_(u'Statistics\\Model Data\\Linear Mixed Models'),
3776    dependencies=[ArcGISDependency(9, 1), RDependency(2, 7, 0), RPackageDependency(u'rgdal')])
3777
3778CopyArgumentMetadata(LinearMixedModel.FitToArcGISTable, u'cls', LinearMixedModel.PredictFromArcGISRasters, u'cls')
3779
3780AddArgumentMetadata(LinearMixedModel.PredictFromArcGISRasters, u'inputModelFile',
3781    typeMetadata=FileTypeMetadata(mustExist=True),
3782    description=_(
3783u"""File that contains the fitted model, generated by the Fit Linear
3784Mixed Model tool."""),
3785    arcGISDisplayName=_(u'Input model file'))
3786
3787CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'inputPredictorRasters', LinearMixedModel.PredictFromArcGISRasters, u'inputPredictorRasters')
3788CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'variableNames', LinearMixedModel.PredictFromArcGISRasters, u'variableNames')
3789CopyArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'outputResponseRaster', LinearMixedModel.PredictFromArcGISRasters, u'outputResponseRaster')
3790CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'templateRaster', LinearMixedModel.PredictFromArcGISRasters, u'templateRaster')
3791CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques', LinearMixedModel.PredictFromArcGISRasters, u'resamplingTechniques')
3792CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', LinearMixedModel.PredictFromArcGISRasters, u'ignoreOutOfRangeValues')
3793CopyArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'buildPyramids', LinearMixedModel.PredictFromArcGISRasters, u'buildPyramids')
3794CopyArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'overwriteExisting', LinearMixedModel.PredictFromArcGISRasters, u'overwriteExisting')
3795
3796###############################################################################
3797# Metadata: ModelEvaluation class
3798###############################################################################
3799
3800AddClassMetadata(ModelEvaluation,
3801    shortDescription=_(u'Provides methods for evaluating fitted models.'),
3802    isExposedAsCOMServer=True,
3803    comIID=u'{D5DC8EA1-B51A-4B0F-A43D-F91C516CD8A1}',
3804    comCLSID=u'{DD4042FC-A356-480E-9211-62B51C2113A7}')
3805
3806# Public method: ModelEvaluation.PlotPerformanceOfBinaryClassificationModel
3807
3808AddMethodMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel,
3809    shortDescription=_(u'Plots the performance of a binary classification model (a model where the response variable has two possible values) using the R ROCR package.'),
3810    isExposedToPythonCallers=True,
3811    isExposedByCOM=True,
3812    isExposedAsArcGISTool=True,
3813    arcGISDisplayName=_(u'Plot Performance of Binary Classification Model'),
3814    arcGISToolCategory=_(u'Statistics\\Model Data\\Evaluate Model Performance'),
3815    dependencies=[RDependency(2, 6, 0), RPackageDependency(u'ROCR')])
3816
3817AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'cls',
3818    typeMetadata=ClassOrClassInstanceTypeMetadata(cls=ModelEvaluation),
3819    description=_(u'%s class or an instance of it.') % ModelEvaluation.__name__)
3820
3821AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'inputModelFile',
3822    typeMetadata=FileTypeMetadata(mustExist=True),
3823    description=_(
3824u"""File that contains the fitted model, generated by one of the
3825fitting tools (e.g. Fit GLM).
3826
3827The model must conform to two constraints:
3828
3829* It must be a binary classification model, i.e. there must be only
3830  two possible values of the response variable in the data used to
3831  build the model.
3832
3833* The larger response value must represent the "positive" result, and
3834  the smaller the "negative" result.
3835
3836These constraints would be satisfied, for example, by a species
3837presence/absence model where the response variable was either 1
3838(species present) or 0 (species not present). It would not be
3839satisfied from a species density model where the response variable was
3840the number of species per unit area. Nor would it be satisfied if the
3841response variable values were switched, such that 1 represented
3842absence and 0 represented presence.
3843
3844If the first constraint is violated, you will receve the error::
3845
3846    ROCR currently supports only evaluation of binary classification tasks.
3847
3848If the second constraint is violated, you may not receive an error.
3849Instead, the output plot and statistics will be wrong. Take care to
3850avoid violating this constraint."""),
3851    arcGISDisplayName=_(u'Input model file'))
3852
3853AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'measure1',
3854    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'acc', u'cal', u'chisq', u'ecost', u'err', u'f', u'fall', u'fnr', u'fpr', u'lift', u'mat', u'mi', u'miss', u'npv', u'odds', u'ppv', u'pcfall', u'pcmiss', u'phi', u'prec', u'rch', u'rec', u'rnp', u'rpp', u'sar', u'sens', u'spec', u'tnr', u'tpr']),
3855    description=_(
3856u"""The first performance measure to plot.
3857
3858This measure will serve as the Y coordinate for the plot. If a second
3859measure is specified, it will serve as the X coordinate. Otherwise the
3860model cutoff will serve as the X coordinate.
3861
3862When modeling a binary response, an important task is selecting a
3863cutoff. The cutoff is the value used to determine whether a given
3864predicted value of the response variable should be classified as
3865positive or negative. Because the model typically outputs predictions
3866along a continual range (e.g. between 0.0 and 1.0), you must determine
3867the portions of the range that represent positive and negative
3868responses. Response values less than the cutoff are classified as
3869negative and those greater than or equal to the cutoff are classified
3870as positive.
3871
3872Plots that use the cutoff as the X coordinate show you a measure of
3873the model's performance for the range of cutoff values. You can use
3874this this plot to select the cutoff value that maximizes, minimizes,
3875or otherwise optimizes a given performance measure. For example, if
3876you selected "acc" (accuracy) as the performance measure, you could
3877find the cutoff value that maximized the model's accuracy by finding
3878the highest point on the plot and then looking up the cutoff value on
3879the X axis.
3880
3881Plots that use a second performance measure as the X coordinate allow
3882you to balance one measure of the model's performance against another.
3883The plot will be color-coded by cutoff value, allowing you to look up
3884the cutoff for a given combination of the two performance measures.
3885Selecting an optimal cutoff will often involve making a tradeoff
3886between the two measures.
3887
3888For example, to create a classic receiver operating characteristic
3889(ROC) curve, select "tpr" (true positive rate) for the first measure
3890and "fpr" (false positive rate) for the second measure.
3891
3892For the first performance measure, you can select from the following
3893list. This list is taken from the documentation of the R ROCR package
3894that implements the plots. Some of them do not allow selection of a
3895second performance measure (you must omit it when invoking this tool),
3896as noted in the measure's description.
3897
3898In these descriptions, Y and Yhat are random variables representing
3899the class and the prediction for a randomly drawn sample,
3900respectively. + and - are the positive and negative class,
3901respectively. The following abbreviations are used for for empirical
3902quantities: P (# positive samples), N (# negative samples), TP (# true
3903positives), TN (# true negatives), FP (# false positives), FN (# false
3904negatives).
3905
3906* acc - Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
3907
3908* cal - Calibration error. The calibration error is the absolute
3909  difference between predicted confidence and actual reliability. This
3910  error is estimated at all cutoffs by sliding a window of size 100
3911  across the range of possible cutoffs. E.g., if for several positive
3912  samples the output of the classifier is around 0.75, you might
3913  expect from a well-calibrated classifier that the fraction of them
3914  which is correctly predicted as positive is also around 0.75. In a
3915  well-calibrated classifier, the probabilistic confidence estimates
3916  are realistic. Only for use with probabilistic output (i.e. scores
3917  between 0 and 1; some of the other measures actually support values
3918  between -1 and 1).
3919
3920* chisq - Chi square test statistic. Note that R might raise a warning
3921  if the sample size is too small.
3922
3923* ecost - Expected cost. For details on cost curves, cf. Drummond &
3924  Holte 2000, 2004. ecost has an obligatory x axis, the so-called
3925  'probability-cost function'; thus you may not specify a second
3926  performance measure.
3927
3928* err - Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
3929
3930* f - Precision-recall F measure (van Rijsbergen, 1979). Weighted
3931  harmonic mean of precision (P) and recall (R). F = 1/ (alpha*1/P +
3932  (1-alpha)*1/R). For this tool, alpha is always 1/2, so the mean is
3933  balanced.
3934
3935* fall - Fallout. Same as fpr.
3936
3937* fnr - False negative rate. P(Yhat = - | Y = +). Estimated as: FN/P.
3938
3939* fpr - False positive rate. P(Yhat = + | Y = -). Estimated as: FP/N.
3940
3941* lift - Lift value. P(Yhat = + | Y = +)/P(Yhat = +).
3942
3943* mat - Matthews correlation coefficient. Same as phi.
3944
3945* mi - Mutual information. I(Yhat, Y) := H(Y) - H(Y | Yhat), where H
3946  is the (conditional) entropy. Entropies are estimated naively (no
3947  bias correction).
3948
3949* miss - Miss. Same as fnr.
3950
3951* npv - Negative predictive value. P(Y = - | Yhat = -). Estimated as:
3952  TN/(TN+FN).
3953
3954* odds - Odds ratio. (TP*TN)/(FN*FP). Note that odds ratio produces
3955  Inf or NA values for all cutoffs corresponding to FN=0 or FP=0. This
3956  can substantially decrease the plotted cutoff region.
3957
3958* pcfall - Prediction-conditioned fallout. P(Y = - | Yhat = +).
3959  Estimated as: FP/(TP+FP).
3960
3961* pcmiss - Prediction-conditioned miss. P(Y = + | Yhat = -). Estimated
3962  as: FN/(TN+FN).
3963
3964* phi - Phi correlation coefficient. (TP*TN -
3965  FP*FN)/(sqrt((TP+FN)*(TN+FP)*(TP+FP)*(TN+FN))). Yields a number
3966  between -1 and 1, with 1 indicating a perfect prediction, 0 indicating
3967  a random prediction. Values below 0 indicate a worse than random
3968  prediction.
3969
3970* ppv - Positive predictive value. P(Y = + | Yhat = +). Estimated as:
3971  TP/(TP+FP).
3972
3973* prec - Precision. Same as ppv.
3974
3975* rch - ROC convex hull. A ROC (=tpr vs fpr) curve with concavities
3976  (which represent suboptimal choices of cutoff) removed (Fawcett
3977  2001). Since the result is already a parametric performance curve,
3978  it cannot be used in combination with other measures (thus you may
3979  not specify a second performance measure).
3980
3981* rec - Recall. Same as tpr.
3982
3983* rnp - Rate of negative predictions. P(Yhat = -). Estimated as:
3984  (TN+FN)/(TP+FP+TN+FN).
3985
3986* rpp - Rate of positive predictions. P(Yhat = +). Estimated as:
3987  (TP+FP)/(TP+FP+TN+FN).
3988
3989* sar - Score combinining performance measures of different
3990  characteristics, in the attempt of creating a more "robust" measure
3991  (cf. Caruana R., ROCAI2004): SAR = 1/3 * (Accuracy + Area under the
3992  ROC curve + Root mean-squared error).
3993
3994* sens - Sensitivity. Same as tpr.
3995
3996* spec - Specificity. Same as tnr.
3997
3998* tnr - True negative rate. P(Yhat = - | Y = -). Estimated as: TN/N.
3999
4000* tpr - True positive rate. P(Yhat = + | Y = +). Estimated as: TP/P.
4001"""),
4002    arcGISDisplayName=_(u'First performance measure to plot'))
4003
4004AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'measure2',
4005    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True, allowedValues=[u'acc', u'cal', u'chisq', u'err', u'f', u'fall', u'fnr', u'fpr', u'lift', u'mat', u'mi', u'miss', u'npv', u'odds', u'ppv', u'pcfall', u'pcmiss', u'phi', u'prec', u'rec', u'rnp', u'rpp', u'sar', u'sens', u'spec', u'tnr', u'tpr']),
4006    description=_(
4007u"""The second performance measure to plot.
4008
4009If specified, this performance measure will be used as the X
4010coordinate of the plot instead of the model cutoff value. Please see
4011the documentation for the First Performance Measure for more
4012information."""),
4013    arcGISDisplayName=_(u'Second performance measure to plot'))
4014
4015AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'summaryStats',
4016    typeMetadata=ListTypeMetadata(elementType=UnicodeStringTypeMetadata(allowedValues=[u'auc', u'mxe', u'prbe', u'rmse']), canBeNone=True),
4017    description=_(
4018u"""List of summary statistics to calculate for the model.
4019
4020The summary statistics are logged as information messages. If you
4021invoke this tool from ArcGIS, you will see them in the geoprocessing
4022output window. If you invoke this tool programmatically, the summary
4023statistics are returned in a list as well.
4024
4025The available statistics are:
4026
4027* auc - Area under the ROC curve. This is equal to the value of the
4028  Wilcoxon-Mann-Whitney test statistic and also the probability that
4029  the classifier will score are randomly drawn positive sample higher
4030  than a randomly drawn negative sample.
4031
4032* mxe - Mean cross-entropy. Only for use with probabilistic response
4033  variables (i.e. scores between 0 and 1). MXE := - 1/(P+N)
4034  sum_{y_i=+} ln(yhat_i) + sum_{y_i=-} ln(1-yhat_i).
4035
4036* prbe - Precision-recall break-even point. The cutoff where
4037  precision and recall are equal. At this point, positive and negative
4038  predictions are made at the same rate as their prevalence in the
4039  data. In the event that there are multiple break-even points, the
4040  cutoff for the last one will be returned.
4041
4042* rmse - Root-mean-squared error. Only for use with numerical class
4043  labels. RMSE := sqrt(1/(P+N) sum_i (y_i - yhat_i)^2).
4044
4045The summary statistics are calculated by the R ROCR package."""),
4046    arcGISDisplayName=_(u'Summary statistics to calculate'))
4047
4048AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'title',
4049    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
4050    description=_(
4051u"""Title for the plot. If no title is provided, the plot will not
4052have a title."""),
4053    arcGISDisplayName=_(u'Plot title'))
4054
4055AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'evaluationDataTable',
4056    typeMetadata=ArcGISTableViewTypeMetadata(canBeNone=True, mustExist=True),
4057    description=_(
4058u"""ArcGIS table, table view, feature class, or feature layer
4059containing the data for evaluating the model. If an evaluation data
4060table is not provided, the model will be evaluated using the data that
4061was used to fit the model. (A copy of this data exists in the input
4062model file.)
4063
4064The evaluation data table must have the same fields as the table that
4065was used to fit the model. The model will be evaluated by reading
4066those values and computing the predicted response for each row and
4067comparing it to the actual response.
4068
4069When doing statistical modeling, it is considered good practice to
4070split your input data into a training data set and and an evaluation
4071data set, fit the model using the training data, and evaluate the
4072model using the evaluation data.  Please consult the scientific
4073literature for advice on this procedure. The following article is a
4074good place to start.
4075
4076Guisan, A., and Zimmerman, N.E. 2000. Predictive habitat distribution
4077models in ecology. Ecological Modeling 135: 147-186."""),
4078    arcGISDisplayName=_(u'Evaluation data table'),
4079    arcGISCategory=_(u'Evaluation data options'),
4080    dependencies=[ArcGISDependency(9, 1)])
4081
4082AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'where',
4083    typeMetadata=SQLWhereClauseTypeMetadata(canBeNone=True),
4084    description=ArcGIS91SelectCursor.__init__.__doc__.Obj.GetArgumentByName(u'where').Description,
4085    arcGISParameterDependencies=[u'evaluationDataTable'],
4086    arcGISDisplayName=_(u'Where clause'),
4087    arcGISCategory=_(u'Evaluation data options'),
4088    dependencies=[ArcGISDependency(9, 1)])
4089
4090AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'outputPlotFile',
4091    typeMetadata=FileTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'inputModelFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
4092    description=_(
4093u"""File to create for the plot. If this parameter is specified, the
4094plot will be written to the file rather than displayed on the screen.
4095
4096The file must have one of the following two extensions, which
4097determines the format that will be used:
4098
4099* .emf - Windows enhanced metafile (EMF) format. This is a vector
4100  format that may be printed and resized without any pixelation and is
4101  therefore suitable for use in printable documents that recognize
4102  this format (e.g. Microsoft Word or Microsoft Visio).
4103
4104* .png - Portable network graphics (PNG) format. This is a compressed,
4105  lossless, highly portable raster format suitable for use in web
4106  pages or other locations where a raster format is desired. Most
4107  scientific journals accept PNG; they typically request that files
4108  have a resolution of at least 1000 DPI.
4109"""),
4110    direction=u'Output',
4111    arcGISDisplayName=_(u'Output file'),
4112    arcGISCategory=_(u'Output file options'))
4113
4114AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'res',
4115    typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
4116    description=_(
4117u"""PNG plot file resolution, in dots per inch (DPI). The default is
4118set to a high value (1000) because this is the minimum resolution
4119typically required by scientific journals that accept figures in PNG
4120format.
4121
4122This parameter is ignored for EMF format because it is a vector
4123format."""),
4124    arcGISDisplayName=_(u'Plot resolution, in DPI'),
4125    arcGISCategory=_(u'Output file options'))
4126
4127AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'width',
4128    typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
4129    description=_(
4130u"""Plot file width in thousandths of inches (for EMF format; e.g. the
4131value 3000 is 3 inches) or pixels (for PNG format)."""),
4132    arcGISDisplayName=_(u'Plot width'),
4133    arcGISCategory=_(u'Output file options'))
4134
4135AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'height',
4136    typeMetadata=FloatTypeMetadata(mustBeGreaterThan=0.),
4137    description=_(
4138u"""Plot file height in thousandths of inches (for EMF format; e.g. the
4139value 3000 is 3 inches) or pixels (for PNG format)."""),
4140    arcGISDisplayName=_(u'Plot height'),
4141    arcGISCategory=_(u'Output file options'))
4142
4143AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'pointSize',
4144    typeMetadata=FloatTypeMetadata(minValue=1.0),
4145    description=_(
4146u"""The default pointsize of plotted text."""),
4147    arcGISDisplayName=_(u'Default pointsize of plotted text'),
4148    arcGISCategory=_(u'Output file options'))
4149
4150AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'bg',
4151    typeMetadata=UnicodeStringTypeMetadata(),
4152    description=_(
4153u"""PNG plot file background color. The color must be a valid name in
4154R's color palette, or "transparent" if there is no background color.
4155This parameter is ignored if the plot format file is EMF."""),
4156    arcGISDisplayName=_(u'Plot background color'),
4157    arcGISCategory=_(u'Output file options'))
4158
4159AddArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'overwriteExisting',
4160    typeMetadata=BooleanTypeMetadata(),
4161    description=_(
4162u"""If True, the output file will be overwritten, if it exists. If
4163False, a ValueError will be raised if the output file exists."""),
4164    initializeToArcGISGeoprocessorVariable=u'OverwriteOutput')
4165
4166AddResultMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'summaryStatValues',
4167    typeMetadata=ListTypeMetadata(elementType=FloatTypeMetadata(), canBeNone=True),
4168    description=_(
4169u"""The calculated summary statistics. This will be the same length as
4170the summaryStats parameter. If summaryStats was None, this will be
4171None."""))
4172
4173# Public method: ModelEvaluation.PlotROCOfBinaryClassificationModel
4174
4175AddMethodMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel,
4176    shortDescription=_(u'Plots the receiver operating characteristic (ROC) curve of a binary classification model (a model where the response variable has two possible values) using the R ROCR package.'),
4177    isExposedToPythonCallers=True,
4178    isExposedByCOM=True,
4179    isExposedAsArcGISTool=True,
4180    arcGISDisplayName=_(u'Plot ROC of Binary Classification Model'),
4181    arcGISToolCategory=_(u'Statistics\\Model Data\\Evaluate Model Performance'),
4182    dependencies=[RDependency(2, 6, 0), RPackageDependency(u'ROCR')])
4183
4184CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'cls', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'cls')
4185CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'inputModelFile', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'inputModelFile')
4186
4187AddArgumentMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'cutoff',
4188    typeMetadata=UnicodeStringTypeMetadata(allowedValues=[u'None', u'Automatic', u'Specified'], makeLowercase=True),
4189    description=_(
4190u"""Cutoff to display in the plot and to use for the calculation of
4191summary statistics. The possible values are:
4192
4193* None - the plot will not display a cutoff and summary statistics
4194  will not be calculated.
4195
4196* Automatic - the tool will automatically select the cutoff value for
4197  the point on the curve that is closest to the point of perfect
4198  classification (the upper-left corner of the plot).
4199
4200* Specified - the tool will use the cutoff value that you specify for
4201  the Cutoff Value parameter.
4202
4203When doing binary classification, an important task is selecting a
4204cutoff. The cutoff is the value used to determine whether a given
4205predicted value of the response variable should be classified as
4206positive or negative. Because the model typically outputs predictions
4207along a continual range (e.g. between 0.0 and 1.0), you must determine
4208the portions of the range that represent positive and negative
4209responses. Response values less than the cutoff are classified as
4210negative and those greater than or equal to the cutoff are classified
4211as positive.
4212
4213In ROC analysis, you choose the cutoff value by contemplating the
4214tradeoff you will obtain between the true positive rate and false
4215positive rate of the model. A perfect model has a true positive rate
4216of 1 and a false positive rate of 0. This corresponds to the
4217upper-left corner of the ROC plot. Depending on your scenario, one
4218rate may be more important than another. In this case, you should run
4219this tool once with Cutoff set to Automatic, inspect the plot, choose
4220a cutoff that meets your goals, and run the tool a second time with
4221Cutoff set to Specified. If one rate is not more important than the
4222other, you might be satisfied with the Automatic option. When in
4223doubt, consult a statistician."""),
4224    arcGISDisplayName=_(u'Cutoff'))
4225
4226AddArgumentMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'cutoffValue',
4227    typeMetadata=FloatTypeMetadata(canBeNone=True),
4228    description=_(
4229u"""Cutoff to value to use when the Cutoff parameter is set to
4230Specifed. Please see the documentation for that parameter for more
4231information."""),
4232    arcGISDisplayName=_(u'Cutoff value'))
4233
4234AddArgumentMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'colorize',
4235    typeMetadata=BooleanTypeMetadata(),
4236    description=_(
4237u"""If True, the ROC curve will be colorized by cutoff value. If
4238False, the ROC curve will be black."""),
4239    arcGISDisplayName=_(u'Colorize ROC curve'))
4240
4241CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'title', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'title')
4242CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'evaluationDataTable', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'evaluationDataTable')
4243CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'where', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'where')
4244
4245AddArgumentMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'outputSummaryFile',
4246    typeMetadata=FileTypeMetadata(canBeNone=True, mustBeDifferentThanArguments=[u'inputModelFile', u'outputPlotFile'], deleteIfParameterIsTrue=u'overwriteExisting', createParentDirectories=True),
4247    description=_(
4248u"""Text file to receive the model summary statistics that this tool
4249reports as log messages. If a cutoff is provided or calculated by this
4250tool, a contingency table and associated statistics will also be
4251written."""),
4252    direction=u'Output',
4253    arcGISDisplayName=_(u'Model summary statistics file'),
4254    arcGISCategory=_(u'Output file options'))
4255
4256CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'outputPlotFile', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'outputPlotFile')
4257CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'res', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'res')
4258CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'width', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'width')
4259CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'height', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'height')
4260CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'pointSize', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'pointSize')
4261CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'bg', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'bg')
4262CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'overwriteExisting', ModelEvaluation.PlotROCOfBinaryClassificationModel, u'overwriteExisting')
4263
4264AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'outputCutoff',
4265    typeMetadata=FloatTypeMetadata(canBeNone=True),
4266    description=_(
4267u"""Output cutoff value. The returned value depends on what was
4268specified for the Cutoff parameter:
4269
4270* None - no value is returned (in Python, None is returned, often
4271  called NULL in other programming languages).
4272
4273* Automatic - the automatically calculated value is returned.
4274
4275* Specified - the value specified for the Cutoff Value parameter is
4276  returned.
4277"""),
4278    arcGISDisplayName=_(u'Output cutoff'))
4279
4280AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'tp',
4281    typeMetadata=FloatTypeMetadata(canBeNone=True),
4282    description=_(
4283u"""The number of true positives obtained for the cutoff value that
4284was used. If no cutoff was used, no value is returned (in Python, None
4285is returned, often called NULL in other programming languages)."""))
4286
4287AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'fp',
4288    typeMetadata=FloatTypeMetadata(canBeNone=True),
4289    description=_(
4290u"""The number of false positives obtained for the cutoff value that
4291was used. If no cutoff was used, no value is returned (in Python, None
4292is returned, often called NULL in other programming languages)."""))
4293
4294AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'fn',
4295    typeMetadata=FloatTypeMetadata(canBeNone=True),
4296    description=_(
4297u"""The number of false negatives obtained for the cutoff value that
4298was used. If no cutoff was used, no value is returned (in Python, None
4299is returned, often called NULL in other programming languages)."""))
4300
4301AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'tn',
4302    typeMetadata=FloatTypeMetadata(canBeNone=True),
4303    description=_(
4304u"""The number of true negatives obtained for the cutoff value that
4305was used. If no cutoff was used, no value is returned (in Python, None
4306is returned, often called NULL in other programming languages)."""))
4307
4308AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'auc',
4309    typeMetadata=FloatTypeMetadata(),
4310    description=_(u"""The area under the ROC curve."""))
4311
4312AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'mxe',
4313    typeMetadata=FloatTypeMetadata(canBeNone=True),
4314    description=_(
4315u"""Mean cross-entropy for the model, if the model is probabilistic
4316(the respose ranges from 0 to 1), or None otherwise. MXE := - 1/(P+N)
4317sum_{y_i=+} ln(yhat_i) + sum_{y_i=-} ln(1-yhat_i)."""))
4318
4319AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'prbe',
4320    typeMetadata=FloatTypeMetadata(canBeNone=True),
4321    description=_(
4322u"""Precision-recall break-even point for the model, which is the
4323cutoff where precision and recall are equal. At this point, positive
4324and negative predictions are made at the same rate as their prevalence
4325in the data. In the event that there are multiple break-even points,
4326the cutoff for the last one will be returned. If there is no such
4327point, None will be returned. (I am not actually sure this is
4328possible)."""))
4329
4330AddResultMetadata(ModelEvaluation.PlotROCOfBinaryClassificationModel, u'rmse',
4331    typeMetadata=FloatTypeMetadata(canBeNone=True),
4332    description=_(
4333u"""Root-mean-squared error, if the response variable is a numeric
4334value (rather than a categorical string), or None otherwise. RMSE :=
4335sqrt(1/(P+N) sum_i (y_i - yhat_i)^2)."""))
4336
4337# Public method: ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords
4338
4339AddMethodMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords,
4340    shortDescription=_(u'Randomly designates the records of a table as either training records (for fitting a statistical model) or evaluation records (for evaluating the model).'),
4341    isExposedToPythonCallers=True,
4342    isExposedByCOM=True,
4343    isExposedAsArcGISTool=True,
4344    arcGISDisplayName=_(u'Randomly Split Table Into Training and Evaluation Records'),
4345    arcGISToolCategory=_(u'Statistics\\Model Data\\Evaluate Model Performance'),
4346    dependencies=[ArcGISDependency(9, 1)])
4347
4348CopyArgumentMetadata(ModelEvaluation.PlotPerformanceOfBinaryClassificationModel, u'cls', ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'cls')
4349
4350AddArgumentMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'table',
4351    typeMetadata=ArcGISTableViewTypeMetadata(mustExist=True),
4352    description=_(
4353u"""ArcGIS table, table view, feature class, or feature layer."""),
4354    arcGISDisplayName=_(u'Table'))
4355
4356AddArgumentMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'percentEvaluation',
4357    typeMetadata=FloatTypeMetadata(minValue=0.0, maxValue=100.0),
4358    description=_(
4359u"""Percent of the table's records to randomly designate evaluation records.
4360The remaining records will be designated training records.
4361
4362A popular ratio of training to evaluation is 2 to 1, but a different ratio
4363may be appropriate depending on the total number of records and the
4364objective of your study. Please consult the scientific literature for
4365advice. The following article is a good place to start.
4366
4367Guisan, A., and Zimmerman, N.E. 2000. Predictive habitat distribution
4368models in ecology. Ecological Modeling 135: 147-186."""),
4369    arcGISDisplayName=_(u'Percentage of evaluation records'))
4370
4371AddArgumentMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'field',
4372    typeMetadata=UnicodeStringTypeMetadata(),
4373    description=_(
4374u"""Field to be randomly assigned the value 0 for training records and
43751 for evaluation records.
4376
4377If the field already exists, it must have the SHORT, LONG, FLOAT, or
4378DOUBLE data type. If it does not exist, it will be created with the
4379SHORT data type. If you do not provide a field name, the name
4380IsEvalData will be used."""),
4381    arcGISDisplayName=_(u'Field to assign'))
4382
4383AddArgumentMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'where',
4384    typeMetadata=SQLWhereClauseTypeMetadata(canBeNone=True),
4385    description=ArcGIS91SelectCursor.__init__.__doc__.Obj.GetArgumentByName(u'where').Description,
4386    arcGISParameterDependencies=[u'table'],
4387    arcGISDisplayName=_(u'Where clause'))
4388
4389AddResultMetadata(ModelEvaluation.RandomlySplitArcGISTableIntoTrainingAndEvaluationRecords, u'updatedTable',
4390    typeMetadata=ArcGISTableViewTypeMetadata(),
4391    description=_(u'Updated table.'),
4392    arcGISDisplayName=_(u'Updated table'),
4393    arcGISParameterDependencies=[u'table'])
4394
4395###############################################################################
4396# Names exported by this module
4397###############################################################################
4398
4399__all__ = ['GLM', 'GAM', 'TreeModel', 'LinearMixedModel', 'ModelEvaluation']
Note: See TracBrowser for help on using the browser.