Changeset 1019
- Timestamp:
- 08/02/12 12:49:20 (10 months ago)
- Location:
- MGET/Branches/Jason/PythonPackage/src/GeoEco
- Files:
-
- 5 modified
-
FisheryAnalysis/Spatiotemporal.py (modified) (3 diffs)
-
SpatialAnalysis/Interpolation.py (modified) (2 diffs)
-
Statistics/Modeling.py (modified) (2 diffs)
-
Statistics/PredictModel.r (modified) (9 diffs)
-
__init__.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
MGET/Branches/Jason/PythonPackage/src/GeoEco/FisheryAnalysis/Spatiotemporal.py
r987 r1019 528 528 529 529 The spectral power is a measure of the strength of the contribution of 530 the wave 's contribute to the overall pattern. Important waves appear531 as peaks in the periodogram. When you find a peak, the value of the x 532 axis gives the period (in days) of the cycle. In marine ecological 533 data, patterns are often observed in relation to solar, lunar, or534 tidal cycles. Solar cycles are indicated by peaks at 365 days (an 535 annual cycle) and at 24 hours (a diurnal cycle). Lunar cycles are 536 indicated by a peak at 29.5 days. Tidal cycles are more complicated to 537 observe, and depend ontidal patterns in the region of interest.530 the wave to the overall pattern. Important waves appear as peaks in 531 the periodogram. When you find a peak, the value of the x axis gives 532 the period (in days) of the wave. In marine ecological data, patterns 533 are often observed in relation to solar, lunar, or tidal cycles. Solar 534 cycles are indicated by peaks at 365 days (an annual cycle) and at 24 535 hours (a diurnal cycle). Lunar cycles are indicated by a peak at 29.5 536 days. Tidal cycles are more complicated to observe, and depend on 537 tidal patterns in the region of interest. 538 538 539 539 To interpret the periodogram, look for high spikes at 365 days, 29.5 540 days, 1 day, with very low values elsewhere. If you see high spikes at541 one or more of those locations, a solar or lunar cycle is indicated. 542 But if those spikes are not very high relative to other periods--if 543 the periodogram displays a generally jittery or noisy pattern--then 544 solar and lunar cycles may not exist.540 days, or 1 day. If you see high spikes at one or more of those 541 locations with very low values everywhere else, a solar or lunar cycle 542 is indicated. But if those spikes are not very high relative to other 543 periods--if the periodogram displays a generally jittery or noisy 544 pattern--then solar and lunar cycles may not exist. 
545 545 546 546 In order for Fourier analysis to detect such patterns, the data must … … 879 879 880 880 AddMethodMetadata(FisheryPeriodicity.AnalyzeCPUEInTable, 881 shortDescription=_(u'Creates several plots that analyze the temporal periodicity of catch per unit effort ( FPUE) calculated from fishing records in a Table.'),881 shortDescription=_(u'Creates several plots that analyze the temporal periodicity of catch per unit effort (CPUE) calculated from fishing records in a Table.'), 882 882 longDescription=_( 883 883 u"""The first plot is a bar plot showing the mean CPUE for a series of … … 976 976 977 977 AddMethodMetadata(FisheryPeriodicity.AnalyzeCPUEInArcGISTable, 978 shortDescription=_(u'Creates several plots that analyze the temporal periodicity of catch per unit effort ( FPUE) calculated from fishing records in a table.'),978 shortDescription=_(u'Creates several plots that analyze the temporal periodicity of catch per unit effort (CPUE) calculated from fishing records in a table.'), 979 979 longDescription=FisheryPeriodicity.AnalyzeCPUEInTable.__doc__.Obj.LongDescription, 980 980 isExposedToPythonCallers=True, -
MGET/Branches/Jason/PythonPackage/src/GeoEco/SpatialAnalysis/Interpolation.py
r977 r1019 288 288 coordinateFields[1] = zField 289 289 elif table.GeometryType != u'Point25D' and zValue is None: 290 raise ValueError(_(u'%(grid)s has a z dimension but %(table)s has %(gt)s geometry and a z-coordinate field was not specified. It must have Point25D geometry or you must specify a z-coordinate field or specify the value of the z coordinate.') % {u'g ': grids[0].DisplayName, u'table': table.DisplayName, u'gt': table.GeometryType})290 raise ValueError(_(u'%(grid)s has a z dimension but %(table)s has %(gt)s geometry and a z-coordinate field was not specified. It must have Point25D geometry or you must specify a z-coordinate field or specify the value of the z coordinate.') % {u'grid': grids[0].DisplayName, u'table': table.DisplayName, u'gt': table.GeometryType}) 291 291 292 292 if grids[0].Dimensions[0] == u't': … … 294 294 coordinateFields[0] = tField 295 295 elif tValue is None: 296 raise ValueError(_(u'%(grid)s has a t dimension but a t-coordinate field of %(table)s was not specified. You must specify a t-coordinate field or the value of the t coordinate.') % {u'g ': grids[0].DisplayName, u'table': table.DisplayName})296 raise ValueError(_(u'%(grid)s has a t dimension but a t-coordinate field of %(table)s was not specified. You must specify a t-coordinate field or the value of the t coordinate.') % {u'grid': grids[0].DisplayName, u'table': table.DisplayName}) 297 297 else: 298 298 tField = None -
MGET/Branches/Jason/PythonPackage/src/GeoEco/Statistics/Modeling.py
r1013 r1019 4251 4251 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'constantPredictorNames', TreeModel.PredictFromArcGISRasters, u'constantPredictorNames') 4252 4252 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'constantPredictorValues', TreeModel.PredictFromArcGISRasters, u'constantPredictorValues') 4253 4254 AddArgumentMetadata(TreeModel.PredictFromArcGISRasters, u'cutoff', 4255 typeMetadata=FloatTypeMetadata(minValue=0., maxValue=1., canBeNone=True), 4256 description=_( 4257 u"""Cutoff to use when classifying the continuous probability output 4258 by a binary classification model into a binary result (0 or 1). This 4259 parameter should not be specified for regression models or 4260 classification models that have more than two classes. 4261 4262 Probabilities greater than or equal to the cutoff will be classified 4263 as 1; probabilities less than the cutoff will be classified as 0. If a 4264 cutoff is not provided, the classification will be performed 4265 automatically by R."""), 4266 arcGISDisplayName=_(u'Binary classification cutoff')) # TODO: improve "automatically classified by R" 4267 4253 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'cutoff', TreeModel.PredictFromArcGISRasters, u'cutoff') 4268 4254 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques', TreeModel.PredictFromArcGISRasters, u'resamplingTechniques') 4269 4255 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', TreeModel.PredictFromArcGISRasters, u'ignoreOutOfRangeValues') … … 5047 5033 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'constantPredictorNames', RandomForestModel.PredictFromArcGISRasters, u'constantPredictorNames') 5048 5034 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'constantPredictorValues', RandomForestModel.PredictFromArcGISRasters, u'constantPredictorValues') 5049 5050 AddArgumentMetadata(RandomForestModel.PredictFromArcGISRasters, u'cutoff', 5051 typeMetadata=FloatTypeMetadata(minValue=0., 
maxValue=1., canBeNone=True), 5052 description=_( 5053 u"""Cutoff to use when classifying the continuous probability output 5054 by a binary classification model into a binary result (0 or 1). This 5055 parameter should not be specified for regression models or 5056 classification models with more than two classes. 5057 5058 Probabilities greater than or equal to the cutoff will be classified 5059 as 1; probabilities less than the cutoff will be classified as 0. If a 5060 cutoff is not provided, 0.5 will be implicitly used, by virtue of the 5061 default behavior of random forests, in which the prediction is decided 5062 by majority vote between all of the trees in the forest."""), 5063 arcGISDisplayName=_(u'Binary classification cutoff')) 5064 5035 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'cutoff', RandomForestModel.PredictFromArcGISRasters, u'cutoff') 5065 5036 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'resamplingTechniques', RandomForestModel.PredictFromArcGISRasters, u'resamplingTechniques') 5066 5037 CopyArgumentMetadata(GLM.PredictFromArcGISRasters, u'ignoreOutOfRangeValues', RandomForestModel.PredictFromArcGISRasters, u'ignoreOutOfRangeValues') -
MGET/Branches/Jason/PythonPackage/src/GeoEco/Statistics/PredictModel.r
r1015 r1019 89 89 maxFittedPredictorValues[[predictor]] <- max(c(trainingData[[predictor]]), na.rm=TRUE) 90 90 } 91 92 isBinaryClassification <- minResponseValue == 0 && maxResponseValue == 1 && 93 (!is.null(rPackage) && rPackage == "rpart" && model$method == "class" || 94 !is.null(rPackage) && rPackage == "randomForest" && model$type == "classification" || 95 !is.null(rPackage) && rPackage == "party" && (model@responses@is_nominal[1] || model@responses@is_ordinal[1]) || 96 (!is.null(rPackage) && rPackage %in% c("mgcv", "gam") || is.null(rPackage)) && model$family$family %in% c("binomial", "quasibinomial", "negbin")) 97 98 isNonbinaryClassification <- !isBinaryClassification && 99 (!is.null(rPackage) && rPackage == "rpart" && model$method == "class" || 100 !is.null(rPackage) && rPackage == "randomForest" && model$type == "classification" || 101 !is.null(rPackage) && rPackage == "party" && (model@responses@is_nominal[1] || model@responses@is_ordinal[1])) 91 102 92 103 # Build a vector listing the categorical predictors. 
… … 124 135 allValuesAreNA <- TRUE 125 136 126 return(list(responseVariable, allPredictors, minResponseValue, maxResponseValue, minFittedPredictorValues, maxFittedPredictorValues, categoricalPredictors, categoricalPredictorLevels, warnedAboutMissingLevels, allValuesForPredictorAreNA, allValuesAreNA))137 return(list(responseVariable, allPredictors, minResponseValue, maxResponseValue, isBinaryClassification, isNonbinaryClassification, minFittedPredictorValues, maxFittedPredictorValues, categoricalPredictors, categoricalPredictorLevels, warnedAboutMissingLevels, allValuesForPredictorAreNA, allValuesAreNA)) 127 138 } 128 139 129 140 130 PredictModel <- function(model, rPackage, cutoff, ignoreOutOfRangeValues, returnPredictedErrors, allPredictors, responseVariable, minResponseValue, maxResponseValue, minFittedPredictorValues, maxFittedPredictorValues, categoricalPredictors, categoricalPredictorLevels, warnedAboutMissingLevels, allValuesForPredictorAreNA, allValuesAreNA, predictorValues , forceBinaryProbabilities=FALSE)141 PredictModel <- function(model, rPackage, cutoff, ignoreOutOfRangeValues, returnPredictedErrors, allPredictors, responseVariable, minResponseValue, maxResponseValue, minFittedPredictorValues, maxFittedPredictorValues, categoricalPredictors, categoricalPredictorLevels, warnedAboutMissingLevels, allValuesForPredictorAreNA, allValuesAreNA, predictorValues) 131 142 { 132 143 # Determine which records have NA for one or more predictors. … … 193 204 194 205 if (model$method == "class") 195 if (minResponseValue == 0 && maxResponseValue == 1 && (!is.null(cutoff) || forceBinaryProbabilities))206 if (minResponseValue == 0 && maxResponseValue == 1) 196 207 { 197 208 predictedResponse <- as.vector(suppressWarnings(predict(model, newdata=predictorValues, type="prob")[,2])) … … 221 232 # the second class and apply the cutoff. 
222 233 223 if (rPackage == "randomForest" && model$type == "classification" && minResponseValue == 0 && maxResponseValue == 1 && (!is.null(cutoff) || forceBinaryProbabilities))234 if (rPackage == "randomForest" && model$type == "classification" && minResponseValue == 0 && maxResponseValue == 1) 224 235 { 225 236 predictedResponse <- as.vector(suppressWarnings(predict(model, newdata=predictorValues, type="prob")[,2])) … … 227 238 predictedResponse <- as.integer(predictedResponse >= cutoff) 228 239 } 229 else if (rPackage == "party" && (model@responses@is_nominal[1] || model@responses@is_ordinal[1]) && minResponseValue == 0 && maxResponseValue == 1 && !is.null(cutoff)) 230 predictedResponse <- as.integer(sapply(suppressWarnings(predict(model, newdata=predictorValues, type="prob")), "[", i = 2) >= cutoff) 240 else if (rPackage == "party" && (model@responses@is_nominal[1] || model@responses@is_ordinal[1]) && minResponseValue == 0 && maxResponseValue == 1) 241 { 242 predictedResponse <- sapply(suppressWarnings(predict(model, newdata=predictorValues, type="prob")), "[", i = 2) 243 if (!is.null(cutoff)) 244 predictedResponse <- as.integer(predictedResponse >= cutoff) 245 } 231 246 232 247 # For other models (regression forests or classification forests with … … 355 370 minResponseValue <- variables[[3]] 356 371 maxResponseValue <- variables[[4]] 357 minFittedPredictorValues <- variables[[5]] 358 maxFittedPredictorValues <- variables[[6]] 359 categoricalPredictors <- variables[[7]] 360 categoricalPredictorLevels <- variables[[8]] 361 warnedAboutMissingLevels <- variables[[9]] 362 allValuesForPredictorAreNA <- variables[[10]] 363 allValuesAreNA <- variables[[11]] 372 isBinaryClassification <- variables[[5]] 373 isNonbinaryClassification <- variables[[6]] 374 minFittedPredictorValues <- variables[[7]] 375 maxFittedPredictorValues <- variables[[8]] 376 categoricalPredictors <- variables[[9]] 377 categoricalPredictorLevels <- variables[[10]] 378 warnedAboutMissingLevels <- 
variables[[11]] 379 allValuesForPredictorAreNA <- variables[[12]] 380 allValuesAreNA <- variables[[13]] 364 381 365 382 # Verify that the caller provided all of the necessary predictors. … … 398 415 }, finally=close(rasterInfo)) 399 416 400 # Determine the data type and NoData value of the output rasters. For 401 # regression models, use 32-bit floating point as the data type and 402 # -3.4028235e+038 as the NoData value (the 32-bit IEEE-754 floating-point 403 # negative number that is farthest from 0; traditionally used by ArcGIS for 404 # NoData). For classification models, use the most compact integer data type 405 # and corresponding NoData value. 406 407 if (is.null(cutoff) && (is.null(rPackage) || !(rPackage == "rpart" && model$method == "class" || rPackage == "randomForest" && model$type == "classification" || rPackage == "party" && (model@responses@is_nominal[1] || model@responses@is_ordinal[1])))) 408 { 417 # Determine the data type and NoData value of the output rasters. 418 # If a cutoff is provided (indicating that the output should be a 419 # binary classification) or the model is a classification with 420 # more than two classes, use the most compact integer data type 421 # and corresponding NoData value. Otherwise (for regression models 422 # or classification models with two classes for which no cutoff was 423 # provided), use 32-bit floating point as the data type and 424 # -3.4028235e+038 as the NoData value (the 32-bit IEEE-754 425 # floating-point negative number that is farthest from 0; 426 # traditionally used by ArcGIS for NoData). 
427 428 if (!is.null(cutoff) || isNonbinaryClassification) 429 { 430 if (minResponseValue > -128 && maxResponseValue <= 127) 431 { 432 dataType <- "signedint" 433 nbytes <- 1 434 noDataValue <- as.integer(-128) 435 } 436 else if (minResponseValue >= -128 && maxResponseValue < 127) 437 { 438 dataType <- "signedint" 439 nbytes <- 1 440 noDataValue <- as.integer(127) 441 } 442 else if (minResponseValue > 0 && maxResponseValue <= 255) 443 { 444 dataType <- "unsignedint" 445 nbytes <- 1 446 noDataValue <- as.integer(0) 447 } 448 else if (minResponseValue >= 0 && maxResponseValue < 255) 449 { 450 dataType <- "unsignedint" 451 nbytes <- 1 452 noDataValue <- as.integer(255) 453 } 454 else if (minResponseValue > -32768 && maxResponseValue <= 32767) 455 { 456 dataType <- "signedint" 457 nbytes <- 2 458 noDataValue <- as.integer(-32768) 459 } 460 else if (minResponseValue >= -32768 && maxResponseValue < 32767) 461 { 462 dataType <- "signedint" 463 nbytes <- 2 464 noDataValue <- as.integer(32767) 465 } 466 else if (minResponseValue > 0 && maxResponseValue <= 65535) 467 { 468 dataType <- "unsignedint" 469 nbytes <- 2 470 noDataValue <- as.integer(0) 471 } 472 else if (minResponseValue >= 0 && maxResponseValue < 65535) 473 { 474 dataType <- "unsignedint" 475 nbytes <- 2 476 noDataValue <- as.integer(65535) 477 } 478 else if (minResponseValue > -2147483648 && maxResponseValue <= 2147483647) 479 { 480 dataType <- "signedint" 481 nbytes <- 4 482 noDataValue <- as.integer(-2147483648) 483 } 484 else if (minResponseValue >= -2147483648 && maxResponseValue < 2147483647) 485 { 486 dataType <- "signedint" 487 nbytes <- 4 488 noDataValue <- as.integer(2147483647) 489 } 490 else if (minResponseValue > 0) 491 { 492 dataType <- "unsignedint" 493 nbytes <- 4 494 noDataValue <- as.integer(0) 495 } 496 else 497 { 498 dataType <- "unsignedint" 499 nbytes <- 4 500 noDataValue <- as.integer(4294967296) 501 } 502 } 503 else 504 { 505 if (isBinaryClassification && is.null(cutoff)) 506 warning("This 
appears to be a binary classification model but no cutoff value was provided. The output will be a continuous floating-point value ranging from 0 to 1. To obtain a binary integer value (0 or 1), please provide a cutoff and try again.", call.=FALSE) 507 409 508 dataType <- "float" 410 509 nbytes <- 4 411 510 noDataValue <- -3.4028235e+038 412 }413 else414 {415 if (minResponseValue > -128 && maxResponseValue <= 127)416 {417 dataType <- "signedint"418 nbytes <- 1419 noDataValue <- as.integer(-128)420 }421 else if (minResponseValue >= -128 && maxResponseValue < 127)422 {423 dataType <- "signedint"424 nbytes <- 1425 noDataValue <- as.integer(127)426 }427 else if (minResponseValue > 0 && maxResponseValue <= 255)428 {429 dataType <- "unsignedint"430 nbytes <- 1431 noDataValue <- as.integer(0)432 }433 else if (minResponseValue >= 0 && maxResponseValue < 255)434 {435 dataType <- "unsignedint"436 nbytes <- 1437 noDataValue <- as.integer(255)438 }439 else if (minResponseValue > -32768 && maxResponseValue <= 32767)440 {441 dataType <- "signedint"442 nbytes <- 2443 noDataValue <- as.integer(-32768)444 }445 else if (minResponseValue >= -32768 && maxResponseValue < 32767)446 {447 dataType <- "signedint"448 nbytes <- 2449 noDataValue <- as.integer(32767)450 }451 else if (minResponseValue > 0 && maxResponseValue <= 65535)452 {453 dataType <- "unsignedint"454 nbytes <- 2455 noDataValue <- as.integer(0)456 }457 else if (minResponseValue >= 0 && maxResponseValue < 65535)458 {459 dataType <- "unsignedint"460 nbytes <- 2461 noDataValue <- as.integer(65535)462 }463 else if (minResponseValue > -2147483648 && maxResponseValue <= 2147483647)464 {465 dataType <- "signedint"466 nbytes <- 4467 noDataValue <- as.integer(-2147483648)468 }469 else if (minResponseValue >= -2147483648 && maxResponseValue < 2147483647)470 {471 dataType <- "signedint"472 nbytes <- 4473 noDataValue <- as.integer(2147483647)474 }475 else if (minResponseValue > 0)476 {477 dataType <- "unsignedint"478 nbytes <- 4479 
noDataValue <- as.integer(0)480 }481 else482 {483 dataType <- "unsignedint"484 nbytes <- 4485 noDataValue <- as.integer(4294967296)486 }487 511 } 488 512 … … 629 653 minResponseValue <- variables[[3]] 630 654 maxResponseValue <- variables[[4]] 631 minFittedPredictorValues <- variables[[5]] 632 maxFittedPredictorValues <- variables[[6]] 633 categoricalPredictors <- variables[[7]] 634 categoricalPredictorLevels <- variables[[8]] 635 warnedAboutMissingLevels <- variables[[9]] 636 allValuesForPredictorAreNA <- variables[[10]] 637 allValuesAreNA <- variables[[11]] 638 639 isBinaryClassification <- minResponseValue == 0 && maxResponseValue == 1 && 640 (!is.null(rPackage) && rPackage == "rpart" && model$method == "class" || 641 !is.null(rPackage) && rPackage == "randomForest" && model$type == "classification" || 642 !is.null(rPackage) && rPackage == "party" && (model@responses@is_nominal[1] || model@responses@is_ordinal[1]) || 643 (!is.null(rPackage) && rPackage %in% c("mgcv", "gam") || is.null(rPackage)) && model$family$family %in% c("binomial", "quasibinomial", "negbin")) 644 645 isNonbinaryClassification <- !isBinaryClassification && 646 (!is.null(rPackage) && rPackage == "rpart" && model$method == "class" || 647 !is.null(rPackage) && rPackage == "randomForest" && model$type == "classification" || 648 !is.null(rPackage) && rPackage == "party" && (model@responses@is_nominal[1] || model@responses@is_ordinal[1])) 655 isBinaryClassification <- variables[[5]] 656 isNonbinaryClassification <- variables[[6]] 657 minFittedPredictorValues <- variables[[7]] 658 maxFittedPredictorValues <- variables[[8]] 659 categoricalPredictors <- variables[[9]] 660 categoricalPredictorLevels <- variables[[10]] 661 warnedAboutMissingLevels <- variables[[11]] 662 allValuesForPredictorAreNA <- variables[[12]] 663 allValuesAreNA <- variables[[13]] 649 664 650 665 # Verify that the caller provided all of the necessary predictors. … … 661 676 # Do the prediction. 
For binary models, force the probability to be returned. 662 677 663 predictedResponse <- PredictModel(model, rPackage, NULL, ignoreOutOfRangeValues, FALSE, allPredictors, responseVariable, minResponseValue, maxResponseValue, minFittedPredictorValues, maxFittedPredictorValues, categoricalPredictors, categoricalPredictorLevels, warnedAboutMissingLevels, allValuesForPredictorAreNA, allValuesAreNA, newData , TRUE)[[1]]678 predictedResponse <- PredictModel(model, rPackage, NULL, ignoreOutOfRangeValues, FALSE, allPredictors, responseVariable, minResponseValue, maxResponseValue, minFittedPredictorValues, maxFittedPredictorValues, categoricalPredictors, categoricalPredictorLevels, warnedAboutMissingLevels, allValuesForPredictorAreNA, allValuesAreNA, newData)[[1]] 664 679 665 680 # If the data include the response variable, compute summary statistics. -
MGET/Branches/Jason/PythonPackage/src/GeoEco/__init__.py
r992 r1019 1 __version__ = u'0.8a4 2'1 __version__ = u'0.8a43'
