| 1 | # Datasets/Collections/__init__.py - Base classes that define the
|
|---|
| 2 | # interfaces used to access collections of datasets.
|
|---|
| 3 | #
|
|---|
| 4 | # Copyright (C) 2010 Jason J. Roberts
|
|---|
| 5 | #
|
|---|
| 6 | # This program is free software; you can redistribute it and/or
|
|---|
| 7 | # modify it under the terms of the GNU General Public License
|
|---|
| 8 | # as published by the Free Software Foundation; either version 2
|
|---|
| 9 | # of the License, or (at your option) any later version.
|
|---|
| 10 | #
|
|---|
| 11 | # This program is distributed in the hope that it will be useful,
|
|---|
| 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 14 | # GNU General Public License (available in the file LICENSE.TXT)
|
|---|
| 15 | # for more details.
|
|---|
| 16 | #
|
|---|
| 17 | # You should have received a copy of the GNU General Public License
|
|---|
| 18 | # along with this program; if not, write to the Free Software
|
|---|
| 19 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|---|
| 20 |
|
|---|
| 21 | import datetime
|
|---|
| 22 | import os
|
|---|
| 23 | import re
|
|---|
| 24 | import shutil
|
|---|
| 25 | import types
|
|---|
| 26 |
|
|---|
| 27 | from GeoEco.Datasets import Dataset, DatasetCollection
|
|---|
| 28 | from GeoEco.DynamicDocString import DynamicDocString
|
|---|
| 29 | from GeoEco.Internationalization import _
|
|---|
| 30 | from GeoEco.Logging import ProgressReporter
|
|---|
| 31 | from GeoEco.Types import *
|
|---|
| 32 |
|
|---|
| 33 |
|
|---|
| 34 | class DatasetCollectionTree(DatasetCollection):
|
|---|
| 35 | __doc__ = DynamicDocString()
|
|---|
| 36 |
|
|---|
# Getter behind the read-only PathParsingExpressions property. It
# returns whatever __init__ stored in self._PathParsingExpressions:
# the list of '$'-terminated path parsing expressions, or None if the
# instance was constructed without path parsing expressions.

def _GetPathParsingExpressions(self):
    expressions = self._PathParsingExpressions
    return expressions

PathParsingExpressions = property(fget=_GetPathParsingExpressions, doc=DynamicDocString())
|
|---|
| 41 |
|
|---|
# Getter behind the read-only PathCreationExpressions property. It
# returns whatever __init__ stored in self._PathCreationExpressions:
# the caller's path creation expressions, unmodified, or None if the
# instance was constructed without them.

def _GetPathCreationExpressions(self):
    expressions = self._PathCreationExpressions
    return expressions

PathCreationExpressions = property(fget=_GetPathCreationExpressions, doc=DynamicDocString())
|
|---|
| 46 |
|
|---|
def __init__(self, pathParsingExpressions=None, pathCreationExpressions=None, canSortByDate=True, parentCollection=None, queryableAttributes=None, queryableAttributeValues=None, lazyPropertyValues=None, cacheDirectory=None):
    # Validates the path expressions against the queryable attributes
    # of this collection and stores the resulting mappings.
    #
    # pathParsingExpressions: list of regular expressions, one per
    #     level of the hierarchy, or None. Each named group must be
    #     the name of a queryable attribute or one of the date/time
    #     components Year, Month, Day, DayOfYear, Hour, Minute,
    #     Second. A '$' is appended to any expression that does not
    #     already end with one.
    # pathCreationExpressions: list of %-style format strings, one
    #     per level, or None. Substitution groups must be %(name)
    #     references to queryable attributes or the date/time
    #     formatters %%y %%Y %%b %%B %%m %%d %%j %%H %%M %%S.
    # canSortByDate: stored as-is in self._CanSortByDate; it is read
    #     by _GetOldestDataset and _GetNewestDataset.
    # parentCollection, queryableAttributes, queryableAttributeValues,
    # lazyPropertyValues, cacheDirectory: passed to the base class
    #     unchanged.
    #
    # Raises ValueError if neither list is given or if the lists are
    # inconsistent with the queryable attributes or with each other.
    # Raises TypeError if a DateTimeTypeMetadata attribute exists but
    # the date/time components in the expressions are unusable (no
    # Year, Month with DayOfYear, Day with DayOfYear, Day without
    # Month).

    # TODO: Validation

    # Initialize the base class.

    super(DatasetCollectionTree, self).__init__(parentCollection, queryableAttributes, queryableAttributeValues, lazyPropertyValues, cacheDirectory)

    # Validate that either the pathParsingExpressions or
    # pathCreationExpressions or both were specified.

    if pathParsingExpressions is None and pathCreationExpressions is None:
        raise ValueError(_(u'pathParsingExpressions and pathCreationExpressions are both None. At least one of them must be specified.'))

    # Named groups with these names are parts of the date/time rather
    # than names of queryable attributes.

    dateTimeCompNames = ['Year', 'Month', 'Day', 'DayOfYear', 'Hour', 'Minute', 'Second']

    # Search the queryable attributes for one with the data type
    # DateTimeTypeMetadata. If we find one, it will require
    # special processing.

    attrs = self.GetQueryableAttributesWithDataType(DateTimeTypeMetadata)
    if len(attrs) > 1:      # Should never happen; CollectibleObject.__init__ prevents it
        raise ValueError(_(u'This dataset collection has multiple queryable attributes defined with the data type DateTimeTypeMetadata. In order to retrieve the oldest dataset, only one queryable attribute of that type must be defined.'))
    if len(attrs) == 1:
        dateTimeAttr = attrs[0]
    else:
        dateTimeAttr = None

    # If the caller provided path parsing expressions, map them to
    # the queryable attributes. Later, when executing a query,
    # we'll descend down the path parsing expressions as we drill
    # into the hierarchy. As we descend each level, we need to
    # know which queryable attributes have been determined by the
    # levels we've descended.

    adjustedPPE = None

    if pathParsingExpressions is not None:
        attrsForExpr = [None] * len(pathParsingExpressions)
        dateTimeComponents = []
        adjustedPPE = []

        for i, ppe in enumerate(pathParsingExpressions):
            if ppe.endswith('$'):
                adjustedPPE.append(ppe)
            else:
                adjustedPPE.append(ppe + '$')

            ppeNames = re.findall(r'\(\?P<(\w+)>[^\)]+\)', adjustedPPE[i], re.IGNORECASE)
            attrsForExpr[i] = []

            for ppeName in ppeNames:
                if ppeName not in dateTimeCompNames:
                    qa = self.GetQueryableAttribute(ppeName)
                    if qa is None:
                        raise ValueError(_(u'pathParsingExpressions[%(i)i] includes a group named "%(group)s" but there is no queryable attribute with that name defined for this collection or its parents. For each named group in pathParsingExpressions, a there must be a queryable attribute defined for it.') % {u'i': i, u'group': ppeName})
                    attrsForExpr[i].append([ppeName, qa])
                else:
                    if dateTimeAttr is None:
                        raise ValueError(_(u'pathParsingExpressions[%(i)i] includes the date/time component "%(group)s" but there is no queryable attribute defined for this dataset collection or its parents with the data type DateTimeTypeMetadata. In order to parse date/time components, a DateTimeTypeMetadata queryable attribute must be defined for the collection.') % {u'i': i, u'group': ppeName})
                    attrsForExpr[i].append([ppeName, dateTimeAttr])
                    if ppeName not in dateTimeComponents:
                        dateTimeComponents.append(ppeName)

    # If the caller provided path creation expressions but not
    # path parsing expressions, map the path creation expressions
    # to the queryable attributes. Later, when importing datasets,
    # we'll use this mapping to create the destination path for
    # each dataset we import.
    #
    # If the caller specified both path creation expressions and
    # path parsing expressions, just validate that they are
    # compatible with each other.

    if pathCreationExpressions is not None:
        if pathParsingExpressions is None:
            attrsForExpr = [None] * len(pathCreationExpressions)
            dateTimeComponents = []
            dateTimeCompForPCEName = {'y': 'Year', 'Y': 'Year', 'b': 'Month', 'B': 'Month', 'm': 'Month', 'd': 'Day', 'j': 'DayOfYear', 'H': 'Hour', 'M': 'Minute', 'S': 'Second'}
        else:
            validPCENamesForPPEName = {'Year': 'yY', 'Month': 'bBm', 'Day': 'd', 'DayOfYear': 'j', 'Hour': 'H', 'Minute': 'M', 'Second': 'S'}

            # attrsForExpr has one entry per path parsing expression,
            # so a longer list of path creation expressions cannot be
            # checked against it. Report that explicitly rather than
            # failing below with an IndexError.

            if len(pathCreationExpressions) > len(pathParsingExpressions):
                raise ValueError(_(u'pathCreationExpressions contains %(n1)i expressions but pathParsingExpressions contains only %(n2)i. pathCreationExpressions must not contain more expressions than pathParsingExpressions.') % {u'n1': len(pathCreationExpressions), u'n2': len(pathParsingExpressions)})

        for i, pce in enumerate(pathCreationExpressions):

            # Each match is a tuple (formatter, name): a date/time
            # formatter fills the first element, a %(name) reference
            # fills the second, and any other % sequence leaves both
            # empty. The match is deliberately case-sensitive: the
            # formatter letters are case-sensitive (%%m is the month,
            # %%M the minute), and a case-insensitive match would
            # accept letters such as D or h that have no entry in the
            # lookup tables above.

            pceNames = re.findall(r'%%([yYbBmdjHMS])|%\((\w+)\)|%{1,2}[^%]', pce)

            if pathParsingExpressions is None:
                attrsForExpr[i] = []
            elif len(pceNames) != len(attrsForExpr[i]):
                raise ValueError(_(u'pathParsingExpressions[%(i)i] contains %(g1)i named groups but pathCreationExpressions[%(i)i] contains %(g2)i substitution groups. The number of named groups in pathParsingExpressions[%(i)i] must equal the number of substitution groups in pathCreationExpressions[%(i)i].') % {u'i': i, u'g1': len(attrsForExpr[i]), u'g2': len(pceNames)})

            for j, (formatter, groupName) in enumerate(pceNames):
                if formatter == '' and groupName == '':     # Ensure that all substitution groups are named groups
                    raise ValueError(_(u'Substitution group %(j)i (0 is the first group) of pathCreationExpressions[%(i)i] is invalid. It must be a named group or an allowed date/time formatter.') % {u'i': i, u'j': j})

                if pathParsingExpressions is None:
                    if groupName != '':
                        qa = self.GetQueryableAttribute(groupName)
                        if qa is None:
                            raise ValueError(_(u'pathCreationExpressions[%(i)i] includes a group named "%(group)s" but there is no queryable attribute with that name defined for this collection or its parents. For each named group in pathCreationExpressions, there must be a queryable attribute defined for it.') % {u'i': i, u'group': groupName})
                        attrsForExpr[i].append([groupName, qa])
                    else:
                        if dateTimeAttr is None:
                            raise ValueError(_(u'pathCreationExpressions[%(i)i] includes the date/time component "%(group)s" but there is no queryable attribute defined for this dataset collection or its parents with the data type DateTimeTypeMetadata. If the path creation expressions include date/time components, a DateTimeTypeMetadata queryable attribute must be defined for the collection.') % {u'i': i, u'group': formatter})
                        dateTimeComp = dateTimeCompForPCEName[formatter]
                        attrsForExpr[i].append([dateTimeComp, dateTimeAttr])
                        if dateTimeComp not in dateTimeComponents:
                            dateTimeComponents.append(dateTimeComp)
                else:
                    ppeName = attrsForExpr[i][j][0]
                    if ppeName not in dateTimeCompNames:
                        if groupName == '':
                            raise ValueError(_(u'pathParsingExpressions[%(i)i] includes a group named "%(group1)s" but the corresponding substitution group in pathCreationExpressions[%(i)i] is the date/time formatter %%%(group2)s. Because "%(group1)s" is not a date/time component, the corresponding substitution group in the corresponding path creation expression cannot be a date/time formatter.') % {u'i': i, u'group1': ppeName, u'group2': formatter})
                        if groupName != ppeName:
                            raise ValueError(_(u'pathParsingExpressions[%(i)i] includes a group named "%(group1)s" but the corresponding substitution group in pathCreationExpressions[%(i)i] is named "%(group2)s". The two lists of expressions must reference the same queryable attributes in the same order.') % {u'i': i, u'group1': ppeName, u'group2': groupName})
                    else:
                        if formatter == '':
                            raise ValueError(_(u'pathParsingExpressions[%(i)i] includes the date/time component "%(group1)s" but the corresponding substitution group in pathCreationExpressions[%(i)i] is not a date/time formatter (it is a group named "%(group2)s"). Because "%(group1)s" is a date/time component, the corresponding substitution group in the corresponding path creation expression must be a date/time formatter.') % {u'i': i, u'group1': ppeName, u'group2': groupName})
                        if formatter not in validPCENamesForPPEName[ppeName]:
                            raise ValueError(_(u'pathParsingExpressions[%(i)i] includes the date/time component "%(group1)s" but the corresponding substitution group in pathCreationExpressions[%(i)i] is the wrong date/time formatter (%%%(group2)s). The allowed date/time formatters for "%(group1)s" are: %(allowed)s') % {u'i': i, u'group1': ppeName, u'group2': formatter, u'allowed': u', '.join(['%%' + c for c in validPCENamesForPPEName[ppeName]])})

    # If the queryable attributes contains an attribute with the
    # data type DateTimeTypeMetadata, perform additional
    # validation.

    if dateTimeAttr is not None:
        if pathParsingExpressions is not None:
            param = u'pathParsingExpressions'
        else:
            param = u'pathCreationExpressions'

        # Validate that the path expressions contain, at minimum,
        # the Year.

        if 'Year' not in dateTimeComponents:
            raise TypeError(_(u'This dataset collection includes a queryable attribute with the data type DateTimeTypeMetadata but %(param)s does not include a path component for the year.') % {u'param': param})

        # Validate that Month and DayOfYear are not both
        # specified, and that Day and DayOfYear are not both
        # specified.

        if 'Month' in dateTimeComponents and 'DayOfYear' in dateTimeComponents:
            raise TypeError(_(u'%(param)s includes path components for both the month and the day of the year. This is not allowed. Please choose one or the other.') % {u'param': param})

        if 'Day' in dateTimeComponents and 'DayOfYear' in dateTimeComponents:
            raise TypeError(_(u'%(param)s includes path components for both the day of the month and the day of the year. This is not allowed. Please choose one or the other.') % {u'param': param})

        # Validate that if Day is specified, Month is also
        # specified.

        if 'Day' in dateTimeComponents and 'Month' not in dateTimeComponents:
            raise TypeError(_(u'%(param)s includes a path component for the day of the month but not the month. This is not allowed. Please add an expression for the month in addition to the day of the month.') % {u'param': param})

    # Initialize our properties.

    self._PathParsingExpressions = adjustedPPE
    self._PathCreationExpressions = pathCreationExpressions
    self._AttrsForExpr = attrsForExpr
    self._DateTimeComponents = dateTimeComponents
    self._CanSortByDate = canSortByDate
|
|---|
| 202 |
|
|---|
def _QueryDatasets(self, parsedExpression, progressReporter, options, parentAttrValues):
    # Entry point for a 'normal' query: starts the recursive search
    # at depth 0 with an empty path and no parsed attribute values,
    # and returns what _QueryRecursive returns. Raises RuntimeError
    # if this instance has no path parsing expressions to search
    # with.

    canQuery = self.PathParsingExpressions is not None
    if canQuery:
        return self._QueryRecursive(parsedExpression, progressReporter, options, [], 0, parentAttrValues, {}, 'normal')

    raise RuntimeError(_(u'Cannot query %(dn)s for datasets because the DatasetCollectionTree representing it was not instantiated with path parsing expressions.') % {u'dn': self.DisplayName})
|
|---|
| 207 |
|
|---|
def _QueryRecursive(self, parsedExpression, progressReporter, options, pathComponents, depth, parentAttrValues, parsedAttrValues, queryType='normal'):
    # Searches one level of the tree and returns a list of the
    # datasets found at or below it.
    #
    # parsedExpression: query expression object, or None to match
    #     everything. Its eval() is called with a dictionary of
    #     attribute values and is expected to return True, False, or
    #     None (indeterminate).
    # progressReporter: if not None, ReportProgress() is called for
    #     each Dataset added to the results at this level.
    # options: dictionary or None. The 'closeDatasets' key is
    #     consumed here; everything else is passed through to
    #     _ConstructFoundObject.
    # pathComponents: list of the path components descended so far.
    # depth: index into the path parsing expressions for this level.
    # parentAttrValues: attribute values supplied by the caller of
    #     the query; only read here.
    # parsedAttrValues: attribute values parsed at shallower levels.
    # queryType: 'normal', 'oldest' or 'newest'. For the latter two,
    #     at most one dataset is returned.

    # Imported locally because sys is not imported at module level.
    # Handlers below read the exception from sys.exc_info(), which
    # parses on both Python 2 and Python 3.

    import sys

    # Extract key/value pairs from options that are intended for
    # us, so we do not pass them through to _ConstructFoundObject.

    if options is not None and 'closeDatasets' in options:
        closeDatasets = bool(options['closeDatasets'])
        constructOptions = {}
        constructOptions.update(options)
        del constructOptions['closeDatasets']
    else:
        closeDatasets = False
        constructOptions = options

    # Enumerate the contents of the path.

    contents = self._ListContents(pathComponents)

    # If this is an 'oldest' or 'newest' query (rather than just a
    # 'normal' query), our objective is to return just one
    # dataset: the oldest or newest. To make this efficient, when
    # the path component at this level contains an element of the
    # date or time, sort the contents of the path in ascending or
    # descending order. Later, as we're iterating through the
    # contents of the path, we'll stop once we have found a single
    # dataset.

    if queryType != 'normal':
        sortByDate = False
        for name, attr in self._AttrsForExpr[depth]:
            if isinstance(attr.DataType, DateTimeTypeMetadata):
                sortByDate = True
                break

        if sortByDate:
            if queryType == 'oldest':
                contents.sort()
            else:
                contents.sort(reverse=True)

    # Iterate through the contents of the path, testing each item
    # against the query expression.

    datasetsFound = []

    for component in contents:

        # Parse the queryable attribute values derived from this
        # path component. If the component does not match the
        # regular expression for this level in the hierarchy, go
        # on to the next one.

        match = re.match(self._PathParsingExpressions[depth], component, re.IGNORECASE)
        if match is None:

            # The following message generates too much output for
            # certain collections. Uncomment it only when
            # necessary.

            #self._LogDebug(_(u'%(class)s 0x%(id)08X: Skipping path component "%(comp)s"; it does not match regular expression "%(re)s".'), {u'class': self.__class__.__name__, u'id': id(self), u'comp': component, u're': self._PathParsingExpressions[depth]})

            continue

        componentAttrValues = {}
        for name, attr in self._AttrsForExpr[depth]:
            value = match.group(name)
            try:
                if isinstance(attr.DataType, UnicodeStringTypeMetadata):
                    componentAttrValues[attr.Name] = self._Unicode(value)
                elif isinstance(attr.DataType, IntegerTypeMetadata):
                    componentAttrValues[attr.Name] = int(value)
                elif isinstance(attr.DataType, FloatTypeMetadata):
                    componentAttrValues[attr.Name] = float(value)
                else:
                    componentAttrValues[name] = int(value)      # Add datetime components as integers
            except Exception:
                e = sys.exc_info()[1]
                self._LogDebug(_(u'%(class)s 0x%(id)08X: Skipping path component "%(comp)s"; failed to parse queryable attribute %(attr)s from the string %(s)s due to %(e)s: %(msg)s.'), {u'class': self.__class__.__name__, u'id': id(self), u'comp': component, u'attr': attr.Name, u's': repr(value), u'e': e.__class__.__name__, u'msg': self._Unicode(e)})
                componentAttrValues = None
                break

        if componentAttrValues is None:
            continue

        componentAttrValues.update(parsedAttrValues)

        # If the path parse expressions parse a datetime and we
        # have parsed all of the components specified in the
        # expressions, build a datetime value and add it to the
        # attribute values.

        if len(self._DateTimeComponents) > 0 and 'DateTime' not in componentAttrValues:
            foundAll = True
            for dtComp in self._DateTimeComponents:
                if dtComp not in componentAttrValues:
                    foundAll = False
                    break

            if foundAll:
                year = componentAttrValues['Year']
                month = 1
                day = 1
                hour = 0
                minute = 0
                second = 0

                if 'Month' in componentAttrValues:
                    month = componentAttrValues['Month']

                if 'Day' in componentAttrValues:
                    day = componentAttrValues['Day']

                if 'Hour' in componentAttrValues:
                    hour = componentAttrValues['Hour']

                if 'Minute' in componentAttrValues:
                    minute = componentAttrValues['Minute']

                if 'Second' in componentAttrValues:
                    second = componentAttrValues['Second']

                try:
                    dt = datetime.datetime(year, month, day, hour, minute, second)
                except Exception:
                    e = sys.exc_info()[1]
                    self._LogDebug(_(u'%(class)s 0x%(id)08X: Skipping path component "%(comp)s"; failed to construct a datetime instance from values [%(year)s, %(month)s, %(day)s, %(hour)s, %(minute)s, %(second)s] due to %(e)s: %(msg)s.'), {u'class': self.__class__.__name__, u'id': id(self), u'comp': component, u'year': repr(year), u'month': repr(month), u'day': repr(day), u'hour': repr(hour), u'minute': repr(minute), u'second': repr(second), u'e': e.__class__.__name__, u'msg': self._Unicode(e)})
                    continue

                # When DayOfYear was parsed, dt is 1 January of
                # the year at this point (Month and Day cannot
                # accompany DayOfYear), so advance it to the
                # requested day.

                if 'DayOfYear' in componentAttrValues:
                    try:
                        dt += datetime.timedelta(days=componentAttrValues['DayOfYear'] - 1)
                    except Exception:
                        self._LogDebug(_(u'%(class)s 0x%(id)08X: Skipping path component "%(comp)s"; failed to add datetime.timedelta(days=%(dayOfYear)s - 1) to the datetime %(dt)s.'), {u'class': self.__class__.__name__, u'id': id(self), u'comp': component, u'dayOfYear': repr(componentAttrValues['DayOfYear']), u'dt': repr(dt)})
                        continue

                offsetInDays = self.GetLazyPropertyValue('TOffsetFromParsedTime')
                if offsetInDays is not None:
                    dt += datetime.timedelta(offsetInDays)

                # Store the final datetime and overwrite the
                # individual components with its parts, so they
                # reflect any offset that was applied.

                componentAttrValues['DateTime'] = dt
                componentAttrValues['Year'] = dt.year
                componentAttrValues['Month'] = dt.month
                componentAttrValues['Day'] = dt.day
                componentAttrValues['Hour'] = dt.hour
                componentAttrValues['Minute'] = dt.minute
                componentAttrValues['Second'] = dt.second
                componentAttrValues['DayOfYear'] = int(dt.strftime('%j'))

        # If there are any queryable attributes defined that are
        # derived from the values we have parsed so far, derive
        # their values now.

        allAttrValues = {}
        allAttrValues.update(parentAttrValues)
        allAttrValues.update(componentAttrValues)

        obj = self
        while obj is not None:
            if obj._QueryableAttributes is not None:
                for attr in obj._QueryableAttributes:
                    if attr.DerivedFromAttr is not None and attr.Name not in allAttrValues and attr.DerivedFromAttr in allAttrValues:
                        if attr.DerivedValueMap is not None:
                            if allAttrValues[attr.DerivedFromAttr] in attr.DerivedValueMap:
                                allAttrValues[attr.Name] = attr.DerivedValueMap[allAttrValues[attr.DerivedFromAttr]]
                        else:
                            value = attr.DerivedValueFunc(allAttrValues, allAttrValues[attr.DerivedFromAttr])
                            if value is not None:
                                allAttrValues[attr.Name] = value
            obj = obj.ParentCollection

        # Test whether this path component matches the query
        # expression. This will return True or False, or None,
        # indicating that the result of the query expression is
        # indeterminate because it depends on the values of
        # queryable attributes that have not been obtained yet.

        if parsedExpression is not None:
            try:
                result = parsedExpression.eval(allAttrValues)

                # If the result was False, indicating that there
                # are enough queryable attributes whose values are
                # known to conclude that the path component does
                # not match query expression, and the path parse
                # expressions parse some date/time parts, and we
                # have not parsed them all yet, and we have a lazy
                # property named TOffsetFromParsedTime, it means
                # that the date/time values we have parsed so far
                # might not represent the ultimate values when
                # parsing is complete.
                #
                # For example, consider the case of MODIS
                # nighttime monthly SST files. In the directory
                # structure, files are grouped in subdirectories
                # by year. But the time range of a given file is
                # 12:00:00 on the last day of the previous month
                # to 12:00:00 of the last day of the month of the
                # file. Therefore, files for the month of January
                # actually begin in the previous year, e.g. the
                # January 2004 file actually runs from 31-Dec-2003
                # 12:00:00 to 31-Jan-2004 12:00:00. This is
                # problematic. A query for "Year = 2003" should
                # return this file (and not the January 2003
                # file). We should descend into the 2004 directory
                # in order to obtain the file that starts in 2003,
                # even though the 2004 directory will result in
                # us parsing Year as 2004 and therefore not
                # matching the "Year = 2003" expression.
                #
                # To work around this, evaluate the expression
                # again, adjusting the date/time parts we've
                # parsed so far by the TOffsetFromParsedTime, to
                # see if the result changes from False to None. If
                # so, it means we should descend into this part of
                # the tree because it is theoretically possible
                # that the items within it could end up matching.
                #
                # This is very complicated, but at this time I do
                # not know of a better way to handle datasets such
                # as MODIS that do not include the time values in
                # their file names.

                if result == False and len(self._DateTimeComponents) > 0 and 'DateTime' not in allAttrValues and self.GetLazyPropertyValue('TOffsetFromParsedTime'):
                    if 'Year' in allAttrValues:
                        attrValuesToTry = {}
                        attrValuesToTry.update(allAttrValues)
                        tryPreviousYear = True

                        if 'DayOfYear' in attrValuesToTry:

                            # Step back one day. Only move on to
                            # the previous year if that wrapped
                            # around the start of the year.

                            attrValuesToTry['DayOfYear'] = int((datetime.datetime(attrValuesToTry['Year'], 1, 1) + datetime.timedelta(days=attrValuesToTry['DayOfYear'] - 2)).strftime('%j'))
                            result = parsedExpression.eval(attrValuesToTry)
                            tryPreviousYear = result == False and attrValuesToTry['DayOfYear'] > allAttrValues['DayOfYear']

                        elif 'Month' in attrValuesToTry:
                            tryPreviousMonth = True

                            if 'Day' in attrValuesToTry:    # It should be rare that Year, Month, Day has been parsed but DateTime has not been; it implies that Hour still must be parsed
                                attrValuesToTry['Day'] = (datetime.datetime(attrValuesToTry['Year'], attrValuesToTry['Month'], attrValuesToTry['Day']) - datetime.timedelta(1)).day
                                if attrValuesToTry['Day'] < allAttrValues['Day']:

                                    # The previous day is in the
                                    # same month, so neither the
                                    # month nor the year changes.
                                    # Without clearing
                                    # tryPreviousYear here, the
                                    # year would be decremented
                                    # below and this result
                                    # overwritten.

                                    result = parsedExpression.eval(attrValuesToTry)
                                    tryPreviousMonth = False
                                    tryPreviousYear = False

                            if tryPreviousMonth:
                                if attrValuesToTry['Month'] > 1:
                                    attrValuesToTry['Month'] -= 1
                                    result = parsedExpression.eval(attrValuesToTry)
                                    tryPreviousYear = False
                                else:
                                    attrValuesToTry['Month'] = 12

                        if tryPreviousYear:
                            attrValuesToTry['Year'] -= 1
                            result = parsedExpression.eval(attrValuesToTry)

            except Exception:

                # Skipping the component is deliberate best-effort
                # behaviour, but record why it was skipped.

                e = sys.exc_info()[1]
                self._LogDebug(_(u'%(class)s 0x%(id)08X: Skipping path component "%(comp)s"; failed to evaluate the query expression against it due to %(e)s: %(msg)s.'), {u'class': self.__class__.__name__, u'id': id(self), u'comp': component, u'e': e.__class__.__name__, u'msg': self._Unicode(e)})
                continue
        else:
            result = True

        if result is None or result:
            self._LogDebug(_(u'%(class)s 0x%(id)08X: Query result for path component "%(comp)s": %(result)s'), {u'class': self.__class__.__name__, u'id': id(self), u'comp': component, u'result': repr(result)})

        # If we are still descending the path components (i.e.
        # this is not the deepest one), and we got a True or None,
        # recursively evaluate this component.

        if depth < len(self._PathParsingExpressions) - 1:
            if result or result is None:
                datasetsFound.extend(self._QueryRecursive(parsedExpression, progressReporter, options, pathComponents + [component], depth + 1, parentAttrValues, componentAttrValues, queryType))

        # If we have reached the deepest level of the path
        # component hierarchy and the result was True (it matches)
        # or None (we don't know if it matches), construct an
        # object for it. If that object is a Dataset and the
        # result was True, add it to our list of datasets; if the
        # result was None, don't add it. If that object is a
        # DatasetCollection, submit our query to it to retrieve
        # the matching datasets.

        elif result or result is None:

            obj = self._ConstructFoundObject(pathComponents + [component], componentAttrValues, constructOptions)
            if obj is not None:
                if isinstance(obj, Dataset):
                    if result:
                        datasetsFound.append(obj)
                        if closeDatasets:
                            obj.Close()
                        if progressReporter is not None:
                            progressReporter.ReportProgress()
                    else:
                        del obj
                else:
                    datasetsFound.extend(obj._QueryDatasets(parsedExpression, progressReporter, options, allAttrValues))
                    if closeDatasets:
                        obj.Close()

        # If this is an 'oldest' or 'newest' query and the path
        # component at this level includes an element of the date
        # or time and we got some datasets, break out of the loop.
        # (sortByDate is only assigned for non-'normal' queries; the
        # first test keeps it from being read otherwise.)

        if queryType != 'normal' and sortByDate and len(datasetsFound) > 0:
            break

    # If this is an 'oldest' or 'newest' query, walk through the
    # datasets we found and return the oldest or newest one. If
    # multiple datasets have the same oldest or newest date and
    # time, we pick whichever one happened to come first.

    if queryType != 'normal' and len(datasetsFound) > 0:
        best = 0
        bestDateTime = datasetsFound[0].GetQueryableAttributeValue('DateTime')

        for i in range(1, len(datasetsFound)):
            candidateDateTime = datasetsFound[i].GetQueryableAttributeValue('DateTime')
            if queryType == 'oldest':
                isBetter = candidateDateTime < bestDateTime
            else:
                isBetter = candidateDateTime > bestDateTime

            # Track the winning date/time as well as its index, so
            # later candidates are compared against the best found
            # so far rather than always against the first dataset.

            if isBetter:
                best = i
                bestDateTime = candidateDateTime

        datasetsFound = [datasetsFound[best]]

    # Return the datasets we found.

    return datasetsFound
|
|---|
| 531 |
|
|---|
def _GetOldestDataset(self, parsedExpression, options, parentAttrValues, dateTimeAttrName):

    # Return the oldest dataset that matches the query, or None if
    # nothing matches. When this tree cannot sort by date, the base
    # class implementation is used instead.

    if not self._CanSortByDate:
        return super(DatasetCollectionTree, self)._GetOldestDataset(parsedExpression, options, parentAttrValues, dateTimeAttrName)

    # Run the recursive query in 'oldest' mode, without a progress
    # reporter, and hand back the first dataset it returns (if any).

    matches = self._QueryRecursive(parsedExpression, None, options, [], 0, parentAttrValues, {}, 'oldest')
    if len(matches) <= 0:
        return None
    return matches[0]
|
|---|
| 540 |
|
|---|
def _GetNewestDataset(self, parsedExpression, options, parentAttrValues, dateTimeAttrName):

    # Return the newest dataset that matches the query, or None if
    # nothing matches. When this tree cannot sort by date, the base
    # class implementation is used instead.

    if not self._CanSortByDate:
        return super(DatasetCollectionTree, self)._GetNewestDataset(parsedExpression, options, parentAttrValues, dateTimeAttrName)

    # Run the recursive query in 'newest' mode, without a progress
    # reporter, and hand back the first dataset it returns (if any).

    matches = self._QueryRecursive(parsedExpression, None, options, [], 0, parentAttrValues, {}, 'newest')
    if len(matches) <= 0:
        return None
    return matches[0]
|
|---|
| 549 |
|
|---|
def _ImportDatasets(self, datasets, mode, reportProgress, options):

    # Import the source datasets into this tree. A destination path
    # is built for each dataset from the path creation expressions
    # and the dataset's queryable attribute values; the datasets are
    # then grouped by destination path and passed, one path at a
    # time, to _ImportDatasetsToPath. When mode is u'add',
    # _RemoveExistingDatasetsFromList is first given the chance to
    # prune each per-path list, and paths left with no datasets are
    # skipped.
    #
    # Raises RuntimeError if no path creation expressions were
    # provided, and ValueError if a source dataset lacks a queryable
    # attribute required to build its destination path.

    if self.PathCreationExpressions is None:
        raise RuntimeError(_(u'Cannot import datasets into %(dn)s because the DatasetCollectionTree representing it was not instantiated with path creation expressions.') % {u'dn': self.DisplayName})

    if reportProgress:
        self._LogInfo(_(u'Importing %(count)i datasets into %(dn)s with mode "%(mode)s".') % {u'count': len(datasets), u'dn': self.DisplayName, u'mode': mode})
    else:
        self._LogDebug(_(u'%(class)s 0x%(id)08X: Importing %(count)i datasets into %(dn)s with mode "%(mode)s".') % {u'class': self.__class__.__name__, u'id': id(self), u'count': len(datasets), u'dn': self.DisplayName, u'mode': mode})

    # Build a dictionary mapping destination paths to the source
    # datasets that should be imported to those paths. In many
    # cases, as when we are a DirectoryTree of GDALDatasets, each
    # source dataset will be imported to a unique destination
    # path. In some cases, as when we are a DirectoryTree of
    # NetCDFFiles, several source datasets may be imported to each
    # unique destination path.

    pathComponentsForPath = {}
    datasetsForPath = {}

    for dataset in datasets:

        # First, build a dictionary that maps the names of
        # attributes that appear in our path parsing and path
        # creation expressions to values of those attributes from
        # the source dataset. Fail if the source dataset does not
        # have all of these attributes.

        attrValues = {}
        gotDateTime = False

        for attrs in self._AttrsForExpr:
            for [name, attr] in attrs:

                # All date/time attributes are looked up under the
                # single queryable attribute name 'DateTime'.

                if isinstance(attr.DataType, DateTimeTypeMetadata):
                    attrName = u'DateTime'
                    gotDateTime = True
                else:
                    attrName = attr.Name

                if attrName in attrValues:
                    continue

                attrValue = dataset.GetQueryableAttributeValue(attrName)
                if attrValue is None:
                    raise ValueError(_(u'%(dn1)s does not have a queryable attribute named %(name)s. In order to import this dataset into %(dn2)s, it must have that queryable attribute.') % {u'dn1': dataset.DisplayName, u'name': attrName, u'dn2': self.DisplayName})

                # If the dataset defines a TOffsetFromParsedTime
                # lazy property (a number of days), subtract it
                # from the date/time before using it in the path.

                if attrName == u'DateTime':
                    offsetInDays = dataset.GetLazyPropertyValue('TOffsetFromParsedTime')
                    if offsetInDays is not None:
                        attrValue = datetime.datetime(attrValue.year, attrValue.month, attrValue.day, attrValue.hour, attrValue.minute, attrValue.second, attrValue.microsecond) - datetime.timedelta(offsetInDays)

                attrValues[attrName] = attrValue

        # Build the components of the destination path. Each
        # expression is first %-formatted with the attribute values
        # and then, if a date/time attribute is involved, passed
        # through strftime.

        pathComponents = []
        for expr in self._PathCreationExpressions:
            comp = expr % attrValues
            if gotDateTime:
                comp = self._Unicode(attrValues[u'DateTime'].strftime(self._Str(comp)))
            pathComponents.append(comp)

        # Add this path and source dataset to our dictionaries.

        path = os.path.join(*pathComponents)
        pathComponentsForPath.setdefault(path, pathComponents)
        datasetsForPath.setdefault(path, []).append(dataset)

    # Sort the destination paths so we always process them in the
    # same order. sorted() returns a real list regardless of whether
    # the dictionary's keys() is a list or a view.

    paths = sorted(pathComponentsForPath)

    # If the mode is 'add', check for existing datasets.

    if mode == u'add':
        if reportProgress:
            self._LogInfo(_(u'Checking for existing destination datasets.'))
            progressReporter = ProgressReporter(progressMessage1=_(u'Still checking: %(elapsed)s elapsed, %(opsCompleted)i datasets checked, %(perOp)s per dataset, %(opsRemaining)i remaining, estimated completion time: %(etc)s.'),
                                                completionMessage=_(u'Finished checking: %(elapsed)s elapsed, %(opsCompleted)i datasets checked, %(perOp)s per dataset.'),
                                                abortedMessage=_(u'Processing stopped before all datasets were checked: %(elapsed)s elapsed, %(opsCompleted)i datasets checked, %(perOp)s per dataset, %(opsIncomplete)i datasets not checked.'),
                                                loggingChannel=DatasetCollection._LoggingChannel)
            progressReporter.Start(len(datasets))
        else:
            self._LogDebug(_(u'%(class)s 0x%(id)08X: Checking for existing destination datasets.') % {u'class': self.__class__.__name__, u'id': id(self)})
            progressReporter = None

        try:

            # Let the derived class prune each per-path list in
            # place, and keep only the paths that still have
            # datasets to import. Building a new list avoids
            # repeatedly deleting from the middle of the old one.

            datasetsToAdd = 0
            pathsToImport = []
            for path in paths:
                self._RemoveExistingDatasetsFromList(pathComponentsForPath[path], datasetsForPath[path], progressReporter)
                if len(datasetsForPath[path]) > 0:
                    datasetsToAdd += len(datasetsForPath[path])
                    pathsToImport.append(path)
            paths = pathsToImport
        finally:
            if progressReporter is not None:
                progressReporter.Stop()

        if datasetsToAdd <= 0:
            if reportProgress:
                self._LogInfo(_(u'All %(count)i destination datasets already exist. No datasets will be imported.') % {u'count': len(datasets)})
            else:
                self._LogDebug(_(u'%(class)s 0x%(id)08X: All %(count)i destination datasets already exist. No datasets will be imported.') % {u'class': self.__class__.__name__, u'id': id(self), u'count': len(datasets)})
            return

        if reportProgress:
            self._LogInfo(_(u'%(existing)i destination datasets already exist. Importing %(new)i datasets.') % {u'existing': len(datasets) - datasetsToAdd, u'new': datasetsToAdd})
        else:
            self._LogDebug(_(u'%(class)s 0x%(id)08X: %(existing)i destination datasets already exist. Importing %(new)i datasets.') % {u'class': self.__class__.__name__, u'id': id(self), u'existing': len(datasets) - datasetsToAdd, u'new': datasetsToAdd})
    else:
        datasetsToAdd = len(datasets)

    # Iterate through the paths, importing the datasets for each
    # one.

    if reportProgress:
        progressReporter = ProgressReporter(progressMessage1=_(u'Import in progress: %(elapsed)s elapsed, %(opsCompleted)i datasets imported, %(perOp)s per dataset, %(opsRemaining)i remaining, estimated completion time: %(etc)s.'),
                                            completionMessage=_(u'Import complete: %(elapsed)s elapsed, %(opsCompleted)i datasets imported, %(perOp)s per dataset.'),
                                            abortedMessage=_(u'Import stopped before all datasets were imported: %(elapsed)s elapsed, %(opsCompleted)i datasets imported, %(perOp)s per dataset, %(opsIncomplete)i datasets not imported.'),
                                            loggingChannel=DatasetCollection._LoggingChannel)
        progressReporter.Start(datasetsToAdd)
    else:
        progressReporter = None

    try:
        for path in paths:
            self._ImportDatasetsToPath(pathComponentsForPath[path], datasetsForPath[path], mode, progressReporter, options)
    finally:
        if progressReporter is not None:
            progressReporter.Stop()
|
|---|
| 686 |
|
|---|
| 687 | # Private methods that the derived class should override.
|
|---|
| 688 |
|
|---|
def _ListContents(self, pathComponents):

    # Hook that derived classes must override. This base
    # implementation always raises NotImplementedError, naming the
    # concrete class that failed to provide an override.

    className = self.__class__.__name__
    raise NotImplementedError(_(u'The _ListContents method of class %s has not been implemented.') % className)
|
|---|
| 691 |
|
|---|
def _ConstructFoundObject(self, pathComponents, attrValues, options):

    # Hook that derived classes must override. This base
    # implementation always raises NotImplementedError, naming the
    # concrete class that failed to provide an override.

    className = self.__class__.__name__
    raise NotImplementedError(_(u'The _ConstructFoundObject method of class %s has not been implemented.') % className)
|
|---|
| 694 |
|
|---|
def _RemoveExistingDatasetsFromList(self, pathComponents, datasets, progressReporter):

    # Hook that derived classes must override. This base
    # implementation always raises NotImplementedError, naming the
    # concrete class that failed to provide an override.

    className = self.__class__.__name__
    raise NotImplementedError(_(u'The _RemoveExistingDatasetsFromList method of class %s has not been implemented.') % className)
|
|---|
| 697 |
|
|---|
def _ImportDatasetsToPath(self, pathComponents, sourceDatasets, mode, progressReporter, options):

    # Hook that derived classes must override. This base
    # implementation always raises NotImplementedError, naming the
    # concrete class that failed to provide an override.

    className = self.__class__.__name__
    raise NotImplementedError(_(u'The _ImportDatasetsToPath method of class %s has not been implemented.') % className)
|
|---|
| 700 |
|
|---|
| 701 |
|
|---|
class DirectoryTree(DatasetCollectionTree):
    __doc__ = DynamicDocString()

    # Read-only properties exposing the values given to the
    # constructor.

    def _GetPath(self):
        return self._Path

    Path = property(_GetPath, doc=DynamicDocString())

    def _GetDatasetType(self):
        return self._DatasetType

    DatasetType = property(_GetDatasetType, doc=DynamicDocString())

    def _GetCacheTree(self):
        return self._CacheTree

    CacheTree = property(_GetCacheTree, doc=DynamicDocString())

    def __init__(self, path, datasetType, pathParsingExpressions=None, pathCreationExpressions=None, cacheTree=True, queryableAttributes=None, queryableAttributeValues=None, lazyPropertyValues=None, cacheDirectory=None):
        # TODO: Validation

        super(DirectoryTree, self).__init__(pathParsingExpressions, pathCreationExpressions, queryableAttributes=queryableAttributes, queryableAttributeValues=queryableAttributeValues, lazyPropertyValues=lazyPropertyValues, cacheDirectory=cacheDirectory)

        self._Path = path
        self._DatasetType = datasetType
        self._CacheTree = cacheTree

        # The directory listing cache maps a directory path to its
        # sorted contents. It only exists when caching was requested.

        self._TreeCache = {} if self._CacheTree else None
        self._DisplayName = _(u'directory %(name)s') % {u'name': self._Path}

    def _GetDisplayName(self):
        return self._DisplayName

    def _ListContents(self, pathComponents):

        # Return the sorted names found in the directory identified
        # by pathComponents (relative to our Path), consulting and
        # updating the listing cache when caching is enabled.

        directory = os.path.join(self.Path, *pathComponents)
        logArgs = {u'class': self.__class__.__name__, u'id': id(self), u'dir': directory}

        # If we are supposed to cache the tree, probe our cache for
        # the contents of this directory.

        if self._CacheTree:
            if directory in self._TreeCache:
                self._LogDebug(_(u'%(class)s 0x%(id)08X: Retrieved cached contents of directory %(dir)s'), logArgs)
                return self._TreeCache[directory]

        # We did not retrieve the contents of this directory from the
        # cache. Get the contents from the operating system and update
        # the cache (if required).

        self._LogDebug(_(u'%(class)s 0x%(id)08X: Listing contents of directory %(dir)s'), logArgs)

        # TODO: If we have not reached the lowest-level path component, filter out things that are not directories
        contents = sorted(os.listdir(directory))

        if self._CacheTree:
            self._TreeCache[directory] = contents

        return contents

    def _ConstructFoundObject(self, pathComponents, attrValues, options):

        # Instantiate our dataset type for the found path, which is
        # passed relative to this tree, with this tree as the parent
        # collection.

        relativePath = os.path.join(*pathComponents)
        return self.DatasetType(relativePath, parentCollection=self, queryableAttributeValues=attrValues, cacheDirectory=self.CacheDirectory, **options)

    def _GetLocalFile(self, pathComponents):

        # False indicates that it is NOT ok for the caller to delete
        # the file after decompressing it, to save space.

        localFile = os.path.join(self.Path, *pathComponents)
        return localFile, False

    def _RemoveExistingDatasetsFromList(self, pathComponents, datasets, progressReporter):

        # Delegate to the dataset type, giving it the full path.

        fullPath = os.path.join(self.Path, *pathComponents)
        self.DatasetType._RemoveExistingDatasetsFromList(fullPath, datasets, progressReporter)

    def _ImportDatasetsToPath(self, pathComponents, sourceDatasets, mode, progressReporter, options):

        # Delegate to the dataset type, giving it the full path.

        fullPath = os.path.join(self.Path, *pathComponents)
        self.DatasetType._ImportDatasetsToPath(fullPath, sourceDatasets, mode, progressReporter, options)
|
|---|
| 773 |
|
|---|
| 774 |
|
|---|
class FileDatasetCollection(DatasetCollection):
    __doc__ = DynamicDocString()

    # Read-only properties exposing the values given to the
    # constructor.

    def _GetPath(self):
        return self._Path

    Path = property(_GetPath, doc=DynamicDocString())

    def _GetDecompressedFileToReturn(self):
        return self._DecompressedFileToReturn

    DecompressedFileToReturn = property(_GetDecompressedFileToReturn, doc=DynamicDocString())

    def __init__(self, path, decompressedFileToReturn=None, parentCollection=None, queryableAttributes=None, queryableAttributeValues=None, lazyPropertyValues=None, cacheDirectory=None):
        # TODO: Validation

        # The parent collection is asked for a local copy of the
        # file in _GetOpenableFile, so it must provide
        # _GetLocalFile().

        if parentCollection is not None and not hasattr(parentCollection, '_GetLocalFile'):
            raise TypeError(_(u'The parentCollection, if provided, must have a method called _GetLocalFile().'))

        self._Path = path
        self._DecompressedFileToReturn = decompressedFileToReturn

        super(FileDatasetCollection, self).__init__(parentCollection, queryableAttributes, queryableAttributeValues, lazyPropertyValues, cacheDirectory)

    def _GetOpenableFile(self):

        # Return a two-item tuple: the path of a file that can be
        # opened directly (decompressing the file into a cache
        # directory first, if necessary) and a boolean flag. The
        # flag is True only when the original, uncompressed file was
        # found in the file system and returned as-is.

        compressedExtensions = (u'.bz2', u'.gz', u'.tar', u'.z', u'.zip')
        archiveSuffixes = (u'.tar', u'.tar.bz2', u'.tar.gz', u'.tar.z', u'.zip')

        # If the file is not compressed and exists in the file system,
        # just return it.

        if self.ParentCollection is None:
            localPath = self.Path
        elif hasattr(self.ParentCollection, 'Path') and self.ParentCollection.Path is not None:
            localPath = os.path.join(self.ParentCollection.Path, self.Path)
        else:
            localPath = None

        if localPath is not None and os.path.splitext(localPath)[1] not in compressedExtensions and os.path.exists(localPath):
            return localPath, True

        # The file is not an uncompressed, existing file system
        # object. If we or our parent collections define a cache
        # directory and the file exists there in uncompressed form,
        # return it.

        cacheDirectory = None
        obj = self
        while obj is not None:
            if obj.CacheDirectory is not None:
                cacheDirectory = obj.CacheDirectory
                break
            obj = obj.ParentCollection

        import glob

        if cacheDirectory is not None:
            if os.path.splitext(self.Path)[1] not in compressedExtensions:

                # Not compressed: look for the file itself.

                if os.path.exists(os.path.join(cacheDirectory, self.Path)):
                    return os.path.join(cacheDirectory, self.Path), False

            elif not self.Path.endswith(archiveSuffixes):

                # Compressed single file: look for the path with the
                # compression extension removed.

                if os.path.exists(os.path.join(cacheDirectory, os.path.splitext(self.Path)[0])):
                    return os.path.join(cacheDirectory, os.path.splitext(self.Path)[0]), False

            elif self._DecompressedFileToReturn is not None:

                # Archive: look for a previously extracted file that
                # matches the caller's glob expression. If no
                # expression was given there is nothing to probe
                # for, so fall through to decompression rather than
                # failing while building the glob pattern.

                files = glob.glob(os.path.join(os.path.dirname(os.path.join(cacheDirectory, self.Path)), self._DecompressedFileToReturn))
                if len(files) > 0:
                    return files[0], False

        # If we have a parent collection, instruct it to create a
        # local copy of the file, if it does not exist already.

        if self.ParentCollection is not None:

            # Split our path into its individual components by
            # repeatedly splitting the leading element, then drop
            # the empty strings that os.path.split leaves behind.

            pathComponents = list(os.path.split(self.Path))
            while pathComponents[1] != '':
                pathComponents = list(os.path.split(pathComponents[0])) + pathComponents[1:]
            pathComponents = [s for s in pathComponents if s != '']
            localPath, deleteFileAfterDecompressing = self.ParentCollection._GetLocalFile(pathComponents)
        else:
            localPath = self.Path
            deleteFileAfterDecompressing = False

        # If the file is compressed, decompress it.

        if os.path.splitext(localPath)[1] in compressedExtensions:

            # If we or our parent collections did not define a cache
            # directory, create one.

            if cacheDirectory is None:
                self.CacheDirectory = self._CreateTempDirectory()
                cacheDirectory = self.CacheDirectory

            # Decompress the file. Informational log messages are
            # temporarily logged as debug messages while this
            # happens, and the previous setting is always restored.

            from GeoEco.Logging import Logger
            from GeoEco.DataManagement.Files import File
            oldLogInfoAsDebug = Logger.GetLogInfoAsDebug()
            Logger.SetLogInfoAsDebug(True)
            try:
                decompressedFile = File.Decompress(localPath, os.path.join(cacheDirectory, os.path.dirname(self.Path)), True, False, self._DecompressedFileToReturn)
            finally:
                Logger.SetLogInfoAsDebug(oldLogInfoAsDebug)

            # If the parent collection indicated that it is ok to
            # delete the compressed file after decompressing it,
            # delete it now.

            if deleteFileAfterDecompressing:
                self._LogDebug(_(u'%(class)s 0x%(id)08X: Deleting %(file)s to save disk space'), {u'class': self.__class__.__name__, u'id': id(self), u'file': localPath})
                try:
                    os.remove(localPath)
                except Exception:

                    # Deleting the file is only a best-effort space
                    # optimization, so failures are ignored. Unlike
                    # a bare except, this does not swallow
                    # KeyboardInterrupt or SystemExit.

                    pass

            localPath = decompressedFile

        # Return successfully.

        return localPath, False
|
|---|
| 893 |
|
|---|
| 894 |
|
|---|
| 895 | ###############################################################################
|
|---|
| 896 | # Metadata: module
|
|---|
| 897 | ###############################################################################
|
|---|
| 898 |
|
|---|
| 899 | from GeoEco.Metadata import *
|
|---|
| 900 | from GeoEco.Types import *
|
|---|
| 901 |
|
|---|
# Register the documentation metadata for this module and its classes.
# These calls run at import time, after the classes above have been
# defined.

AddModuleMetadata(shortDescription=_(u'Classes that represent collections of datasets.'))

###############################################################################
# Metadata: DatasetCollectionTree class
###############################################################################

AddClassMetadata(DatasetCollectionTree,
    shortDescription=_(u'TODO: Add description.'))

# TODO: Add metadata

###############################################################################
# Metadata: DirectoryTree class
###############################################################################

AddClassMetadata(DirectoryTree,
    shortDescription=_(u'TODO: Add description.'))

# TODO: Add metadata

###############################################################################
# Metadata: FileDatasetCollection class
###############################################################################

AddClassMetadata(FileDatasetCollection,
    shortDescription=_(u'TODO: Add description.'))

# Public properties

AddPropertyMetadata(FileDatasetCollection.Path,
    typeMetadata=UnicodeStringTypeMetadata(),
    shortDescription=_(u'Path to the file to open.'),
    longDescription=_(
u"""If there is no parent collection, this is the full path to the
file. It will be opened as stand-alone collection.

If there is a parent collection, this path is relative to it. For
example, if the parent collection is a DirectoryTree, this path is
relative to a leaf directory of the DirectoryTree. Often, the leaf
directory will be the one containing the file, in which case the path
provided here will simply be the name of the file.

If the path points to compressed file, it will be decompressed
automatically. If a cache directory is provided, it will be checked
first for an existing decompressed file. If none is found the file
will be decompressed there.

If the compressed file is an archive (e.g. .zip or .tar), you must
also specify a decompressed file to return."""))

AddPropertyMetadata(FileDatasetCollection.DecompressedFileToReturn,
    typeMetadata=UnicodeStringTypeMetadata(canBeNone=True),
    shortDescription=_(u'Glob expression that identifies the extracted file to open when the path points to an archive (e.g. a .zip or .tar file).'),
    longDescription=_(
u"""This expression must select exactly one of the extracted files. Be
sure to leave it as None when the path does not point to an
archive."""))

# Private constructor: FileDatasetCollection.__init__

AddMethodMetadata(FileDatasetCollection.__init__,
    shortDescription=_(u'FileDatasetCollection constructor. Not intended to be called directly. Only intended to be called from derived class constructors.'))

AddArgumentMetadata(FileDatasetCollection.__init__, u'self',
    typeMetadata=ClassInstanceTypeMetadata(cls=FileDatasetCollection),
    description=_(u'%s instance.') % FileDatasetCollection.__name__)

# The path and decompressedFileToReturn arguments reuse the type and
# the short and long descriptions registered above for the properties
# of the same names.

AddArgumentMetadata(FileDatasetCollection.__init__, u'path',
    typeMetadata=FileDatasetCollection.Path.__doc__.Obj.Type,
    description=FileDatasetCollection.Path.__doc__.Obj.ShortDescription + u'\n\n' + FileDatasetCollection.Path.__doc__.Obj.LongDescription)

AddArgumentMetadata(FileDatasetCollection.__init__, u'decompressedFileToReturn',
    typeMetadata=FileDatasetCollection.DecompressedFileToReturn.__doc__.Obj.Type,
    description=FileDatasetCollection.DecompressedFileToReturn.__doc__.Obj.ShortDescription + u'\n\n' + FileDatasetCollection.DecompressedFileToReturn.__doc__.Obj.LongDescription)

# The next four arguments take their type and description from the
# arguments of the same names on DatasetCollection.__init__.

AddArgumentMetadata(FileDatasetCollection.__init__, u'parentCollection',
    typeMetadata=DatasetCollection.__init__.__doc__.Obj.GetArgumentByName(u'parentCollection').Type,
    description=DatasetCollection.__init__.__doc__.Obj.GetArgumentByName(u'parentCollection').Description)

AddArgumentMetadata(FileDatasetCollection.__init__, u'queryableAttributes',
    typeMetadata=DatasetCollection.__init__.__doc__.Obj.GetArgumentByName(u'queryableAttributes').Type,
    description=DatasetCollection.__init__.__doc__.Obj.GetArgumentByName(u'queryableAttributes').Description)

AddArgumentMetadata(FileDatasetCollection.__init__, u'queryableAttributeValues',
    typeMetadata=DatasetCollection.__init__.__doc__.Obj.GetArgumentByName(u'queryableAttributeValues').Type,
    description=DatasetCollection.__init__.__doc__.Obj.GetArgumentByName(u'queryableAttributeValues').Description)

AddArgumentMetadata(FileDatasetCollection.__init__, u'lazyPropertyValues',
    typeMetadata=DatasetCollection.__init__.__doc__.Obj.GetArgumentByName(u'lazyPropertyValues').Type,
    description=DatasetCollection.__init__.__doc__.Obj.GetArgumentByName(u'lazyPropertyValues').Description)

AddArgumentMetadata(FileDatasetCollection.__init__, u'cacheDirectory',
    typeMetadata=DirectoryTypeMetadata(canBeNone=True),
    description=_(
u"""Directory to cache a copy of the downloaded and/or decompressed
file.

If provided, this directory will be checked for the file prior to
download and/or decompression. If the file is found, the download
and/or decompression will be skipped. Thus, when performing repetitive
processing with remote or compressed datasets, you can speed up
processing considerably by providing a cache directory."""))

AddResultMetadata(FileDatasetCollection.__init__, u'collection',
    typeMetadata=ClassInstanceTypeMetadata(cls=FileDatasetCollection),
    description=_(u'%s instance.') % FileDatasetCollection.__name__)

# TODO: Add metadata

###############################################################################
# Names exported by this module
###############################################################################

# Only the three collection classes are exported by a star-import of
# this module.

__all__ = ['DatasetCollectionTree', 'DirectoryTree', 'FileDatasetCollection']
|
|---|