paul@6 | 1 | """Common operations on Posix pathnames. |
paul@6 | 2 | |
paul@6 | 3 | Instead of importing this module directly, import os and refer to |
paul@6 | 4 | this module as os.path. The "os.path" name is an alias for this |
paul@6 | 5 | module on Posix systems; on other systems (e.g. Mac, Windows), |
paul@6 | 6 | os.path provides the same operations in a manner specific to that |
paul@6 | 7 | platform, and is an alias to another module (e.g. macpath, ntpath). |
paul@6 | 8 | |
paul@6 | 9 | Some of this can actually be useful on non-Posix systems too, e.g. |
paul@6 | 10 | for manipulation of the pathname component of URLs. |
paul@6 | 11 | """ |
paul@6 | 12 | |
paul@6 | 13 | from genericos import environ, error, fstat, getcwd, getcwdu, getuid, listdir, lstat, readlink |
paul@6 | 14 | import sys |
paul@6 | 15 | import stat |
paul@6 | 16 | import genericpath |
paul@6 | 17 | from genericpath import * |
paul@6 | 18 | |
try:
    _unicode = unicode
except NameError:
    # No ``unicode`` builtin is available (e.g. an interpreter built
    # without Unicode support).  Provide a stand-in type so that
    # isinstance(x, _unicode) checks elsewhere in this module are still
    # valid expressions and simply never match.
    class _unicode(object):
        pass
paul@6 | 26 | |
# Public names exported by ``from <this module> import *``.
__all__ = [
    # pure string manipulation
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix",
    # filesystem queries
    "getsize", "getmtime",
    "getatime", "getctime", "islink", "exists", "lexists", "isdir", "isfile",
    "ismount", "walk",
    # expansion and normalization
    "expanduser", "expandvars", "normpath", "abspath",
    # identity comparisons
    "samefile", "sameopenfile", "samestat",
    # constants
    "curdir", "pardir", "sep", "pathsep", "defpath", "altsep", "extsep",
    "devnull",
    # later additions
    "realpath", "supports_unicode_filenames", "relpath",
]
paul@6 | 34 | |
paul@6 | 35 | # strings representing various path-related bits and pieces |
curdir = '.'               # name referring to the current directory
pardir = '..'              # name referring to the parent directory
extsep = '.'               # separates a file's root from its extension
sep = '/'                  # separates pathname components
pathsep = ':'              # separates entries in a search path
defpath = ':/bin:/usr/bin' # default search path
altsep = None              # no alternative component separator
devnull = '/dev/null'      # path of the null device
paul@6 | 44 | |
paul@6 | 45 | # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac. |
paul@6 | 46 | # On MS-DOS this may also turn slashes into backslashes; however, other |
paul@6 | 47 | # normalizations (such as optimizing '../' away) are not allowed |
paul@6 | 48 | # (another function should be defined to do that). |
paul@6 | 49 | |
def normcase(s):
    """Return the pathname with its case normalized.

    This implementation performs no conversion, so the argument is
    handed back exactly as received.
    """
    return s
paul@6 | 53 | |
paul@6 | 54 | |
paul@6 | 55 | # Return whether a path is absolute. |
paul@6 | 56 | # Trivial in Posix, harder on the Mac or MS-DOS. |
paul@6 | 57 | |
def isabs(s):
    """Return True if the pathname is absolute, i.e. begins with '/'."""
    is_absolute = s.startswith('/')
    return is_absolute
paul@6 | 61 | |
paul@6 | 62 | |
paul@6 | 63 | # Join pathnames. |
paul@6 | 64 | # Ignore the previous parts if a part is absolute. |
paul@6 | 65 | # Insert a '/' unless the first part is empty or already ends in '/'. |
paul@6 | 66 | |
def join(a, *p):
    """Concatenate pathname components, adding '/' between them as needed.

    A component that starts with '/' is absolute and replaces everything
    accumulated before it.  A separator is inserted only when the
    accumulated path is non-empty and does not already end in '/'.  An
    empty final component therefore yields a path ending in a separator.
    """
    result = a
    for part in p:
        if part.startswith('/'):
            # Absolute component: discard whatever came before.
            result = part
            continue
        if result != '' and not result.endswith('/'):
            result += '/' + part
        else:
            result += part
    return result
paul@6 | 81 | |
paul@6 | 82 | |
paul@6 | 83 | # Split a path in head (everything up to the last '/') and tail (the |
paul@6 | 84 | # rest). If the path ends in '/', tail will be empty. If there is no |
paul@6 | 85 | # '/' in the path, head will be empty. |
paul@6 | 86 | # Trailing '/'es are stripped from head unless it is the root. |
paul@6 | 87 | |
def split(p):
    """Split a pathname into a "(head, tail)" tuple.

    tail is everything after the last '/', head is everything up to and
    including it.  Trailing slashes are removed from head unless head
    consists solely of slashes.  Either part may be empty.
    """
    cut = p.rfind('/') + 1
    head = p[:cut]
    tail = p[cut:]
    trimmed = head.rstrip('/')
    # An all-slash head (the root) strips down to '' and is left untouched.
    if trimmed:
        head = trimmed
    return head, tail
paul@6 | 96 | |
paul@6 | 97 | |
paul@6 | 98 | # Split a path in root and extension. |
paul@6 | 99 | # The extension is everything starting at the last dot in the last |
paul@6 | 100 | # pathname component; the root is everything before that. |
paul@6 | 101 | # It is always true that root + ext == p. |
paul@6 | 102 | |
def splitext(p):
    """Split a pathname into a root and an extension.

    The work is delegated to genericpath._splitext, which is given this
    module's sep, altsep and extsep values.
    """
    root_and_ext = genericpath._splitext(p, sep, altsep, extsep)
    return root_and_ext
paul@6 | 105 | |
paul@6 | 106 | # Split a pathname into a drive specification and the rest of the |
paul@6 | 107 | # path. Useful on DOS/Windows/NT; on Unix, the drive is always empty. |
paul@6 | 108 | |
def splitdrive(p):
    """Split a pathname into a (drive, path) pair.

    The drive part is always the empty string here; the path part is the
    argument unchanged.
    """
    drive = ''
    return drive, p
paul@6 | 113 | |
paul@6 | 114 | |
paul@6 | 115 | # Return the tail (basename) part of a path, same as split(path)[1]. |
paul@6 | 116 | |
def basename(p):
    """Return the final pathname component: the text after the last '/'."""
    last_slash = p.rfind('/')
    # rfind gives -1 when there is no slash, so the slice starts at 0.
    return p[last_slash + 1:]
paul@6 | 121 | |
paul@6 | 122 | |
paul@6 | 123 | # Return the head (dirname) part of a path, same as split(path)[0]. |
paul@6 | 124 | |
def dirname(p):
    """Return the directory part of a pathname.

    This is everything up to the last '/', with trailing slashes removed
    unless the result consists solely of slashes.
    """
    head = p[:p.rfind('/') + 1]
    trimmed = head.rstrip('/')
    # Keep an all-slash head (the root) as it is.
    if trimmed:
        return trimmed
    return head
paul@6 | 132 | |
paul@6 | 133 | |
paul@6 | 134 | # Is a path a symbolic link? |
paul@6 | 135 | # This will always return false on systems where os.lstat doesn't exist. |
paul@6 | 136 | |
def islink(path):
    """Return True if path refers to a symbolic link.

    Returns False when lstat() fails with ``error`` or AttributeError.
    """
    try:
        info = lstat(path)
    except (error, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
paul@6 | 144 | |
paul@6 | 145 | # Being true for dangling symbolic links is also useful. |
paul@6 | 146 | |
def lexists(path):
    """Return True if lstat() succeeds on path, False if it raises ``error``.

    Because lstat() is used, a broken symbolic link still counts as
    existing.
    """
    try:
        lstat(path)
    except error:
        return False
    else:
        return True
paul@6 | 154 | |
paul@6 | 155 | |
paul@6 | 156 | # Are two filenames really pointing to the same file? |
paul@6 | 157 | |
def samefile(f1, f2):
    """Test whether two pathnames reference the same actual file.

    Both pathnames are stat'ed and the two results are compared with
    samestat().  Any exception raised by the stat call propagates to the
    caller.
    """
    # At module level the name ``stat`` is bound to the ``stat`` *module*
    # (used for S_ISLNK / S_ISDIR), so calling ``stat(f1)`` raised
    # TypeError.  Bind the stat function under a distinct local name.
    # NOTE(review): assumes genericos exports stat() alongside the lstat()
    # and fstat() this file already imports from it -- confirm.
    from genericos import stat as _stat_path
    s1 = _stat_path(f1)
    s2 = _stat_path(f2)
    return samestat(s1, s2)
paul@6 | 163 | |
paul@6 | 164 | |
paul@6 | 165 | # Are two open files really referencing the same file? |
paul@6 | 166 | # (Not necessarily the same file descriptor!) |
paul@6 | 167 | |
def sameopenfile(fp1, fp2):
    """Test whether two open files reference the same file.

    Each argument is passed to fstat() and the results are compared with
    samestat().
    """
    first = fstat(fp1)
    second = fstat(fp2)
    return samestat(first, second)
paul@6 | 173 | |
paul@6 | 174 | |
paul@6 | 175 | # Are two stat buffers (obtained from stat, fstat or lstat) |
paul@6 | 176 | # describing the same file? |
paul@6 | 177 | |
def samestat(s1, s2):
    """Test whether two stat buffers describe the same file.

    The buffers match when both their st_ino and their st_dev fields are
    equal.
    """
    if s1.st_ino != s2.st_ino:
        return False
    return s1.st_dev == s2.st_dev
paul@6 | 182 | |
paul@6 | 183 | |
paul@6 | 184 | # Is a path a mount point? |
paul@6 | 185 | # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?) |
paul@6 | 186 | |
def ismount(path):
    """Test whether a path is a mount point.

    A path counts as a mount point when it is not a symbolic link and
    either its parent ('path/..') is on a different device, or the parent
    is the very same i-node as the path itself.
    """
    # A symlink can never be a mount point.
    if islink(path):
        return False
    try:
        here = lstat(path)
        parent = lstat(join(path, '..'))
    except error:
        # It could not be examined -- so not a mount point.
        return False
    # Different device than the parent, or the same i-node as the parent.
    if here.st_dev != parent.st_dev or here.st_ino == parent.st_ino:
        return True
    return False
paul@6 | 206 | |
paul@6 | 207 | |
paul@6 | 208 | # Directory tree walk. |
paul@6 | 209 | # For each directory under top (including top itself, but excluding |
paul@6 | 210 | # '.' and '..'), func(arg, dirname, filenames) is called, where |
paul@6 | 211 | # dirname is the name of the directory and filenames is the list |
paul@6 | 212 | # of files (and subdirectories etc.) in the directory. |
paul@6 | 213 | # The func may modify the filenames list, to implement a filter, |
paul@6 | 214 | # or to impose a different order of visiting. |
paul@6 | 215 | |
def walk(top, func, arg):
    """Walk a directory tree, invoking a callback for every directory.

    For top and each directory found beneath it, func(arg, dirname, fnames)
    is called, where dirname is the directory's path and fnames is the list
    returned by listdir() for it.  Recursion happens after the callback
    and iterates that same list object, so func may edit fnames in place
    (e.g. with del or slice assignment) to prune the walk or change the
    visiting order.  arg is passed through to func untouched.

    Directories that cannot be listed, and entries that cannot be
    lstat'ed, are silently skipped.  Because lstat() is used, symbolic
    links to directories are not descended into.
    """
    try:
        entries = listdir(top)
    except error:
        return
    func(arg, top, entries)
    for entry in entries:
        child = join(top, entry)
        try:
            info = lstat(child)
        except error:
            continue
        else:
            if stat.S_ISDIR(info.st_mode):
                walk(child, func, arg)
paul@6 | 243 | |
paul@6 | 244 | |
paul@6 | 245 | # Expand paths beginning with '~' or '~user'. |
paul@6 | 246 | # '~' means $HOME; '~user' means that user's home directory. |
paul@6 | 247 | # If the path doesn't begin with '~', or if the user or $HOME is unknown, |
paul@6 | 248 | # the path is returned unchanged (leaving error reporting to whatever |
paul@6 | 249 | # function is called with the expanded path as argument). |
paul@6 | 250 | # See also module 'glob' for expansion of *, ? and [...] in pathnames. |
paul@6 | 251 | # (A function should also be defined to do full *sh-style environment |
paul@6 | 252 | # variable expansion.) |
paul@6 | 253 | |
def expanduser(path):
    """Expand a leading '~' or '~user' into a home directory.

    A bare '~' uses environ['HOME'] when set, otherwise the password
    database entry for the current uid.  '~user' looks the user up in the
    password database; if the user is unknown the path is returned
    unchanged.  Paths not starting with '~' are returned unchanged.
    """
    if not path.startswith('~'):
        return path
    # The user name (possibly empty) runs up to the first '/' or the end.
    end = path.find('/', 1)
    if end < 0:
        end = len(path)
    if end != 1:
        import pwd
        try:
            entry = pwd.getpwnam(path[1:end])
        except KeyError:
            return path
        home = entry.pw_dir
    elif 'HOME' in environ:
        home = environ['HOME']
    else:
        import pwd
        home = pwd.getpwuid(getuid()).pw_dir
    home = home.rstrip('/')
    # A home of '/' strips to ''; fall back to '/' if nothing remains.
    return (home + path[end:]) or '/'
paul@6 | 277 | |
paul@6 | 278 | |
paul@6 | 279 | # Expand paths containing shell variable substitutions. |
paul@6 | 280 | # This expands the forms $variable and ${variable} only. |
paul@6 | 281 | # Non-existent variables are left unchanged. |
paul@6 | 282 | |
# Compiled pattern used by expandvars(); built on first use.
_varprog = None

def expandvars(path):
    """Expand shell variables of the forms $var and ${var}.

    Values are taken from environ.  Variables that are not set are left
    in the path untouched.  Substituted text is not rescanned.
    """
    global _varprog
    if '$' not in path:
        return path
    if not _varprog:
        import re
        _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
    pos = 0
    match = _varprog.search(path, pos)
    while match:
        start, end = match.span(0)
        var = match.group(1)
        if var.startswith('{') and var.endswith('}'):
            var = var[1:-1]
        if var in environ:
            expanded = path[:start] + environ[var]
            # Resume searching just past the substituted value.
            pos = len(expanded)
            path = expanded + path[end:]
        else:
            pos = end
        match = _varprog.search(path, pos)
    return path
paul@6 | 311 | |
paul@6 | 312 | |
paul@6 | 313 | # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. |
paul@6 | 314 | # It should be understood that this may change the meaning of the path |
paul@6 | 315 | # if it contains symbolic links! |
paul@6 | 316 | |
def normpath(path):
    """Normalize a path: collapse repeated slashes and '.'/'..' components.

    The result has the same string type (unicode or not) as the argument.
    An empty path, or one that normalizes to nothing, yields '.'.
    """
    # Preserve unicode (if path is unicode).
    if isinstance(path, _unicode):
        slash, dot = u'/', u'.'
    else:
        slash, dot = '/', '.'
    if path == '':
        return dot
    # POSIX allows one or two initial slashes, but treats three or more
    # as a single slash.
    if not path.startswith('/'):
        leading = 0
    elif path.startswith('//') and not path.startswith('///'):
        leading = 2
    else:
        leading = 1
    kept = []
    for piece in path.split('/'):
        if piece in ('', '.'):
            continue
        if piece == '..':
            if kept and kept[-1] != '..':
                # Cancel the preceding real component.
                kept.pop()
                continue
            if leading and not kept:
                # '..' directly under the root has nowhere to go.
                continue
        kept.append(piece)
    path = slash.join(kept)
    if leading:
        path = slash * leading + path
    return path or dot
paul@6 | 344 | |
paul@6 | 345 | |
def abspath(path):
    """Return a normalized, absolute version of path.

    A relative path is joined onto the current working directory, which
    is obtained with getcwdu() for unicode paths and getcwd() otherwise.
    """
    if isabs(path):
        return normpath(path)
    cwd = getcwdu() if isinstance(path, _unicode) else getcwd()
    return normpath(join(cwd, path))
paul@6 | 355 | |
paul@6 | 356 | |
paul@6 | 357 | # Return a canonical path (i.e. the absolute location of a file on the |
paul@6 | 358 | # filesystem). |
paul@6 | 359 | |
def realpath(filename):
    """Return the canonical path of filename with symbolic links resolved.

    The path is examined one leading prefix at a time.  At the first
    prefix that is a symbolic link, the link is resolved and the whole
    procedure restarts on the resolved target joined with the remaining
    components.  If the link is part of a loop, the absolute form of the
    unresolved path is returned instead.
    """
    if isabs(filename):
        pieces = ['/'] + filename.split('/')[1:]
    else:
        pieces = [''] + filename.split('/')

    for end in range(2, len(pieces) + 1):
        prefix = join(*pieces[:end])
        if not islink(prefix):
            continue
        remainder = pieces[end:]
        target = _resolve_link(prefix)
        if target is None:
            # Symlink loop -- keep the original prefix plus the rest.
            return abspath(join(prefix, *remainder))
        return realpath(join(target, *remainder))

    return abspath(filename)
paul@6 | 381 | |
paul@6 | 382 | |
def _resolve_link(path):
    """Internal helper: follow a chain of symbolic links starting at path.

    Returns the first path reached that is not a symbolic link, or None
    if a path is encountered a second time (i.e. the links form a loop).
    """
    visited = set()
    while islink(path):
        if path in visited:
            # Already been here, so the links loop.
            return None
        visited.add(path)
        target = readlink(path)
        if isabs(target):
            path = normpath(target)
        else:
            # A relative target is relative to the link's own directory.
            path = normpath(join(dirname(path), target))
    return path
paul@6 | 402 | |
# True only when running on the 'darwin' platform.
supports_unicode_filenames = (sys.platform == 'darwin')
paul@6 | 404 | |
def relpath(path, start=curdir):
    """Return path expressed relative to start (default: current directory).

    Both arguments are made absolute first.  Raises ValueError if path is
    empty.
    """
    if not path:
        raise ValueError("no path specified")

    start_parts = [part for part in abspath(start).split(sep) if part]
    path_parts = [part for part in abspath(path).split(sep) if part]

    # Number of leading components shared by start and path.
    shared = len(commonprefix([start_parts, path_parts]))

    # Climb out of what is left of start, then descend into the rest of path.
    climb = [pardir] * (len(start_parts) - shared)
    rel_parts = climb + path_parts[shared:]
    if rel_parts:
        return join(*rel_parts)
    return curdir