Coverage for debputy/filesystem_scan.py: 60%
317 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-22 14:29 +0100
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-22 14:29 +0100
1import operator
2import os
3from abc import ABC
4from collections.abc import Container
5from typing import List, TYPE_CHECKING, Iterable, Dict, Optional, Iterator, Tuple
6from weakref import ref, ReferenceType
8from debputy.intermediate_manifest import PathType
10if TYPE_CHECKING:
11 # Circular dependency
12 from debputy.highlevel_manifest import ManifestPathRule
# Sort key that orders paths by their `basename` attribute (used by FSPath.all_paths and FSPath.walk).
BY_BASENAME = operator.attrgetter('basename')
class FSPath:
    """A single path (node) of an in-memory file system tree.

    A path stores its basename, a weak reference to its parent directory and an optional
    mapping from basename to child path.  The type predicates (`is_dir`, `is_file`,
    `is_symlink`, `has_fs_path`) and the file system operations (`stat`, `readlink`,
    `fs_path`) raise NotImplementedError in this class; subclasses provide them.
    """

    __slots__ = ('_basename', '_parent_dir', '_children', '_path_cache', '_parent_path_cache', '__weakref__',)

    def __init__(self,
                 basename: str,
                 parent: Optional['FSPath'],
                 children: Optional[Dict[str, 'FSPath']] = None,
                 ):
        """Create the path and, when `parent` is given, register it as a child of `parent`.

        :param basename: Name of this path inside its parent directory.
        :param parent: Directory to attach to, or None to create the path detached.
        :param children: Optional initial basename -> child mapping (stored as-is).
        """
        self._basename = basename
        self._path_cache: Optional[str] = None
        self._parent_path_cache: Optional[str] = None
        self._children = children
        # The self._parent_dir = None is to create `_parent_dir` because the parent_dir setter calls
        # is_orphaned, which assumes self._parent_dir is an attribute.
        self._parent_dir: Optional[ReferenceType['FSPath']] = None
        if parent is not None:
            self.parent_dir = parent

    def __repr__(self):
        return (f'{self.__class__.__name__}({self._orphan_safe_path()!r},'
                f' is_file={self.is_file},'
                f' is_dir={self.is_dir},'
                f' is_symlink={self.is_symlink},'
                f' children_len={len(self._children) if self._children else 0})'
                )

    @property
    def basename(self) -> str:
        """Name of this path inside its parent directory."""
        return self._basename

    @basename.setter
    def basename(self, new_name: str) -> None:
        """Rename the path, keeping the parent's child mapping keyed by the new name."""
        if self.is_orphaned:
            self._basename = new_name
            return
        # Detach, rename and re-attach so the parent never holds this path under a stale key.
        parent = self.parent_dir
        self.parent_dir = None
        self._basename = new_name
        self.parent_dir = parent

    @property
    def children(self) -> Iterable['FSPath']:
        """Iterate over the direct children (nothing is yielded when there is no child mapping)."""
        if self._children is not None:
            yield from self._children.values()

    def lookup(self, path: str) -> Optional['FSPath']:
        """Resolve `path` relative to this path; return None unless every segment exists."""
        match, missing = self.attempt_lookup(path)
        if missing:
            return None
        return match

    def attempt_lookup(self, path: str) -> Tuple['FSPath', List[str]]:
        """Resolve as much of the '/'-separated `path` as possible.

        :return: The deepest path that could be resolved plus the list of segments that could
          not be resolved (empty when the full path matched).
        :raises ValueError: If a ".." segment steps above a path whose parent_dir is None.
        """
        dir_parts = path.split('/')
        current = self
        for no, dir_part in enumerate(dir_parts):
            if dir_part == '.':
                continue
            if dir_part == '..':
                current = current.parent_dir
                if current is None:
                    raise ValueError(f'The path "{path}" escapes the root dir')
                continue
            try:
                current = current[dir_part]
            except KeyError:
                return current, dir_parts[no:]
        return current, []

    def all_paths(self) -> Iterable['FSPath']:
        """Yield this path followed by all descendants, depth first with children sorted by basename."""
        yield self
        if not self.is_dir:
            return
        by_basename = BY_BASENAME
        # The stack is popped from the end, hence the reverse sort to visit the smallest name first.
        stack = sorted(self.children, key=by_basename, reverse=True)
        while stack:
            current = stack.pop()
            yield current
            if current.is_dir:
                stack.extend(sorted(current.children, key=by_basename, reverse=True))

    def walk(self) -> Iterable[Tuple['FSPath', List['FSPath']]]:
        """Yield `(path, sorted children)` pairs, depth first, starting with this path.

        A non-directory yields a single pair with an empty child list.
        """
        if not self.is_dir:
            yield self, []
            return
        by_basename = BY_BASENAME
        stack = [self]
        while stack:
            current = stack.pop()
            children = sorted(current.children, key=by_basename)
            assert not children or current.is_dir
            yield current, children
            stack.extend(reversed(children))

    def _orphan_safe_path(self) -> str:
        """Return `path`, or a "<orphaned>/<basename>" placeholder when there is no parent."""
        return self.path if not self.is_orphaned else f'<orphaned>/{self.basename}'

    @property
    def is_orphaned(self) -> bool:
        """True when no parent was set or the weakly referenced parent no longer exists."""
        return self._parent_dir is None or self._parent_dir() is None

    def __getitem__(self, key) -> 'FSPath':
        """Return the direct child named `key` (a basename or an FSPath whose basename is used).

        :raises KeyError: If there is no such child.
        """
        if self._children is None:
            raise KeyError(f"{key} (note: {self._orphan_safe_path()!r} has no children)")
        if isinstance(key, FSPath):
            key = key.basename
        return self._children[key]

    def __delitem__(self, key) -> None:
        """Remove the direct child named `key` (a basename or an FSPath) from the child mapping.

        Only the mapping entry is removed; the removed child's own parent reference is left as-is.

        :raises KeyError: If there is no such child (including when this path has no children).
        """
        # Accept the same key types as __getitem__.
        if isinstance(key, FSPath):
            key = key.basename
        if self._children is None:
            # Report a missing child as KeyError rather than failing on `None` with a TypeError.
            raise KeyError(f"{key} (note: {self._orphan_safe_path()!r} has no children)")
        del self._children[key]

    def get(self, key: str) -> 'Optional[FSPath]':
        """Return the direct child named `key`, or None when there is no such child."""
        try:
            return self[key]
        except KeyError:
            return None

    def __contains__(self, item: object) -> bool:
        """Test for a direct child, given either its basename or the FSPath itself.

        For an FSPath, the child registered under that basename must also compare equal to it.
        Any other type is never contained.
        """
        k = item
        v = item
        if isinstance(item, str):
            v = None
        elif isinstance(item, FSPath):
            k = item.basename
        else:
            return False
        m = self.get(k)
        return m is not None and (v is None or v == m)

    def _add_child(self, child: 'FSPath') -> None:
        """Register `child` under its basename, detaching any child already using that name.

        :raises TypeError: If this path is not a directory.
        """
        if not self.is_dir:
            raise TypeError(f"{self._orphan_safe_path()!r} is not a directory")
        if self._children is None:
            self._children = {}

        conflict_child = self.get(child.basename)
        if conflict_child is not None:
            conflict_child.parent_dir = None
        self._children[child.basename] = child

    @property
    def tar_path(self) -> str:
        """The path with a trailing "/" appended for directories."""
        path = self.path
        if self.is_dir:
            return path + '/'
        return path

    @property
    def path(self) -> str:
        """Parent path joined with the basename; cached for as long as the parent path is unchanged."""
        parent_path = self.parent_dir_path
        if self._parent_path_cache is not None and self._parent_path_cache == parent_path:
            return self._path_cache
        self._parent_path_cache = parent_path
        self._path_cache = os.path.join(parent_path, self.basename)
        return self._path_cache

    @property
    def parent_dir(self) -> Optional['FSPath']:
        """The parent directory.

        :raises ReferenceError: If the path is orphaned (never attached, detached, or the parent
          has been garbage collected).
        """
        parent_ref = self._parent_dir
        # `_parent_dir` is None for a detached path; calling it blindly would raise TypeError.
        p = parent_ref() if parent_ref is not None else None
        if p is None:
            raise ReferenceError(f"The path {self.basename} is orphaned! {self.__class__.__name__}")
        return p

    @parent_dir.setter
    def parent_dir(self, new_parent: Optional['FSPath']) -> None:
        """Move the path below `new_parent`, or detach it when `new_parent` is None.

        :raises ValueError: If `new_parent` is not a directory.
        """
        if new_parent is not None and not new_parent.is_dir:
            raise ValueError(f"The parent {new_parent._orphan_safe_path()} must be a directory")
        if not self.is_orphaned:
            old_parent = self.parent_dir
            siblings = old_parent._children
            # Only drop the entry when it really is this path.  The entry may already have been
            # removed through `del parent[name]`, or replaced by another path with the same name.
            if siblings is not None and siblings.get(self.basename) is self:
                del siblings[self.basename]
        if new_parent is not None:
            self._parent_dir = ref(new_parent)
            new_parent._add_child(self)
        else:
            self._parent_dir = None
        # Force `path` to be recomputed after any change of parent.
        self._parent_path_cache = None

    @property
    def parent_dir_path(self) -> Optional[str]:
        """The `path` of the parent directory."""
        return self.parent_dir.path

    def stat(self) -> os.stat_result:
        raise NotImplementedError()

    def readlink(self) -> str:
        raise NotImplementedError()

    @property
    def fs_path(self) -> str:
        raise NotImplementedError()

    @property
    def is_dir(self) -> bool:
        raise NotImplementedError()

    @property
    def is_file(self) -> bool:
        raise NotImplementedError()

    @property
    def is_symlink(self) -> bool:
        raise NotImplementedError()

    @property
    def has_fs_path(self) -> bool:
        raise NotImplementedError()
class FSPathDirEntry(FSPath):
    """Path backed by an `os.DirEntry`; type checks and `stat` never follow symlinks."""

    __slots__ = ('_dir_entry',)

    def __init__(self,
                 dir_entry: os.DirEntry,
                 parent: 'FSPath',
                 children: Optional[Dict[str, 'FSPath']] = None,
                 ):
        """Wrap `dir_entry` and attach the new path to `parent` under the entry's name.

        :param dir_entry: The directory entry providing name, type and file system path.
        :param parent: Directory the new path is registered in.
        :param children: Optional initial basename -> child mapping.
        """
        # Set before calling the base constructor, which attaches the path to `parent`.
        self._dir_entry = dir_entry
        super().__init__(
            dir_entry.name,
            parent=parent,
            children=children,
        )

    @property
    def is_dir(self) -> bool:
        return self._dir_entry.is_dir(follow_symlinks=False)

    @property
    def is_file(self) -> bool:
        return self._dir_entry.is_file(follow_symlinks=False)

    @property
    def is_symlink(self) -> bool:
        return self._dir_entry.is_symlink()

    def stat(self) -> os.stat_result:
        """Return the stat result of the entry itself (symlinks are not followed)."""
        return self._dir_entry.stat(follow_symlinks=False)

    def readlink(self) -> str:
        """Return the symlink target.

        :raises TypeError: If the path is not a symlink.
        """
        if not self.is_symlink:
            raise TypeError(f'"{self._orphan_safe_path()!r}" is not a symlink')
        return os.readlink(self.fs_path)

    @property
    def fs_path(self) -> str:
        """The `path` attribute of the underlying directory entry."""
        return self._dir_entry.path

    @property
    def has_fs_path(self) -> bool:
        return True
class VirtualFSPathBase(FSPath, ABC):
    """Base class for paths without a file system backing: `stat()` and `fs_path` raise TypeError."""

    __slots__ = ()

    @property
    def has_fs_path(self) -> bool:
        return False

    def stat(self) -> os.stat_result:
        """Always raises TypeError, since there is nothing on the file system to stat."""
        shown_path = self._orphan_safe_path()
        raise TypeError(
            f"stat() is only applicable to paths backed by the file system. The path {shown_path!r} is purely virtual"
        )

    @property
    def fs_path(self) -> str:
        """Always raises TypeError, since there is no file system path."""
        shown_path = self._orphan_safe_path()
        raise TypeError(
            f"fs_path is only applicable to paths backed by the file system. The path {shown_path!r} is purely virtual"
        )
class FSRootDir(FSPath):
    """Root directory of a tree: named ".", never orphaned, without a parent.

    The root is backed by the file system only when it was created with an `fs_path`.
    """

    __slots__ = ('_fs_path',)

    def __init__(self, fs_path: Optional[str] = None) -> None:
        """Create an empty root directory, optionally backed by the directory `fs_path`."""
        self._fs_path = fs_path
        super().__init__('.', None, children={})

    # -- Tree position: the root is its own anchor ----------------------------------------

    @property
    def is_orphaned(self) -> bool:
        return False

    @property
    def parent_dir(self) -> Optional['FSPath']:
        return None

    @property
    def parent_dir_path(self) -> Optional[str]:
        return None

    @property
    def path(self) -> str:
        return self.basename

    def _orphan_safe_path(self) -> str:
        return self.basename

    # -- Path type: always a plain directory ----------------------------------------------

    @property
    def is_dir(self) -> bool:
        return True

    @property
    def is_file(self) -> bool:
        return False

    @property
    def is_symlink(self) -> bool:
        return False

    def readlink(self) -> str:
        """Always raises TypeError, as the root is a directory."""
        raise TypeError(f'"{self._orphan_safe_path()!r}" is a directory; not a symlink')

    # -- File system backing (optional) ---------------------------------------------------

    @property
    def has_fs_path(self) -> bool:
        return self._fs_path is not None

    def stat(self) -> os.stat_result:
        """Return `os.stat` of the backing directory; raise TypeError for a virtual root."""
        if self.has_fs_path:
            return os.stat(self.fs_path)
        shown_path = self._orphan_safe_path()
        raise TypeError(
            f"stat() is only applicable to paths backed by the file system. The path {shown_path!r} is purely virtual"
        )

    @property
    def fs_path(self) -> str:
        """The backing directory; raises TypeError for a virtual root."""
        if self.has_fs_path:
            return self._fs_path
        shown_path = self._orphan_safe_path()
        raise TypeError(
            f"fs_path is only applicable to paths backed by the file system. The path {shown_path!r} is purely virtual"
        )
class VirtualDirectoryFSPath(VirtualFSPathBase):
    """A purely virtual directory: `is_dir` is always true, `is_file` and `is_symlink` never."""

    __slots__ = ()

    def __init__(self, basename: str, parent_dir: FSPath) -> None:
        """Create the directory `basename` and attach it to `parent_dir`."""
        super().__init__(basename, parent_dir)

    @property
    def is_symlink(self) -> bool:
        return False

    @property
    def is_file(self) -> bool:
        return False

    @property
    def is_dir(self) -> bool:
        return True

    def readlink(self) -> str:
        """Always raises TypeError, as a directory has no link target."""
        shown_path = self._orphan_safe_path()
        raise TypeError(f'"{shown_path!r}" is a directory; not a symlink')
class FSPathVirtualPath(VirtualFSPathBase):
    """A virtual path whose type and link target are taken from a manifest path rule."""

    __slots__ = ('_path_info',)

    def __init__(self, path_info: 'ManifestPathRule', parent_dir: FSPath):
        """Create the path named after the basename of `path_info.member_path` below `parent_dir`.

        :raises ValueError: If `path_info.ensure_path_type` is None or does not have
          `can_be_virtual` set.
        """
        spec_path = path_info.member_path
        assert spec_path is not None
        if path_info.ensure_path_type is None or not path_info.ensure_path_type.can_be_virtual:
            raise ValueError("path_info.ensure_path_type must be not None and have can_be_virtual=True")
        super().__init__(
            os.path.basename(spec_path),
            parent_dir,
        )
        self._path_info = path_info

    @property
    def is_dir(self) -> bool:
        return self._path_info.ensure_path_type == PathType.DIRECTORY

    @property
    def is_file(self) -> bool:
        return self._path_info.ensure_path_type == PathType.FILE

    @property
    def is_symlink(self) -> bool:
        return self._path_info.ensure_path_type == PathType.SYMLINK

    def readlink(self) -> str:
        """Return `link_target` of the path rule.

        :raises TypeError: If the path is not a symlink.
        """
        if not self.is_symlink:
            # The path may just as well be a file, so do not claim that it is a directory.
            raise TypeError(f'"{self._orphan_safe_path()!r}" is not a symlink')
        return self._path_info.link_target
class VirtualTestPath(VirtualFSPathBase):
    """A virtual path that is either a file or a directory, selected by constructor flags."""

    __slots__ = ('_path_type',)

    def __init__(self,
                 basename,
                 parent_dir: Optional[FSPath],
                 is_dir=False,
                 is_file=False,
                 ) -> None:
        """Create the path; exactly one of `is_dir` and `is_file` must be true.

        :param basename: Name of the path inside `parent_dir`.
        :param parent_dir: Directory to attach to, or None for a detached path.
        :raises ValueError: If both flags or neither flag is set.
        """
        if is_file == is_dir:
            if is_file:
                raise ValueError("Path cannot both be file and directory at the same time")
            # Both flags default to False, so this is the case of a caller passing no type at all.
            raise ValueError("Path must be either a file or a directory")
        # Set before calling the base constructor, which attaches the path to `parent_dir`.
        self._path_type = PathType.DIRECTORY if is_dir else PathType.FILE
        super().__init__(
            basename,
            parent=parent_dir,
        )

    @property
    def is_dir(self) -> bool:
        return self._path_type == PathType.DIRECTORY

    @property
    def is_file(self) -> bool:
        return self._path_type == PathType.FILE

    @property
    def is_symlink(self) -> bool:
        return self._path_type == PathType.SYMLINK

    def readlink(self) -> str:
        """Always raises TypeError, as the constructor only creates files and directories."""
        assert not self.is_symlink
        raise TypeError(f"readlink is only valid for symlinks ({self.path!r})")
# Sort key ordering paths by their `basename` attribute (used by _scan_dir).
BY_NAME = operator.attrgetter('basename')
def _scan_dir(parent: FSPath) -> Iterable['FSPath']:
    """Create a child path below `parent` for every entry of the directory `parent.fs_path`.

    Creating a `FSPathDirEntry` registers it as a child of `parent`.

    :return: The new children sorted by basename in reverse order, so that popping from the
      end of the result visits them in ascending order.
    """
    # The context manager releases the directory handle even if creating a child fails midway.
    with os.scandir(parent.fs_path) as dir_entries:
        children = [
            FSPathDirEntry(dir_entry, parent)
            for dir_entry in dir_entries
            if dir_entry.name not in ('.', '..')
        ]
    children.sort(key=BY_NAME, reverse=True)
    return children
def build_fs_from_root_dir(root_dir) -> FSPath:
    """Scan the directory `root_dir` and return the root of the resulting path tree.

    The top-level "DEBIAN" entry is left out of the tree and is not descended into.
    """
    # We cannot use os.walk as it does not produce the right order.  We want "sorted DFS (dirs/files
    # interleaved, but symlinks last)" and os.walk can give us "unsorted DFS (child first)" or
    # "sorted BFS (files first, then dirs)" - neither of which is what we need.
    #
    # Test with:
    #
    #    etc/
    #    etc/apache2/
    #    etc/apache2/...
    #    etc/cgitrc     (<-- non-directory)
    #    etc/...        (<-- any non-symlink file type here as long as it sorts after cgitrc)
    #    ...
    #    <... followed by all symlinks ordered as above ...>
    #
    # The listed order is the correct order for emulating dpkg-deb, but os.walk does not enable us
    # to generate that order.
    #
    root = FSRootDir(fs_path=root_dir)
    pending: List[FSPath] = [root]
    while pending:
        directory = pending.pop()
        if not directory.is_dir:
            continue
        # Scanning registers every entry as a child of `directory`.
        entries = _scan_dir(directory)
        if directory.path == '.':
            # Keep the top-level DEBIAN entry out of both the traversal and the tree.
            entries = [entry for entry in entries if entry.path != './DEBIAN']
            if 'DEBIAN' in directory:
                del directory['DEBIAN']
        pending.extend(entries)
    return root