Coverage for debputy/filesystem_scan.py: 60%

317 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-22 14:29 +0100

1import operator 

2import os 

3from abc import ABC 

4from collections.abc import Container 

5from typing import List, TYPE_CHECKING, Iterable, Dict, Optional, Iterator, Tuple 

6from weakref import ref, ReferenceType 

7 

8from debputy.intermediate_manifest import PathType 

9 

10if TYPE_CHECKING: 

11 # Circular dependency 

12 from debputy.highlevel_manifest import ManifestPathRule 

13 

14 

# Sort key used to give directory listings a stable, name-based order.
BY_BASENAME = operator.attrgetter('basename')


class FSPath:
    """A node in an in-memory file system tree.

    A node knows its basename, a weak reference to its parent directory and
    (for directories) a mapping of basename to child node.  The parent holds
    the strong references to its children; children only reference their
    parent weakly.

    Subclasses must provide `is_dir`, `is_file`, `is_symlink`, `has_fs_path`,
    `fs_path`, `stat()` and `readlink()`; the base implementations raise
    NotImplementedError.
    """

    __slots__ = ('_basename', '_parent_dir', '_children', '_path_cache', '_parent_path_cache', '__weakref__',)

    def __init__(self,
                 basename: str,
                 parent: Optional['FSPath'],
                 children: Optional[Dict[str, 'FSPath']] = None,
                 ):
        """Create a node and, when `parent` is given, attach it to that parent.

        :param basename: Name of this entry inside its parent directory.
        :param parent: Directory to attach to, or None to start out orphaned.
        :param children: Optional pre-built basename -> node mapping.  It is
          stored as-is (the children are not re-parented by this constructor).
        """
        self._basename = basename
        self._path_cache: Optional[str] = None
        self._parent_path_cache: Optional[str] = None
        self._children = children
        # The self._parent_dir = None is to create `_parent_dir` because the parent_dir setter calls
        # is_orphaned, which assumes self._parent_dir is an attribute.
        self._parent_dir: Optional[ReferenceType['FSPath']] = None
        if parent is not None:
            self.parent_dir = parent

    def __repr__(self):
        return (f'{self.__class__.__name__}({self._orphan_safe_path()!r},'
                f' is_file={self.is_file},'
                f' is_dir={self.is_dir},'
                f' is_symlink={self.is_symlink},'
                f' children_len={len(self._children) if self._children else 0})'
                )

    @property
    def basename(self) -> str:
        """Name of this entry inside its parent directory."""
        return self._basename

    @basename.setter
    def basename(self, new_name: str) -> None:
        """Rename the node, keeping the parent's child mapping in sync."""
        if self.is_orphaned:
            self._basename = new_name
            return
        # The parent indexes children by basename, so detach, rename and
        # re-attach to have the mapping re-keyed under the new name.
        parent = self.parent_dir
        self.parent_dir = None
        self._basename = new_name
        self.parent_dir = parent

    @property
    def children(self) -> Iterable['FSPath']:
        """Iterate over the direct children (yields nothing if there are none)."""
        if self._children is not None:
            yield from self._children.values()

    def lookup(self, path: str) -> Optional['FSPath']:
        """Resolve `path` relative to this node.

        :return: The matching node, or None if any segment is missing.
        :raises ValueError: If `path` uses ".." to go above the root.
        """
        match, missing = self.attempt_lookup(path)
        if missing:
            return None
        return match

    def attempt_lookup(self, path: str) -> Tuple['FSPath', List[str]]:
        """Resolve as much of `path` as possible relative to this node.

        The path is split on "/"; "." segments are skipped and ".." moves to
        the parent directory.

        :return: A tuple of the deepest node that could be resolved and the
          list of segments that could not (empty on a full match).
        :raises ValueError: If ".." is applied to a node whose parent is None.
        """
        dir_parts = path.split('/')
        current = self
        for no, dir_part in enumerate(dir_parts):
            if dir_part == '.':
                continue
            if dir_part == '..':
                current = current.parent_dir
                if current is None:
                    raise ValueError(f'The path "{path}" escapes the root dir')
                continue
            try:
                current = current[dir_part]
            except KeyError:
                return current, dir_parts[no:]
        return current, []

    def all_paths(self) -> Iterable['FSPath']:
        """Yield this node and all descendants depth-first, siblings ordered by basename."""
        yield self
        if not self.is_dir:
            return
        by_basename = BY_BASENAME
        # The stack is popped from the end, so siblings are pushed in reverse
        # order to have them come out in ascending basename order.
        stack = sorted(self.children, key=by_basename, reverse=True)
        while stack:
            current = stack.pop()
            yield current
            if current.is_dir:
                stack.extend(sorted(current.children, key=by_basename, reverse=True))

    def walk(self) -> Iterable[Tuple['FSPath', List['FSPath']]]:
        """Yield `(node, sorted_children)` pairs depth-first starting at this node.

        Every node below this one is visited; nodes without children are
        yielded with an empty list.
        """
        if not self.is_dir:
            yield self, []
            return
        by_basename = BY_BASENAME
        stack = [self]
        while stack:
            current = stack.pop()
            children = sorted(current.children, key=by_basename)
            assert not children or current.is_dir
            yield current, children
            # Reversed so that the first child is the next one popped.
            stack.extend(reversed(children))

    def _orphan_safe_path(self) -> str:
        """Return a printable path that does not fail for orphaned nodes."""
        return self.path if not self.is_orphaned else f'<orphaned>/{self.basename}'

    @property
    def is_orphaned(self) -> bool:
        """True when there is no parent reference or the referenced parent is gone."""
        return self._parent_dir is None or self._parent_dir() is None

    def __getitem__(self, key) -> 'FSPath':
        """Return the direct child named `key` (a basename or an FSPath).

        :raises KeyError: If there is no such child.
        """
        if self._children is None:
            raise KeyError(f"{key} (note: {self._orphan_safe_path()!r} has no children)")
        if isinstance(key, FSPath):
            key = key.basename
        return self._children[key]

    def __delitem__(self, key) -> None:
        """Remove the child stored under `key` from the child mapping.

        Only the mapping entry is removed; the removed child's own parent
        reference is left untouched.
        """
        del self._children[key]

    def get(self, key: str) -> 'Optional[FSPath]':
        """Return the direct child named `key`, or None if there is none."""
        try:
            return self[key]
        except KeyError:
            return None

    def __contains__(self, item: object) -> bool:
        """Test for a direct child by basename (str) or by node (FSPath).

        For an FSPath, the child stored under its basename must also compare
        equal to `item`.  Any other type is never contained.
        """
        k = item
        v = item
        if isinstance(item, str):
            v = None
        elif isinstance(item, FSPath):
            k = item.basename
        else:
            return False
        m = self.get(k)
        return m is not None and (v is None or v == m)

    def _add_child(self, child: 'FSPath') -> None:
        """Register `child` under its basename, detaching any child already there.

        :raises TypeError: If this node is not a directory.
        """
        if not self.is_dir:
            raise TypeError(f"{self._orphan_safe_path()!r} is not a directory")
        if self._children is None:
            self._children = {}

        conflict_child = self.get(child.basename)
        if conflict_child is not None:
            conflict_child.parent_dir = None
        self._children[child.basename] = child

    @property
    def tar_path(self) -> str:
        """The path of this node, with a trailing "/" appended for directories."""
        path = self.path
        if self.is_dir:
            return path + '/'
        return path

    @property
    def path(self) -> str:
        """The parent's path joined with this node's basename.

        The result is cached and reused for as long as the parent's path is
        unchanged.
        """
        parent_path = self.parent_dir_path
        if self._parent_path_cache is not None and self._parent_path_cache == parent_path:
            return self._path_cache
        self._parent_path_cache = parent_path
        self._path_cache = os.path.join(parent_path, self.basename)
        return self._path_cache

    @property
    def parent_dir(self) -> Optional['FSPath']:
        """The parent directory of this node.

        :raises ReferenceError: If the node is orphaned, i.e. it was never
          attached, was detached, or its parent no longer exists.
        """
        parent_ref = self._parent_dir
        # `_parent_dir` is None for a node without a parent; calling it
        # unconditionally would fail with a TypeError instead of the
        # ReferenceError that signals an orphaned path.
        p = parent_ref() if parent_ref is not None else None
        if p is None:
            raise ReferenceError(f"The path {self.basename} is orphaned! {self.__class__.__name__}")
        return p

    @parent_dir.setter
    def parent_dir(self, new_parent: Optional['FSPath']) -> None:
        """Move this node below `new_parent`, or detach it when given None.

        :raises ValueError: If `new_parent` is not a directory.
        """
        if new_parent is not None and not new_parent.is_dir:
            raise ValueError(f"The parent {new_parent._orphan_safe_path()} must be a directory")
        if not self.is_orphaned:
            old_parent = self.parent_dir
            del old_parent._children[self.basename]
        if new_parent is not None:
            self._parent_dir = ref(new_parent)
            new_parent._add_child(self)
        else:
            self._parent_dir = None
        # Force `path` to be recomputed after any change of parent.
        self._parent_path_cache = None

    @property
    def parent_dir_path(self) -> Optional[str]:
        """The path of the parent directory."""
        return self.parent_dir.path

    def stat(self) -> os.stat_result:
        raise NotImplementedError()

    def readlink(self) -> str:
        raise NotImplementedError()

    @property
    def fs_path(self) -> str:
        raise NotImplementedError()

    @property
    def is_dir(self) -> bool:
        raise NotImplementedError()

    @property
    def is_file(self) -> bool:
        raise NotImplementedError()

    @property
    def is_symlink(self) -> bool:
        raise NotImplementedError()

    @property
    def has_fs_path(self) -> bool:
        raise NotImplementedError()

224 

225 

class FSPathDirEntry(FSPath):
    """A path backed by an `os.DirEntry` obtained from scanning a real directory.

    Type checks and `stat()` are answered by the directory entry without
    following symlinks.
    """

    __slots__ = ('_dir_entry',)

    def __init__(self,
                 dir_entry: os.DirEntry,
                 parent: 'FSPath',
                 children: Optional[Dict[str, 'FSPath']] = None,
                 ):
        """Wrap `dir_entry` and attach the new node to `parent`.

        :param dir_entry: The scanned entry; its name becomes the basename.
        :param parent: Directory node to attach to.
        :param children: Optional pre-built basename -> node mapping.
        """
        self._dir_entry = dir_entry
        super().__init__(
            dir_entry.name,
            parent=parent,
            children=children,
        )

    @property
    def is_dir(self) -> bool:
        return self._dir_entry.is_dir(follow_symlinks=False)

    @property
    def is_file(self) -> bool:
        return self._dir_entry.is_file(follow_symlinks=False)

    @property
    def is_symlink(self) -> bool:
        return self._dir_entry.is_symlink()

    def stat(self) -> os.stat_result:
        """Return the stat result of the entry itself (symlinks are not followed)."""
        return self._dir_entry.stat(follow_symlinks=False)

    def readlink(self) -> str:
        """Return the symlink target.

        :raises TypeError: If the path is not a symlink.
        """
        if not self.is_symlink:
            raise TypeError(f'"{self._orphan_safe_path()!r}" is not a symlink')
        return os.readlink(self.fs_path)

    @property
    def fs_path(self) -> str:
        """The path of the underlying directory entry on the real file system."""
        return self._dir_entry.path

    @property
    def has_fs_path(self) -> bool:
        return True

268 

269 

class VirtualFSPathBase(FSPath, ABC):
    """Common base for paths that exist only in memory.

    Such paths report `has_fs_path` as False and reject `stat()` and
    `fs_path` with a TypeError.
    """

    __slots__ = ()

    @property
    def has_fs_path(self) -> bool:
        return False

    def stat(self) -> os.stat_result:
        """Always raises TypeError: there is nothing on disk to stat."""
        safe_path = self._orphan_safe_path()
        raise TypeError(
            "stat() is only applicable to paths backed by the file system. The path"
            f" {safe_path!r} is purely virtual"
        )

    @property
    def fs_path(self) -> str:
        """Always raises TypeError: a virtual path has no file system location."""
        safe_path = self._orphan_safe_path()
        raise TypeError(
            "fs_path is only applicable to paths backed by the file system. The path"
            f" {safe_path!r} is purely virtual"
        )

286 

287 

class FSRootDir(FSPath):
    """The root directory of a file system tree.

    The root is always a directory named ".", has no parent and is never
    considered orphaned.  It may optionally be backed by a directory on the
    real file system.
    """

    __slots__ = ('_fs_path',)

    def __init__(self, fs_path: Optional[str] = None) -> None:
        """Create a root, optionally backed by the real directory `fs_path`."""
        self._fs_path = fs_path
        super().__init__('.', None, children={})

    # -- Tree position: the root has no parent and its path is its basename --

    @property
    def is_orphaned(self) -> bool:
        return False

    @property
    def parent_dir(self) -> Optional['FSPath']:
        return None

    @property
    def parent_dir_path(self) -> Optional[str]:
        return None

    @property
    def path(self) -> str:
        return self.basename

    def _orphan_safe_path(self) -> str:
        return self.basename

    # -- Path type: always a directory --

    @property
    def is_dir(self) -> bool:
        return True

    @property
    def is_file(self) -> bool:
        return False

    @property
    def is_symlink(self) -> bool:
        return False

    def readlink(self) -> str:
        """Always raises TypeError: the root is a directory."""
        safe_path = self._orphan_safe_path()
        raise TypeError(f'"{safe_path!r}" is a directory; not a symlink')

    # -- File system backing --

    @property
    def has_fs_path(self) -> bool:
        """True when the root was created with a file system path."""
        return self._fs_path is not None

    def stat(self) -> os.stat_result:
        """Stat the backing directory.

        :raises TypeError: If the root is not backed by the file system.
        """
        if self.has_fs_path:
            return os.stat(self.fs_path)
        raise TypeError("stat() is only applicable to paths backed by the file system. The path"
                        f" {self._orphan_safe_path()!r} is purely virtual")

    @property
    def fs_path(self) -> str:
        """The backing directory on the real file system.

        :raises TypeError: If the root is not backed by the file system.
        """
        if self.has_fs_path:
            return self._fs_path
        raise TypeError("fs_path is only applicable to paths backed by the file system. The path"
                        f" {self._orphan_safe_path()!r} is purely virtual")

346 

347 

class VirtualDirectoryFSPath(VirtualFSPathBase):
    """A directory that exists only in memory."""

    __slots__ = ()

    def __init__(self, basename: str, parent_dir: FSPath) -> None:
        """Create the directory `basename` and attach it to `parent_dir`."""
        super().__init__(basename, parent_dir)

    @property
    def is_dir(self) -> bool:
        return True

    @property
    def is_file(self) -> bool:
        return False

    @property
    def is_symlink(self) -> bool:
        return False

    def readlink(self) -> str:
        """Always raises TypeError: a directory has no link target."""
        safe_path = self._orphan_safe_path()
        raise TypeError(f'"{safe_path!r}" is a directory; not a symlink')

372 

373 

class FSPathVirtualPath(VirtualFSPathBase):
    """A virtual path whose type and link target come from a manifest path rule."""

    __slots__ = ('_path_info',)

    def __init__(self, path_info: 'ManifestPathRule', parent_dir: FSPath):
        """Create a virtual path from `path_info` and attach it to `parent_dir`.

        The basename is the last component of `path_info.member_path`.

        :raises ValueError: If `path_info.ensure_path_type` is None or its
          `can_be_virtual` is false.
        """
        spec_path = path_info.member_path
        assert spec_path is not None
        if path_info.ensure_path_type is None or not path_info.ensure_path_type.can_be_virtual:
            raise ValueError("path_info.ensure_path_type must be not None and have can_be_virtual=True")
        # Assigned before the base initialiser (as the sibling classes do) so
        # the type properties work at any point after construction starts.
        self._path_info = path_info
        super().__init__(
            os.path.basename(spec_path),
            parent_dir,
        )

    @property
    def is_dir(self) -> bool:
        return self._path_info.ensure_path_type == PathType.DIRECTORY

    @property
    def is_file(self) -> bool:
        return self._path_info.ensure_path_type == PathType.FILE

    @property
    def is_symlink(self) -> bool:
        return self._path_info.ensure_path_type == PathType.SYMLINK

    def readlink(self) -> str:
        """Return the link target given by the path rule.

        :raises TypeError: If the path is not a symlink.
        """
        if not self.is_symlink:
            # The path may be a file as well as a directory, so the message
            # must not claim it is a directory.
            raise TypeError(f'"{self._orphan_safe_path()!r}" is not a symlink')
        return self._path_info.link_target

405 

406 

class VirtualTestPath(VirtualFSPathBase):
    """A virtual file or directory with an explicitly chosen type."""

    __slots__ = ('_path_type',)

    def __init__(self,
                 basename,
                 parent_dir: Optional[FSPath],
                 is_dir=False,
                 is_file=False,
                 ) -> None:
        """Create a virtual path that is either a file or a directory.

        :param basename: Name of the path inside `parent_dir`.
        :param parent_dir: Directory to attach to, or None for no parent.
        :param is_dir: Make the path a directory.
        :param is_file: Make the path a regular file.
        :raises ValueError: Unless exactly one of `is_dir` and `is_file` is set.
        """
        if is_file == is_dir:
            # Covers both "neither" (the defaults) and "both"; the message
            # must be accurate for either case.
            raise ValueError("Path must be exactly one of file or directory"
                             f" (got is_file={is_file!r}, is_dir={is_dir!r})")
        self._path_type = PathType.DIRECTORY if is_dir else PathType.FILE
        super().__init__(
            basename,
            parent=parent_dir,
        )

    @property
    def is_dir(self) -> bool:
        return self._path_type == PathType.DIRECTORY

    @property
    def is_file(self) -> bool:
        return self._path_type == PathType.FILE

    @property
    def is_symlink(self) -> bool:
        return self._path_type == PathType.SYMLINK

    def readlink(self) -> str:
        """Always raises TypeError: the constructor never creates a symlink."""
        assert not self.is_symlink
        raise TypeError(f"readlink is only valid for symlinks ({self.path!r})")

440 

441 

# Sort key for scanned directory entries.
BY_NAME = operator.attrgetter('basename')


def _scan_dir(parent: FSPath) -> Iterable['FSPath']:
    """Scan the directory behind `parent` and attach one node per entry.

    Each entry is wrapped in an `FSPathDirEntry` created with `parent` as its
    parent.

    :param parent: A directory node with a file system path (`fs_path`).
    :return: The new child nodes as a list sorted by basename in descending
      order (suitable for pushing onto a stack that is popped from the end).
    """
    # The context manager releases the directory handle even if building a
    # child node raises part-way through the scan.
    with os.scandir(parent.fs_path) as dir_entries:
        children = [
            FSPathDirEntry(dir_entry, parent)
            for dir_entry in dir_entries
            if dir_entry.name not in ('.', '..')
        ]
    return sorted(children, key=BY_NAME, reverse=True)

451 

452 

def build_fs_from_root_dir(root_dir) -> FSPath:
    """Build an in-memory tree mirroring the directory `root_dir`.

    The top-level `DEBIAN` entry is left out of the tree and its contents are
    not scanned.

    :param root_dir: Path of the directory to scan.
    :return: The root node of the resulting tree.
    """
    # os.walk is not usable here as it cannot produce the required order.  The
    # wanted order is "sorted DFS (dirs/files interleaved, but symlinks last)",
    # whereas os.walk can only give us "unsorted DFS (child first)" or
    # "sorted BFS (files first, then dirs)".
    #
    # Test with:
    #
    #    etc/
    #    etc/apache2/
    #    etc/apache2/...
    #    etc/cgitrc          (<-- non-directory)
    #    etc/...             (<-- any non-symlink file type that sorts after cgitrc)
    #    ...
    #    <... followed by all symlinks ordered as above ...>
    #
    # That is the order needed for emulating dpkg-deb.
    root = FSRootDir(fs_path=root_dir)
    pending: List[FSPath] = [root]
    while pending:
        directory = pending.pop()
        if directory.is_dir:
            entries = _scan_dir(directory)
            if directory.path == '.':
                # Keep the top-level DEBIAN entry out of both the traversal
                # and the root's child mapping.
                entries = [entry for entry in entries if entry.path != './DEBIAN']
                if 'DEBIAN' in directory:
                    del directory['DEBIAN']
            pending.extend(entries)
    return root