Coverage for debputy/intermediate_manifest.py: 62%

154 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-22 14:29 +0100

1import dataclasses 

2import json 

3import os 

4import stat 

5import sys 

6import tarfile 

7from enum import Enum 

8 

9 

10try: 

11 from typing import Union, NoReturn, Optional, List, FrozenSet, Iterable, IO, Dict, Any 

12except ImportError: 

13 pass 

14 

15 

class PathType(Enum):
    """Kinds of paths that can be described by a manifest entry.

    Each member's value is a ``(manifest key, tarfile type flag)`` pair.
    """

    FILE = ('file', tarfile.REGTYPE)
    DIRECTORY = ('directory', tarfile.DIRTYPE)
    SYMLINK = ('symlink', tarfile.SYMTYPE)
    # TODO: Add hardlink, FIFO, Char device, BLK device, etc.

    @property
    def manifest_key(self) -> str:
        """The string identifying this path type in the manifest."""
        key, _ = self.value
        return key

    @property
    def tarinfo_type(self):
        """The ``tarfile`` type flag corresponding to this path type."""
        _, type_flag = self.value
        return type_flag

    @property
    def can_be_virtual(self):
        """True for directories and symlinks, False for every other path type."""
        return self is PathType.DIRECTORY or self is PathType.SYMLINK

33 

34 

# Reverse lookup table: manifest key (e.g. "file") -> PathType member.
KEY2PATH_TYPE = {path_type.manifest_key: path_type for path_type in PathType}

38 

39 

40def _dirname(path: str) -> str: 

41 path = path.rstrip('/') 

42 if path == '.': 42 ↛ 44line 42 didn't jump to line 44, because the condition on line 42 was never false

43 return path 

44 return os.path.dirname(path) 

45 

46 

@dataclasses.dataclass(slots=True)
class TarMember:
    """A single path that is to become a member of a tar archive.

    A member is either backed by a path on the file system (``fs_path``) or it
    is a "virtual" entry (``is_virtual_entry``), which has no backing path and
    whose tar header is built purely from the fields of this object.
    """

    # Path of the member inside the archive (from_dict requires directories to end with "/").
    member_path: str
    # The kind of path (file, directory, symlink).
    path_type: PathType
    # Path on the file system backing the member; None for virtual entries.
    fs_path: Optional[str]
    # Permission bits; serialized as an octal string ("0o...") in the manifest.
    mode: int
    # Owner name and numeric id written to the tar header (uname / uid).
    owner: str
    uid: int
    # Group name and numeric id written to the tar header (gname / gid).
    group: str
    gid: int
    # Modification time written to the tar header.
    mtime: int
    # Target of the link; the empty string means "no link target".
    link_target: str = ""
    # True when the member has no backing path on the file system.
    is_virtual_entry: bool = False

    def create_tar_info(self, tar_fd: tarfile.TarFile) -> tarfile.TarInfo:
        """Build the ``TarInfo`` header describing this member.

        :param tar_fd: The tar file the member is created for.  It is used as factory for the
          ``TarInfo`` object (``tar_fd.tarinfo`` / ``tar_fd.gettarinfo``).
        :return: A ``TarInfo`` carrying the type, link target, mode, ownership and mtime of this member.
        :raises ValueError: If the tar info could not be prepared from ``fs_path``.
        """
        tar_info: tarfile.TarInfo
        if self.is_virtual_entry:
            assert self.path_type.can_be_virtual
            tar_info = tar_fd.tarinfo(self.member_path)
            tar_info.size = 0
            tar_info.type = self.path_type.tarinfo_type
            tar_info.linkpath = self.link_target
        else:
            try:
                tar_info = tar_fd.gettarinfo(name=self.fs_path, arcname=self.member_path)
            except (TypeError, ValueError) as e:
                raise ValueError(f"Unable to prepare tar info for {self.member_path}") from e
            if tar_info is None:
                # gettarinfo returns None for file types that tarfile cannot represent; report it
                # like any other preparation failure instead of failing on the attribute access below.
                raise ValueError(f"Unable to prepare tar info for {self.member_path}")
            # TODO: Eventually, we should be able to unconditionally rely on link_target.  However,
            # until we got symlinks and hardlinks correctly done in the JSON generator, it will be
            # conditional for now.
            if self.link_target != '':
                tar_info.linkpath = self.link_target
        tar_info.mode = self.mode
        tar_info.uname = self.owner
        tar_info.uid = self.uid
        tar_info.gname = self.group
        tar_info.gid = self.gid
        tar_info.mtime = self.mtime

        return tar_info

    @classmethod
    def from_file(cls,
                  member_path: str,
                  fs_path: str,
                  mode: Optional[int] = None,
                  owner: str = 'root',
                  uid: int = 0,
                  group: str = 'root',
                  gid: int = 0,
                  clamp_mtime_to: Optional[int] = None,
                  ):
        """Create a member backed by an existing path on the file system.

        The path is inspected with ``os.lstat`` to determine its type, its mode (unless ``mode``
        is given) and its mtime.

        :param member_path: Path of the member inside the archive.
        :param fs_path: Path on the file system to read the metadata from.
        :param mode: Mode to use; when None, the permission bits of ``fs_path`` are used.
        :param owner: Owner name for the member.
        :param uid: Numeric owner id for the member.
        :param group: Group name for the member.
        :param gid: Numeric group id for the member.
        :param clamp_mtime_to: When given, an mtime later than this value is replaced by this value.
        :return: A non-virtual member for ``fs_path``.
        :raises ValueError: If ``fs_path`` is a symlink or has an unsupported file type.
        """
        st_result = os.lstat(fs_path)
        st_mode = st_result.st_mode
        # Only regular files and directories are supported here; FIFOs, character devices and
        # block devices are not handled yet and end up in the "unsupported" branch.
        if stat.S_ISREG(st_mode):
            path_type = PathType.FILE
        elif stat.S_ISDIR(st_mode):
            path_type = PathType.DIRECTORY
        elif stat.S_ISLNK(st_mode):
            raise ValueError("Symlinks should have been rewritten to use the virtual rule.  Otherwise, the link would"
                             " not be normalized according to Debian Policy.")
        else:
            raise ValueError(f"The path {fs_path} had an unsupported/unknown file type.  Probably a bug in the tool")

        if mode is None:
            mode = stat.S_IMODE(st_mode)
        mtime = st_result.st_mtime
        if clamp_mtime_to is not None and mtime > clamp_mtime_to:
            mtime = clamp_mtime_to

        return cls(
            member_path=member_path,
            path_type=path_type,
            fs_path=fs_path,
            mode=mode,
            owner=owner,
            uid=uid,
            group=group,
            gid=gid,
            mtime=int(mtime),
            is_virtual_entry=False,
        )

    @classmethod
    def virtual_path(cls,
                     member_path: str,
                     path_type: PathType,
                     mtime: int,
                     link_target: Optional[str] = None,
                     mode: Optional[int] = None,
                     owner: str = 'root',
                     uid: int = 0,
                     group: str = 'root',
                     gid: int = 0,
                     ):
        """Create a virtual member, i.e. one without a backing path on the file system.

        :param member_path: Path of the member inside the archive.
        :param path_type: Type of the member; must be a type that can be virtual.
        :param mtime: Modification time of the member.
        :param link_target: Target of the link; required for symlinks and forbidden otherwise.
        :param mode: Mode of the member.  NOTE(review): the default of None is stored as-is and
          to_manifest rejects such a member; presumably callers always pass a mode - confirm.
        :param owner: Owner name for the member.
        :param uid: Numeric owner id for the member.
        :param group: Group name for the member.
        :param gid: Numeric group id for the member.
        :return: A virtual member.
        :raises ValueError: If the path type cannot be virtual or the link target does not match
          the path type.
        """
        if not path_type.can_be_virtual:
            raise ValueError(f"The path type {path_type.name} cannot be virtual")
        if (path_type == PathType.SYMLINK) ^ bool(link_target):
            if not link_target:
                raise ValueError("Symlinks must have a link target")
            # TODO: Dear future programmer. Hardlinks will appear here some day and you will have to fix this
            #  code then!
            raise ValueError("Non-symlinks must not have a link target")
        return cls(
            member_path=member_path,
            path_type=path_type,
            fs_path=None,
            # Keep the field a str as declared: a None here would be assigned to TarInfo.linkpath
            # by create_tar_info for virtual directories.
            link_target=link_target or "",
            mode=mode,
            owner=owner,
            uid=uid,
            group=group,
            gid=gid,
            mtime=mtime,
            is_virtual_entry=True,
        )

    def to_manifest(self) -> Dict[str, Any]:
        """Convert the member into a JSON-serializable dict for the manifest.

        The mode is rendered as an octal string and the path type as its manifest key.  Redundant
        fields are left out: ``link_target`` when it is unset, ``fs_path`` for virtual entries and
        ``is_virtual_entry`` for non-virtual entries.

        :raises TypeError: If the mode cannot be rendered as an octal string.
        """
        d = dataclasses.asdict(self)
        try:
            d['mode'] = oct(self.mode)
        except (TypeError, ValueError) as e:
            raise TypeError(f"Bad mode in TarMember {self.member_path}") from e
        d['path_type'] = self.path_type.manifest_key
        # "compress" the output by removing redundant fields
        if self.link_target is None or self.link_target == '':
            del d['link_target']
        if self.is_virtual_entry:
            assert self.fs_path is None
            del d['fs_path']
        else:
            del d['is_virtual_entry']
        return d

    @classmethod
    def parse_intermediate_manifest(cls, manifest_path: str) -> List['TarMember']:
        """Load and validate a manifest.

        :param manifest_path: Path to the JSON manifest, or "-" to read it from stdin.
        :return: The members in manifest order.
        :raises ValueError: If the manifest is empty, does not start with the root directory "./",
          or lists a path before the directory containing it.
        """
        directories = {'.'}
        # Both sources are used as context managers, so stdin is closed after reading as well.
        with (sys.stdin if manifest_path == '-' else open(manifest_path)) as fd:
            data = json.load(fd)
        contents = [TarMember.from_dict(m) for m in data]
        if not contents:
            raise ValueError("Empty manifest (note that the root directory should always be present")
        if contents[0].member_path != './':
            raise ValueError('The first member must always be the root directory "./"')
        for tar_member in contents:
            directory = _dirname(tar_member.member_path)
            if directory not in directories:
                raise ValueError(f'The path "{tar_member.member_path}" came before the directory it is in (or the path'
                                 f' is not a directory).  Either way leads to a broken deb.')
            if tar_member.path_type == PathType.DIRECTORY:
                directories.add(tar_member.member_path.rstrip('/'))
        return contents

    @classmethod
    def from_dict(cls, d) -> 'TarMember':
        """Create a member from one manifest entry (the inverse of to_manifest).

        :param d: The manifest entry.  The keys read are member_path, mode, path_type, owner, uid,
          group, gid and mtime, plus the optional is_virtual_entry, fs_path and link_target.
        :return: The validated member.
        :raises ValueError: If the entry is inconsistent (bad mode, virtual entry with a file system
          path or a type that cannot be virtual, non-virtual entry without a file system path,
          directory not ending with a slash, symlink with a mode other than 0o777 or without a
          target, or a link target on a non-symlink).
        """
        member_path = d['member_path']
        raw_mode = d['mode']
        # The mode must be the octal string produced by to_manifest; anything else (including a
        # plain integer) is reported as a bad mode.
        if not isinstance(raw_mode, str) or not raw_mode.startswith('0o'):
            raise ValueError(f"Bad mode for {member_path}")
        is_virtual_entry = d.get('is_virtual_entry')
        path_type = KEY2PATH_TYPE[d['path_type']]
        fs_path = d.get('fs_path')
        try:
            mode = int(raw_mode[2:], 8)
        except ValueError as e:
            raise ValueError(f"Bad mode for {member_path}") from e
        if is_virtual_entry:
            if not path_type.can_be_virtual:
                raise ValueError(f"Bad file type or is_virtual_entry for {d['member_path']}."
                                 " The file type cannot be virtual")
            if fs_path is not None:
                raise ValueError(f'Invalid declaration for "{member_path}".'
                                 " The path is listed as a virtual entry but has a file system path")
        elif fs_path is None:
            raise ValueError(f'Invalid declaration for "{member_path}".'
                             " The path is neither a virtual path nor does it have a file system path!")
        if path_type == PathType.DIRECTORY and not member_path.endswith('/'):
            raise ValueError(f'Invalid declaration for "{member_path}".'
                             " The path is listed as a directory but does not end with a slash")

        link_target = d.get('link_target')
        if path_type == PathType.SYMLINK:
            if mode != 0o777:
                raise ValueError(f'Invalid declaration for "{member_path}".'
                                 f" Symlinks must have mode 0o0777, got {oct(mode)[2:]}.")
            if not link_target:
                raise ValueError(f'Invalid declaration for "{member_path}".'
                                 " Symlinks must have a link_target")
        elif link_target is not None and link_target != '':
            # TODO: Eventually hardlinks should have them too.  But that is a problem for a future programmer
            raise ValueError(f'Invalid declaration for "{member_path}".'
                             " Only symlinks can have a link_target")
        else:
            # Normalize a missing link target to the empty string used by the dataclass default.
            link_target = ''

        return cls(
            member_path=member_path,
            path_type=path_type,
            fs_path=fs_path,
            mode=mode,
            owner=d['owner'],
            uid=d['uid'],
            group=d['group'],
            gid=d['gid'],
            mtime=d['mtime'],
            link_target=link_target,
            # A missing (or null) flag means "not virtual".
            is_virtual_entry=bool(is_virtual_entry),
        )

263 )