Coverage for debputy/intermediate_manifest.py: 62%
154 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-22 14:29 +0100
« prev ^ index » next coverage.py v6.5.0, created at 2023-01-22 14:29 +0100
1import dataclasses
2import json
3import os
4import stat
5import sys
6import tarfile
7from enum import Enum
10try:
11 from typing import Union, NoReturn, Optional, List, FrozenSet, Iterable, IO, Dict, Any
12except ImportError:
13 pass
class PathType(Enum):
    """Kinds of paths supported in the manifest.

    The value of every member is a two-element tuple: the key used for the
    path type in the manifest followed by the matching ``tarfile`` type flag.
    """

    FILE = ('file', tarfile.REGTYPE)
    DIRECTORY = ('directory', tarfile.DIRTYPE)
    SYMLINK = ('symlink', tarfile.SYMTYPE)
    # TODO: Add hardlink, FIFO, Char device, BLK device, etc.

    @property
    def manifest_key(self) -> str:
        """Return the manifest key (first element of the member's value)."""
        key, _ = self.value
        return key

    @property
    def tarinfo_type(self) -> bytes:
        """Return the ``tarfile`` type flag (second element of the member's value)."""
        _, type_flag = self.value
        return type_flag

    @property
    def can_be_virtual(self) -> bool:
        """Return True for the path types allowed to be virtual (directories and symlinks)."""
        return self is PathType.DIRECTORY or self is PathType.SYMLINK
# Reverse lookup table: manifest key (e.g. 'file') -> PathType member.
KEY2PATH_TYPE = {path_type.manifest_key: path_type for path_type in PathType}
40def _dirname(path: str) -> str:
41 path = path.rstrip('/')
42 if path == '.': 42 ↛ 44line 42 didn't jump to line 44, because the condition on line 42 was never false
43 return path
44 return os.path.dirname(path)
@dataclasses.dataclass(slots=True)
class TarMember:
    """A single entry of the intermediate manifest.

    An entry is either backed by a path on the file system (``fs_path``) or
    is a "virtual" entry (``is_virtual_entry``) that has no file system path.
    """

    # Path of the member inside the tarball (directories end with a slash
    # in manifests accepted by from_dict).
    member_path: str
    path_type: PathType
    # Path on the file system; None for virtual entries.
    fs_path: Optional[str]
    mode: int
    owner: str
    uid: int
    group: str
    gid: int
    mtime: int
    # Link target; the empty string means "no link target".
    link_target: str = ""
    is_virtual_entry: bool = False

    def create_tar_info(self, tar_fd: tarfile.TarFile) -> tarfile.TarInfo:
        """Create the ``tarfile.TarInfo`` describing this member.

        :param tar_fd: The tar file the info object is created via.
        :return: A tar info with mode, ownership and mtime taken from this member.
        :raises ValueError: If ``gettarinfo`` rejects the file system path of a
            non-virtual member with a TypeError or ValueError.
        """
        tar_info: tarfile.TarInfo
        if self.is_virtual_entry:
            assert self.path_type.can_be_virtual
            tar_info = tar_fd.tarinfo(self.member_path)
            tar_info.size = 0
            tar_info.type = self.path_type.tarinfo_type
            # Never hand None to tarfile; it expects a string link name.
            tar_info.linkpath = self.link_target or ''
        else:
            try:
                tar_info = tar_fd.gettarinfo(name=self.fs_path, arcname=self.member_path)
            except (TypeError, ValueError) as e:
                raise ValueError(f"Unable to prepare tar info for {self.member_path}") from e
            # TODO: Eventually, we should be able to unconditionally rely on link_target.  However,
            #  until we got symlinks and hardlinks correctly done in the JSON generator, it will be
            #  conditional for now.
            if self.link_target:
                tar_info.linkpath = self.link_target
        tar_info.mode = self.mode
        tar_info.uname = self.owner
        tar_info.uid = self.uid
        tar_info.gname = self.group
        tar_info.gid = self.gid
        tar_info.mtime = self.mtime
        return tar_info

    @classmethod
    def from_file(
        cls,
        member_path: str,
        fs_path: str,
        mode: Optional[int] = None,
        owner: str = 'root',
        uid: int = 0,
        group: str = 'root',
        gid: int = 0,
        clamp_mtime_to: Optional[int] = None,
    ) -> 'TarMember':
        """Create a non-virtual member from a path on the file system.

        :param member_path: Path of the member inside the tarball.
        :param fs_path: Path on the file system; inspected with ``os.lstat``.
        :param mode: Mode to record; defaults to the permission bits of *fs_path*.
        :param owner: Owner name to record.
        :param uid: Owner id to record.
        :param group: Group name to record.
        :param gid: Group id to record.
        :param clamp_mtime_to: If given, an mtime later than this is replaced by it.
        :return: The new member with ``is_virtual_entry`` set to False.
        :raises ValueError: If *fs_path* is a symlink or any type other than a
            regular file or a directory.
        """
        st_result = os.lstat(fs_path)
        st_mode = st_result.st_mode
        # TODO: FIFOs, character devices and block devices are not supported yet.
        if stat.S_ISREG(st_mode):
            path_type = PathType.FILE
        elif stat.S_ISDIR(st_mode):
            path_type = PathType.DIRECTORY
        elif stat.S_ISLNK(st_mode):
            raise ValueError("Symlinks should have been rewritten to use the virtual rule.  Otherwise, the link would"
                             " not be normalized according to Debian Policy.")
        else:
            raise ValueError(f"The path {fs_path} had an unsupported/unknown file type.  Probably a bug in the tool")

        if mode is None:
            mode = stat.S_IMODE(st_mode)
        mtime = st_result.st_mtime
        if clamp_mtime_to is not None and mtime > clamp_mtime_to:
            mtime = clamp_mtime_to

        return cls(
            member_path=member_path,
            path_type=path_type,
            fs_path=fs_path,
            mode=mode,
            owner=owner,
            uid=uid,
            group=group,
            gid=gid,
            mtime=int(mtime),
            is_virtual_entry=False,
        )

    @classmethod
    def virtual_path(
        cls,
        member_path: str,
        path_type: PathType,
        mtime: int,
        link_target: Optional[str] = None,
        mode: Optional[int] = None,
        owner: str = 'root',
        uid: int = 0,
        group: str = 'root',
        gid: int = 0,
    ) -> 'TarMember':
        """Create a virtual member (one without a file system path).

        :param member_path: Path of the member inside the tarball.
        :param path_type: Must be a type for which ``can_be_virtual`` is true.
        :param mtime: The mtime to record.
        :param link_target: Required for symlinks, forbidden for everything else.
        :param mode: Stored as given.  NOTE: ``to_manifest`` raises TypeError
            when the mode is still None.
        :param owner: Owner name to record.
        :param uid: Owner id to record.
        :param group: Group name to record.
        :param gid: Group id to record.
        :return: The new member with ``is_virtual_entry`` set to True.
        :raises ValueError: If the path type cannot be virtual, a symlink lacks a
            link target or a non-symlink has one.
        """
        if not path_type.can_be_virtual:
            raise ValueError(f"The path type {path_type.name} cannot be virtual")
        if path_type == PathType.SYMLINK:
            if not link_target:
                raise ValueError("Symlinks must have a link target")
        elif link_target:
            # TODO: Dear future programmer.  Hardlinks will appear here some day and you will have to fix this
            #  code then!
            raise ValueError("Non-symlinks must not have a link target")
        return cls(
            member_path=member_path,
            path_type=path_type,
            fs_path=None,
            # The field is a plain str; never store the None default in it.
            link_target=link_target or '',
            mode=mode,
            owner=owner,
            uid=uid,
            group=group,
            gid=gid,
            mtime=mtime,
            is_virtual_entry=True,
        )

    def to_manifest(self) -> Dict[str, Any]:
        """Convert this member into its manifest (JSON-friendly) dict.

        The mode is rendered via ``oct`` and the path type via its manifest key.

        :raises TypeError: If the mode cannot be converted with ``oct``.
        """
        d = dataclasses.asdict(self)
        try:
            d['mode'] = oct(self.mode)
        except (TypeError, ValueError) as e:
            raise TypeError(f"Bad mode in TarMember {self.member_path}") from e
        d['path_type'] = self.path_type.manifest_key
        # "compress" the output by removing redundant fields
        if not self.link_target:
            del d['link_target']
        if self.is_virtual_entry:
            assert self.fs_path is None
            del d['fs_path']
        else:
            del d['is_virtual_entry']
        return d

    @classmethod
    def parse_intermediate_manifest(cls, manifest_path: str) -> List['TarMember']:
        """Load and validate a manifest.

        :param manifest_path: Path to the JSON manifest, or '-' to read it from stdin.
        :return: The members in manifest order.
        :raises ValueError: If the manifest is empty, does not start with the root
            directory "./", or lists a path before the directory containing it.
        """
        if manifest_path == '-':
            with sys.stdin as fd:
                data = json.load(fd)
        else:
            with open(manifest_path, encoding='utf-8') as fd:
                data = json.load(fd)
        contents = [cls.from_dict(m) for m in data]

        if not contents:
            raise ValueError("Empty manifest (note that the root directory should always be present)")
        if contents[0].member_path != './':
            raise ValueError('The first member must always be the root directory "./"')

        # Directories seen so far (without trailing slash); '.' is the root.
        directories = {'.'}
        for tar_member in contents:
            directory = _dirname(tar_member.member_path)
            if directory not in directories:
                raise ValueError(f'The path "{tar_member.member_path}" came before the directory it is in (or the path'
                                 f' is not a directory). Either way leads to a broken deb.')
            if tar_member.path_type == PathType.DIRECTORY:
                directories.add(tar_member.member_path.rstrip('/'))
        return contents

    @classmethod
    def from_dict(cls, d) -> 'TarMember':
        """Create a member from one manifest entry, validating it on the way.

        :param d: The manifest entry.  The keys 'member_path', 'mode', 'path_type',
            'owner', 'uid', 'group', 'gid' and 'mtime' are read unconditionally;
            'is_virtual_entry', 'fs_path' and 'link_target' are optional.
        :raises ValueError: If the mode is not a string starting with '0o' followed
            by octal digits, or the entry is inconsistent (see the messages below).
        :raises KeyError: If a required key is missing or the path type is unknown.
        """
        member_path = d['member_path']
        raw_mode = d['mode']
        if not isinstance(raw_mode, str) or not raw_mode.startswith('0o'):
            raise ValueError(f"Bad mode for {member_path}")
        is_virtual_entry = d.get('is_virtual_entry')
        path_type = KEY2PATH_TYPE[d['path_type']]
        fs_path = d.get('fs_path')
        try:
            mode = int(raw_mode[2:], 8)
        except ValueError as e:
            raise ValueError(f"Bad mode for {member_path}") from e

        if is_virtual_entry:
            if not path_type.can_be_virtual:
                raise ValueError(f"Bad file type or is_virtual_entry for {d['member_path']}."
                                 " The file type cannot be virtual")
            if fs_path is not None:
                raise ValueError(f'Invalid declaration for "{member_path}".'
                                 " The path is listed as a virtual entry but has a file system path")
        elif fs_path is None:
            raise ValueError(f'Invalid declaration for "{member_path}".'
                             " The path is neither a virtual path nor does it have a file system path!")
        if path_type == PathType.DIRECTORY and not member_path.endswith('/'):
            raise ValueError(f'Invalid declaration for "{member_path}".'
                             " The path is listed as a directory but does not end with a slash")

        link_target = d.get('link_target')
        if path_type == PathType.SYMLINK:
            if mode != 0o777:
                raise ValueError(f'Invalid declaration for "{member_path}".'
                                 f" Symlinks must have mode 0o0777, got {oct(mode)[2:]}.")
            if not link_target:
                raise ValueError(f'Invalid declaration for "{member_path}".'
                                 " Symlinks must have a link_target")
        elif link_target is not None and link_target != '':
            # TODO: Eventually hardlinks should have them too.  But that is a problem for a future programmer
            raise ValueError(f'Invalid declaration for "{member_path}".'
                             " Only symlinks can have a link_target")
        else:
            link_target = ''

        return cls(
            member_path=member_path,
            path_type=path_type,
            fs_path=fs_path,
            mode=mode,
            owner=d['owner'],
            uid=d['uid'],
            group=d['group'],
            gid=d['gid'],
            mtime=d['mtime'],
            link_target=link_target,
            is_virtual_entry=bool(is_virtual_entry),
        )