Coverage for gws-app/gws/gis/cache/core.py: 0%

157 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-17 01:37 +0200

1"""Cache management.""" 

2 

3import math 

4import os 

5import re 

6 

7import yaml 

8 

9import gws 

10import gws.config 

11import gws.gis.mpx.config 

12import gws.lib.osx 

13import gws.lib.lock 

14import gws.lib.datetimex as datetimex 

15 

# Fallback values used by `seed` when neither the application config
# nor the layer's cache options provide a setting.

# timeout passed to the seeding command (presumably seconds -- confirm against `gws.lib.osx.run`)
DEFAULT_MAX_TIME = 600

# value passed to `mapproxy-seed -c`
DEFAULT_CONCURRENCY = 1

# tiles older than this many seconds are refreshed: 7 days
DEFAULT_MAX_AGE = 7 * 24 * 3600

# highest level seeded when no explicit levels are given (levels 0..3)
DEFAULT_MAX_LEVEL = 3

20 

21 

class Config(gws.Config):
    """Global cache options"""

    # Both options are read in `seed` via `root.app.cfg('cache.<name>')`.

    seedingMaxTime: gws.Duration = '600'
    """max. time for a seeding job"""
    seedingConcurrency: int = 1
    """number of concurrent seeding jobs"""

29 

30 

class Grid(gws.Data):
    """Tile statistics for one level of a cache grid."""

    # not assigned anywhere in this module
    uid: str
    # level index; 0 is the coarsest (largest) resolution
    z: int
    # resolution of this level, taken from the MapProxy grid's `res` list
    res: float
    # number of tile columns at this level (at least 1)
    maxX: int
    # number of tile rows at this level (at least 1)
    maxY: int
    # maxX * maxY
    totalTiles: int
    # number of tile files found on disk; filled in by `_update_file_counts`
    cachedTiles: int

39 

40 

class Entry(gws.Data):
    """A MapProxy cache together with the layers that use it."""

    # uid of the cache in the MapProxy config
    uid: str
    # all layers whose `mpxCacheUid` equals `uid`
    layers: list[gws.Layer]
    # the cache's dict from the `caches` section of the MapProxy config
    mpxCache: dict
    # per-level statistics, keyed by level index
    grids: dict[int, Grid]
    # initialised to an empty dict by `_enum_entries`
    config: dict
    # not assigned anywhere in this module
    counts: dict
    # cache directory: `<MAPPROXY_CACHE_DIR>/<uid>_<crs>`
    dirname: str

49 

50 

class Status(gws.Data):
    """Result of `status`: known cache entries plus orphaned directories."""

    # cache entries found for the requested layers
    entries: list[Entry]
    # directories directly under the cache dir that belong to none of `entries`
    staleDirs: list[str]

54 

55 

def status(root: gws.Root, layer_uids=None, with_counts=True) -> Status:
    """Collect the cache entries and the stale cache directories.

    Args:
        root: Application root.
        layer_uids: If given (and non-empty), only layers with these uids are considered.
        with_counts: If true, count the cached tile files of each entry.

    Returns:
        A ``Status`` with the entries found and the directories directly
        under the cache dir that do not belong to any of them.
    """

    mpx_config = gws.gis.mpx.config.create(root)
    entries = _enum_entries(root, mpx_config, layer_uids) if mpx_config else []

    if with_counts and entries:
        _update_file_counts(entries)

    existing_dirs = list(gws.lib.osx.find_directories(gws.c.MAPPROXY_CACHE_DIR, deep=False))
    known_dirs = {entry.dirname for entry in entries}
    stale_dirs = [dirname for dirname in existing_dirs if dirname not in known_dirs]

    return Status(entries=entries, staleDirs=stale_dirs)

73 

74 

def cleanup(root: gws.Root):
    """Remove every cache directory that does not belong to a known cache entry."""

    # file counts are not needed to determine the stale directories
    for stale_dir in status(root, with_counts=False).staleDirs:
        _remove_dir(stale_dir)

79 

80 

def drop(root: gws.Root, layer_uids=None):
    """Remove the cache directories of the given layers (all cached layers if ``layer_uids`` is empty)."""

    current = status(root, layer_uids=layer_uids, with_counts=False)
    for entry in current.entries:
        _remove_dir(entry.dirname)

85 

# A 1x1 palette-based (8-bit) PNG image with a tRNS (transparency) chunk.
# `seed` writes it to disk and patches MapProxy to return it
# when an upstream response is not an image.
PIXEL_PNG8 = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x03\x00\x00\x00(\xcb4\xbb\x00\x00\x00\x06PLTE\xff\xff\xff\x00\x00\x00U\xc2\xd3~\x00\x00\x00\x01tRNS\x00@\xe6\xd8f\x00\x00\x00\x0cIDATx\xdab`\x00\x080\x00\x00\x02\x00\x01OmY\xe1\x00\x00\x00\x00IEND\xaeB`\x82'

87 

def seed(root: gws.Root, entries: list[Entry], levels: list[int]):
    """Seed the MapProxy caches of the given entries by running ``mapproxy-seed``.

    One seed task is created per entry. The function writes the MapProxy and
    seed configurations into the config directory, runs the seeder under a
    file lock and finally hands the cache directories back to the
    application user. Progress and results are logged; nothing is returned.

    Args:
        root: Application root, used to build the MapProxy config and to read
            the ``cache.seedingMaxTime`` and ``cache.seedingConcurrency`` options.
        entries: Cache entries to seed.
        levels: Levels to seed. If empty, levels ``0..maxLevel`` are seeded, where
            ``maxLevel`` comes from the cache options of the entry's first layer
            (``DEFAULT_MAX_LEVEL`` if not set).
    """

    # https://mapproxy.github.io/mapproxy/latest/seed.html#seeds
    seeds = {}

    for e in entries:
        cache_uid = e.uid

        # only the first layer's cache options are consulted
        c = e.layers[0].cache or gws.Data()
        max_age = c.get('maxAge') or DEFAULT_MAX_AGE
        max_level = c.get('maxLevel') or DEFAULT_MAX_LEVEL

        seeds[cache_uid] = dict(
            caches=[cache_uid],
            # grids=e.mpxCache['grids'],
            # must be a plain list: yaml.dump would emit a python-specific tag for a `range` object
            levels=list(levels) if levels else list(range(max_level + 1)),
            refresh_before={
                'time': datetimex.to_iso_string(datetimex.to_utc(datetimex.add(seconds=-max_age)), with_tz=''),
            }
        )

    if not seeds:
        gws.log.info('no layers to seed')
        return

    lock_path = gws.c.CONFIG_DIR + '/mapproxy.seed.lock'

    # the lock is held for the whole seeding run, so that only one seeder is active
    with gws.lib.lock.SoftFileLock(lock_path) as ok:
        if not ok:
            gws.log.info('seeding already running')
            return

        mpx_config = gws.gis.mpx.config.create(root)
        mpx_config_path = gws.c.CONFIG_DIR + '/mapproxy.seed.main.yml'
        gws.u.write_file(mpx_config_path, yaml.dump(mpx_config))

        seed_config_path = gws.c.CONFIG_DIR + '/mapproxy.seed.yml'
        gws.u.write_file(seed_config_path, yaml.dump(dict(seeds=seeds)))

        max_time = root.app.cfg('cache.seedingMaxTime', default=DEFAULT_MAX_TIME)
        concurrency = root.app.cfg('cache.seedingConcurrency', default=DEFAULT_CONCURRENCY)

        # monkeypatch mapproxy to simply store an empty image in case of error
        # NOTE(review): this rewrites an installed library file in place and the path
        # is tied to python3.10 -- confirm it matches the deployed interpreter
        empty_pixel_path = gws.c.CONFIG_DIR + '/mapproxy.seed.empty.png'
        gws.u.write_file_b(empty_pixel_path, PIXEL_PNG8)
        py = '/usr/local/lib/python3.10/dist-packages/mapproxy/client/http.py'
        s = gws.u.read_file(py)
        s = re.sub(r"raise HTTPClientError\('response is not an image.+", f'return ImageSource({empty_pixel_path!r})', s)
        gws.u.write_file(py, s)

        ts = gws.u.stime()
        gws.log.info(f'START SEEDING jobs={len(seeds)} {max_time=} {concurrency=}')
        gws.log.info('^C ANYTIME TO STOP...')

        # argument list (as in `_remove_dir`), so that paths are passed verbatim
        cmd = [
            '/usr/local/bin/mapproxy-seed',
            '-f', mpx_config_path,
            '-c', str(concurrency),
            seed_config_path,
        ]

        res = False
        try:
            gws.lib.osx.run(cmd, echo=True, timeout=max_time or DEFAULT_MAX_TIME)
            res = True
        except (gws.lib.osx.TimeoutError, KeyboardInterrupt):
            # a timeout or ^C is an expected way to stop; reported as "incomplete" below
            pass

        # the chown below hands the newly created cache files over to the application user
        try:
            for p in gws.lib.osx.find_directories(gws.c.MAPPROXY_CACHE_DIR, deep=False):
                gws.lib.osx.run(['chown', '-R', f'{gws.c.UID}:{gws.c.GID}', p], echo=True)
        except Exception as exc:
            gws.log.error(f'failed to chown cache dir: {exc!r}')

        gws.log.info(f'TIME: {gws.u.stime() - ts} sec')
        gws.log.info('SEEDING COMPLETE' if res else 'SEEDING INCOMPLETE, PLEASE TRY AGAIN')

164 

165 

def store_in_web_cache(url: str, img: bytes):
    """Store an image in the web (fast) cache under the path given by ``url``.

    The image is first written to a randomly named temporary file in the
    target directory and then renamed into place. Failures are logged as
    warnings and never raised.

    Args:
        url: Path of the image relative to the cache root; it is appended to
            ``FASTCACHE_DIR`` verbatim.
        img: Image content.
    """

    # NOTE(review): `url` is not normalized here -- callers are presumably expected
    # to pass a clean absolute url path (leading slash, no '..'); confirm
    path = gws.c.FASTCACHE_DIR + url
    dirname = os.path.dirname(path)
    tmp = dirname + '/' + gws.u.random_string(64)

    try:
        os.makedirs(dirname, 0o755, exist_ok=True)
        gws.u.write_file_b(tmp, img)
        os.rename(tmp, path)
    except OSError:
        gws.log.warning(f'store_in_web_cache FAILED path={path!r}')
        # do not leave the temporary file behind if the write or the rename failed
        try:
            os.unlink(tmp)
        except OSError:
            pass

176 

177 

def _update_file_counts(entries: list[Entry]):
    """Count the cached tile files on disk and update ``cachedTiles`` of the entries' grids.

    Every file under the MapProxy cache directory whose path ends with the
    tile pattern is attributed to each entry whose directory contains it.
    Files that do not look like png tiles are ignored.

    Args:
        entries: Entries to update; their ``grids`` are modified in place.
    """

    # we use the mp layout all the way: zz/xxxx/xxxx/yyyy/yyyy.format
    tile_re = re.compile(r'(\d+)/(\d+)/(\d+)/(\d+)/(\d+)\.png$')

    for path in gws.lib.osx.find_files(gws.c.MAPPROXY_CACHE_DIR):
        m = tile_re.search(path)
        if not m:
            # not a tile (e.g. a lock or metadata file)
            continue

        # only the level is needed for counting
        z = int(m.group(1))

        for e in entries:
            # the trailing slash prevents matching a sibling directory with the same name prefix
            if not path.startswith(e.dirname + '/'):
                continue
            g = e.grids.get(z)
            if g:
                g.cachedTiles += 1

193 

194 

def _enum_entries(root: gws.Root, mpx_config, layer_uids=None):
    """Build the list of cache entries for the layers of the application.

    A layer belongs to a MapProxy cache if its ``mpxCacheUid`` equals the
    cache uid. Caches with ``disable_storage`` set are skipped. Layers
    sharing a cache end up in the same entry.

    Args:
        root: Application root.
        mpx_config: MapProxy configuration dict (``caches`` and ``grids`` are read).
        layer_uids: If given (and non-empty), only layers with these uids are considered.

    Returns:
        A list of ``Entry`` objects, one per cache uid.
    """

    by_uid: dict[str, Entry] = {}

    for layer in root.find_all(gws.ext.object.layer):

        if layer_uids and layer.uid not in layer_uids:
            continue

        for uid, mpx_cache in mpx_config['caches'].items():
            if mpx_cache.get('disable_storage'):
                continue
            if gws.u.get(layer, 'mpxCacheUid') != uid:
                continue

            if uid in by_uid:
                # cache already seen: just register one more layer for it
                by_uid[uid].layers.append(layer)
                continue

            grid_configs = [mpx_config['grids'][grid_uid] for grid_uid in mpx_cache['grids']]
            # the directory name is derived from the srs of the first grid, colon removed
            crs = grid_configs[0]['srs'].replace(':', '')

            entry = Entry(
                uid=uid,
                layers=[layer],
                mpxCache=mpx_cache,
                grids={},
                config={},
                dirname=f'{gws.c.MAPPROXY_CACHE_DIR}/{uid}_{crs}',
            )

            for grid_config in grid_configs:
                # see _calc_grids in mapproxy/grid.py
                x0, y0, x1, y1 = grid_config['bbox'][0], grid_config['bbox'][1], grid_config['bbox'][2], grid_config['bbox'][3]
                width = x1 - x0
                height = y1 - y0
                tile_size = grid_config['tile_size']

                # levels are numbered from the largest resolution down
                resolutions = sorted(grid_config['res'], reverse=True)
                for level, resolution in enumerate(resolutions):
                    cols = max(math.ceil(width // resolution / tile_size[0]), 1)
                    rows = max(math.ceil(height // resolution / tile_size[1]), 1)
                    entry.grids[level] = Grid(
                        z=level,
                        res=resolution,
                        maxX=cols,
                        maxY=rows,
                        totalTiles=cols * rows,
                        cachedTiles=0,
                    )

            by_uid[uid] = entry

    return list(by_uid.values())

244 

245 

def _remove_dir(dirname):
    """Recursively delete a directory with ``rm -fr`` and log the removal."""

    gws.lib.osx.run(['rm', '-fr', dirname], echo=True)
    gws.log.info(f'removed {dirname}')