diff --git a/py/gencache.py b/py/gencache.py
index 3c4d86828703383fa58fe20f672412d6edd338ca..977ec52f4a3779c883bce8a4f8defe3abeb05736 100644
--- a/py/gencache.py
+++ b/py/gencache.py
@@ -4,7 +4,7 @@
 import plyvel, json, urllib.request, networkx, random, math, re, utils
 from math import *
 from utils import *
-CESIUMPLUS_URL = "https://g1.data.presles.fr"
+CESIUMPLUS_URL = "https://g1.data.le-sou.org,https://g1.data.e-is.pro,https://g1.data.adn.life,https://g1.data.presles.fr"
 DUNITER_PATH = "~/.config/duniter/duniter_default"
 OUTPUT_PATH = "www/data"
 OVERLAY_PRECISION = "0.5"
@@ -65,7 +65,7 @@
 Duniter must not be running while extracting LevelDB data.
 Only compatible with Duniter >=1.7.9.
 Options:
-	-c <url>	Cesium+ URL (default={})
+	-c <url>	Cesium+ URLs separated by commas (default={})
 	-d <path>	Duniter profile path (default={})
 	-e <path>	Output dir (default={})
 	-o	Do not generate image overlays
@@ -81,6 +81,8 @@ Options:
 
 	antispam = "--spam" in sys.argv
 
+	cesiumplus_urls = getargv("-c", CESIUMPLUS_URL).split(",")
+
 	output_dir = format_path(getargv("-e", OUTPUT_PATH))
 	try:
 		os.mkdir(output_dir)
@@ -116,32 +118,36 @@
 		"size": 10000,
 		"_source": ["title", "geoPoint", "avatar._content_type"]
 	}
-	log("Request Cesium+", LOG_TRACE)
-	try:
-		resp = json.loads(urllib.request.urlopen(
-			getargv("-c", CESIUMPLUS_URL)+"/user/profile/_search?scroll=2m",
-			json.dumps(query).encode()
-		).read().decode())
-	except urllib.error.HTTPError as err:
-		log("Cesium+ HTTP Error: {}".format(err), LOG_ERROR)
-	results += resp["hits"]["hits"]
-	scroll_id = resp.get("_scroll_id")
-	for i in range(100):
-		log("Request Cesium+: scroll {}".format(i), LOG_TRACE)
+
+	for cesiumplus_url in cesiumplus_urls:
+		log("Request Cesium+ {}".format(cesiumplus_url), LOG_TRACE)
 		try:
 			resp = json.loads(urllib.request.urlopen(
-				getargv("-c", CESIUMPLUS_URL)+"/_search/scroll",
-				json.dumps({
-					"scroll": "1m",
-					"scroll_id": scroll_id
-				}).encode()
+				cesiumplus_url+"/user/profile/_search?scroll=2m",
+				json.dumps(query).encode()
 			).read().decode())
 		except urllib.error.HTTPError as err:
 			log("Cesium+ HTTP Error: {}".format(err), LOG_ERROR)
+			continue
 		results += resp["hits"]["hits"]
-		scroll_id = resp.get("scroll_id")
-		if not scroll_id:
-			break
+		scroll_id = resp.get("_scroll_id")
+		for i in range(100):
+			log("Request Cesium+: scroll {}".format(i), LOG_TRACE)
+			try:
+				resp = json.loads(urllib.request.urlopen(
+					cesiumplus_url+"/_search/scroll",
+					json.dumps({
+						"scroll": "1m",
+						"scroll_id": scroll_id
+					}).encode()
+				).read().decode())
+			except urllib.error.HTTPError as err:
+				log("Cesium+ HTTP Error: {}".format(err), LOG_ERROR)
+			results += resp["hits"]["hits"]
+			scroll_id = resp.get("scroll_id")
+			if not scroll_id:
+				break
+		break
 	log("Cesium+ accounts: {}".format(len(results)), LOG_INFO)
 
 	log("Opening DBs")