this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

fix(nixos): retry when getting KUBECONFIG

+42 -18
+42 -18
infra/_modules/nixos/kubeconfig_datasource.py
@@ -4,31 +4,55 @@
 import json
 import subprocess
 import sys
+import time
 import yaml
 
-def get_kubeconfig(host, user):
-    try:
-        result = subprocess.check_output(
-            ["ssh", f"{user}@{host}", "cat /etc/rancher/k3s/k3s.yaml"],
-            stderr=subprocess.STDOUT
-        ).decode("utf-8")
+SSH_OPTIONS = [
+    "-o", "BatchMode=yes",
+    "-o", "ConnectTimeout=10",
+    "-o", "StrictHostKeyChecking=no",
+    "-o", "UserKnownHostsFile=/dev/null",
+    "-o", "GlobalKnownHostsFile=/dev/null",
+]
 
-        # Replace the value of the server field with the IP of the K3s server
-        config = yaml.safe_load(result)
-        config["clusters"][0]["cluster"]["server"] = f"https://[{host}]:6443"
 
-        updated_yaml = yaml.dump(config, default_flow_style=False)
+def get_kubeconfig(host, user, retries=30, delay=2):
+    last_error = None
 
-        return {"kubeconfig": updated_yaml}
-    # TODO fail hard when error
-    except subprocess.CalledProcessError as e:
-        return {"error": e.output.decode("utf-8")}
-    except Exception as e:
-        return {"error": str(e)}
+    for _ in range(retries):
+        try:
+            result = subprocess.check_output(
+                [
+                    "ssh",
+                    *SSH_OPTIONS,
+                    f"{user}@{host}",
+                    "cat /etc/rancher/k3s/k3s.yaml",
+                ],
+                stderr=subprocess.STDOUT,
+            ).decode("utf-8")
+
+            # Replace the server value so clients connect to the public node IP.
+            config = yaml.safe_load(result)
+            config["clusters"][0]["cluster"]["server"] = f"https://[{host}]:6443"
+
+            updated_yaml = yaml.dump(config, default_flow_style=False)
+            return {"kubeconfig": updated_yaml}
+        except subprocess.CalledProcessError as e:
+            last_error = e.output.decode("utf-8").strip()
+        except Exception as e:
+            last_error = str(e)
+
+        time.sleep(delay)
+
+    raise RuntimeError(last_error or "timed out waiting for /etc/rancher/k3s/k3s.yaml")
 
 if __name__ == "__main__":
     args = json.load(sys.stdin)
     host = args.get("host")
     user = args.get("user", "root")
-    output = get_kubeconfig(host, user)
-    print(json.dumps(output))
+    try:
+        output = get_kubeconfig(host, user)
+        print(json.dumps(output))
+    except Exception as e:
+        print(str(e), file=sys.stderr)
+        sys.exit(1)