multiple instances for one service; deployment scripts generator

Merged Zhiqiang Xie requested to merge zxie into main
7 files changed: +105 −27
import math
import json
FACTOR = 0.6
ROOT_FACTOR = 2
# todo: separate the server list into a config file
SERVERS = {'brain18': 48, 'brain19': 48, 'brain20': 48, 'pinky10': 16, 'pinky11': 16, 'pinky12': 16, 'pinky13': 16,
           'pinky14': 16, 'pinky15': 16, 'pinky16': 16, 'pinky17': 16, 'pinky18': 16, 'pinky19': 16}
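# The todo above could be addressed by reading the server list from a JSON file.
# A minimal sketch, assuming a hypothetical servers.json next to this script with
# the same shape as SERVERS ({"brain18": 48, ...}); the hard-coded dict above
# remains the default whenever that file is absent.
import os
if os.path.exists("servers.json"):
    with open("servers.json") as f:
        SERVERS = json.load(f)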
TMUX = """
send() {
    tmux send "$1" ENTER;
}

server_setup () {
    tmux select-pane -t $1
    send "ssh $2"
    send "docker load -i ~/docker_images/tracer_image.tar"
}

server_hindsight () {
    send "docker run -it -v /home/zxie/hindsight-grpc/config/alibaba:/config/ \\
        -d --name $1 --shm-size=2gb -p $2:$2 -p $5:$5 --rm tracer bash -c \\
        \\"cd build; ./server -x $3 -c 128 -a /config/sub_alibaba_addresses.json \\
        -t /config/sub_alibaba_topology.json -i $4 $1 \\
        & sleep 10; cd /root/hindsight/agent/; \\
        go run cmd/agent2/main.go -serv $1 -port $5 -lc pinky09:5252 -r pinky09:5253\\""
}

server_run () {
    tmux select-pane -t $1
    if [ $4 = "hindsight" ] || [ $4 = "ot-hindsight" ]
    then
        server_hindsight $2 $3 $4 $5 $6
    else
        send "docker run -it -v /home/zxie/hindsight-grpc/config/alibaba:/config/ \\
            -d --name $2 -p $3:$3 --rm tracer bash -c \\
            \\"cd build; ./server -x $4 -c 128 -a /config/sub_alibaba_addresses.json \\
            -t /config/sub_alibaba_topology.json -h pinky09 -i $5 -p $7 $2\\""
    fi
}

setup_servers () {
__setup__
}

deploy_servers () {
__deploy__
}

kill_servers () {
__kill__
}
"""
# directly change the policy in the generated config file
OTEL_CONFIG = """
receivers:
__jaegers__
processors:
  tail_sampling:
    decision_wait: 10s
    num_traces: 10000000
    policies: [
        # the current implementation is an OR combination:
        # the trace will get sampled if any policy is triggered
        { name: always_sample, type: always_sample },
        # { name: latency, type: latency, latency: { threshold_ms: 100 } },
        # {
        #   name: probabilistic,
        #   type: probabilistic,
        #   probabilistic: { sampling_percentage: 0.1 },
        # },
        # {
        #   name: status_code,
        #   type: status_code,
        #   status_code: { status_codes: [ERROR] },
        # },
        # {
        #   # this one is not well defined
        #   name: rate_limiting,
        #   type: rate_limiting,
        #   rate_limiting: { spans_per_second: 35 },
        # },
      ]
exporters:
  file:
    path: ./gateway.json
  logging:
    loglevel: info
    # number of messages initially logged each second
    sampling_initial: 5
    # sampling rate after the initial messages are logged (every Mth message is logged)
    sampling_thereafter: 5
service:
  pipelines:
    traces:
      receivers: [__receivers__]
      processors: [tail_sampling]
      exporters: [file, logging]
"""
jaeger = """  jaeger/{}:
    protocols:
      thrift_compact:
        endpoint: 0.0.0.0:{}
"""
def mapping_services(service_pair, root):
    server_plan = {s: {} for s in SERVERS}
    cores_all = sum(SERVERS.values())
    service_all = sum([i[1] for i in service_pair])
    for service, estimation in service_pair[::-1]:
        server_pairs = sorted(SERVERS.items(), key=lambda item: item[1])
        servers = [i[0] for i in server_pairs]
        quota = [i[1] for i in server_pairs]
        needed_cores = math.ceil(estimation / service_all * cores_all * FACTOR)
        if service == root:
            # extra resources for the root service
            needed_cores *= ROOT_FACTOR
        # needed_cores = round(estimation / service_all * cores_all * FACTOR)
        assert sum(quota) >= needed_cores, "resource shortage"
        # no more than 5 instances per service
        for dup in range(1, 6):
            # place `dup` equal-sized instances on the `dup` servers with the most free cores
            if min(quota[-dup:]) >= int(needed_cores / dup):
                for i, s in enumerate(servers[-dup:]):
                    server_plan[s][service] = int(needed_cores / dup)
                    SERVERS[s] -= server_plan[s][service]
                break
        else:
            assert False, "failed to allocate sufficient resources"
    print("{:.2f}% of cores are allocated".format(
        (1 - sum(SERVERS.values()) / cores_all) * 100))
    return server_plan
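# Worked example of the sizing rule above (load estimates of 30 and 80 are
# illustrative only), starting from the full hard-coded SERVERS table where
# cores_all = 3*48 + 10*16 = 304: a service estimated at 30 out of a total
# load of 80 needs ceil(30/80 * 304 * 0.6) = 69 cores, doubled to 138 if it
# is the root; 138 cores then fit as 3 instances of 46 cores each on the
# three 48-core brain hosts.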
def write_configs(server_plan):
    sub_addr = []
    pointer = 0
    otel_port = 6832
    setup_servers = ""
    deploy_servers = ""
    kill_servers = ""
    jaegers = ""
    receivers = []
    service_entry = {}
    for server in server_plan:
        port = 50050
        agent_port = 5050
        for service in server_plan[server]:
            instance_id = 0
            if service in service_entry:
                instance_id = len(
                    sub_addr[service_entry[service]]["instances"])
                sub_addr[service_entry[service]]["instances"].append(
                    {"hostname": server, "port": str(port), "agent_port": str(agent_port)})
            else:
                sub_addr.append({"name": service, "deploy_addr": "0.0.0.0", "instances": [
                    {"hostname": server, "port": str(port), "agent_port": str(agent_port)}]})
                service_entry[service] = len(service_entry)
            deploy_servers += "\tserver_run {} {} {} {} {} {} {}\n".format(
                pointer, service, port, "$1", instance_id, agent_port, otel_port)
            jaegers += jaeger.format(otel_port, otel_port)
            receivers.append("jaeger/" + str(otel_port))
            port += 1
            agent_port += 1
            otel_port += 1
        setup_servers += "\tserver_setup {} {}\n".format(pointer, server)
        kill_servers += "\ttmux select-pane -t {};\tsend \"docker stop \\$(docker ps -a -q)\"\n".format(
            pointer)
        pointer += 1
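    # The accumulated fragments are now spliced into the shell and collector
    # templates. For an illustrative otel_port of 6832, `jaegers` holds
    # receiver entries of the form
    #
    #   jaeger/6832:
    #     protocols:
    #       thrift_compact:
    #         endpoint: 0.0.0.0:6832
    #
    # while `receivers` holds the matching "jaeger/6832" keys that end up in
    # the pipeline's receiver list.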
    tmux = TMUX.replace("__setup__", setup_servers)
    tmux = tmux.replace("__deploy__", deploy_servers)
    tmux = tmux.replace("__kill__", kill_servers)
    otel_config = OTEL_CONFIG.replace("__jaegers__", jaegers)
    otel_config = otel_config.replace("__receivers__", ",".join(receivers))
    with open("alibaba/sub_alibaba_addresses.json", "w") as f:
        json.dump({"addresses": sub_addr}, f, indent=2)
    with open("alibaba/tmux_launch.sh", "w") as f:
        f.write(tmux)
    with open("alibaba/sub_alibaba_otel_config.yaml", "w") as f:
        f.write(otel_config)
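# How the two steps fit together; a minimal driver sketch. The (service name,
# estimated load) pairs are hypothetical stand-ins for whatever the Alibaba
# trace preprocessing produces. Running it writes sub_alibaba_addresses.json,
# sub_alibaba_otel_config.yaml, and tmux_launch.sh (whose setup/deploy/kill
# bodies contain lines of the form "server_setup <pane> <host>" and
# "server_run <pane> <service> <port> $1 <instance> <agent_port> <otel_port>")
# into the alibaba/ directory.
if __name__ == "__main__":
    import os
    os.makedirs("alibaba", exist_ok=True)
    service_pair = [
        ("serviceC", 20),    # illustrative load estimates only
        ("serviceB", 30),
        ("rootService", 30),
    ]
    plan = mapping_services(service_pair, root="rootService")
    write_configs(plan)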