-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjustfile
More file actions
201 lines (163 loc) · 8.65 KB
/
Copy pathjustfile
File metadata and controls
201 lines (163 loc) · 8.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
mod nix
# list every available recipe (runs when `just` is invoked without arguments)
default:
    just --list
# Deprecated in favour of `nix fmt`: prints a notice, pauses five seconds,
# then applies clippy fixes, cargo fmt and nixfmt in that order.
# format & lint-fix code
fmt:
    echo "deprecated, use 'nix fmt' instead..."
    sleep 5
    cargo clippy --fix --allow-staged --all-targets
    cargo fmt
    nixfmt .
# NOTE(review): the variables exported here are spelled OLTP_*, while the
# localnet telemetry client recipe in this file exports OTLP_* — confirm
# which spelling each binary/script actually reads.
# Extra args are forwarded to the `start` subcommand.
# spin up a local testnet with telemetry endpoints pointed at localhost:4318
local-testnet *args='':
    OLTP_METRICS_URL="http://localhost:4318/v1/metrics" OLTP_TRACING_URL="http://localhost:4318/v1/traces" OLTP_LOGS_URL="http://localhost:4318/v1/logs" cargo run -p psyche-centralized-local-testnet -- start {{ args }}
# With no test_name the whole suite runs; with a test_name only matching
# tests run and their output is shown (--nocapture).
# run integration tests
integration-test test_name="":
    if [ -n "{{ test_name }}" ]; then \
        cargo test --release -p psyche-centralized-testing --test integration_tests -- --nocapture "{{ test_name }}"; \
    else \
        cargo test --release -p psyche-centralized-testing --test integration_tests; \
    fi
# Determine whether to use Python support based on environment variable.
# Reads USE_PYTHON and falls back to "0" when it is unset; the decentralized
# integration test recipe enables Python support only when the value is "1".
use_python := env("USE_PYTHON", "0")
# Run decentralized integration tests with optional Python support and test filtering
decentralized-integration-tests test_name="":
    #!/usr/bin/env bash
    set -euo pipefail

    # Arguments common to every invocation; mode-specific pieces are appended below.
    cargo_args=(--release -p psyche-decentralized-testing)

    if [[ "{{ use_python }}" == "1" ]]; then
        echo "Running tests with Python support"
        just setup_python_test_infra
        cargo_args+=(--features python,parallelism)
    else
        echo "Running tests without Python support"
        just setup_test_infra
    fi

    cargo_args+=(--test integration_tests -- --nocapture)

    # An empty test_name means "run everything": no filter argument is passed.
    if [[ -n "{{ test_name }}" ]]; then
        cargo_args+=("{{ test_name }}")
    fi

    cargo test "${cargo_args[@]}"
# Output is always shown (--nocapture); a non-empty test_name is passed
# through as a test filter.
# run integration decentralized chaos tests
decentralized-chaos-integration-test test_name="":
    if [ -n "{{ test_name }}" ]; then \
        cargo test --release -p psyche-decentralized-testing --test chaos_tests -- --nocapture "{{ test_name }}"; \
    else \
        cargo test --release -p psyche-decentralized-testing --test chaos_tests -- --nocapture; \
    fi
# run_id is handed to the script as RUN_ID, shell-quoted so ids containing
# spaces or shell metacharacters arrive intact; extra args are forwarded as-is.
# Deploy coordinator on localnet and create a "test" run for 1.1b model.
setup-solana-localnet-test-run run_id="test" *args='':
    RUN_ID={{ quote(run_id) }} ./scripts/setup-and-deploy-solana-test.sh {{ args }}
# Same script as the full-size localnet setup, but with CONFIG_FILE pointing
# at the light config. run_id is shell-quoted before being exported as
# RUN_ID; extra args are forwarded as-is.
# Deploy coordinator on localnet and create a "test" run for 20m model.
setup-solana-localnet-light-test-run run_id="test" *args='':
    RUN_ID={{ quote(run_id) }} CONFIG_FILE=./config/solana-test/light-config.toml ./scripts/setup-and-deploy-solana-test.sh {{ args }}
# run_id is exported to the training script as RUN_ID (shell-quoted so
# unusual ids survive); extra args are forwarded as-is.
# Start client for training on localnet.
start-training-localnet-client run_id="test" *args='':
    RUN_ID={{ quote(run_id) }} ./scripts/train-solana-test.sh {{ args }}
# Exports BATCH_SIZE=1 and DP=1 alongside RUN_ID; run_id is shell-quoted so
# unusual ids survive, and extra args are forwarded as-is.
# Start client for training on localnet without data parallelism features and using light model.
start-training-localnet-light-client run_id="test" *args='':
    RUN_ID={{ quote(run_id) }} BATCH_SIZE=1 DP=1 ./scripts/train-solana-test.sh {{ args }}
# Endpoints exported to the training script by the telemetry recipe below.
OTLP_METRICS_URL := "http://localhost:4318/v1/metrics"
OTLP_LOGS_URL := "http://localhost:4318/v1/logs"

# Exports the same BATCH_SIZE=1 / DP=1 settings as the light localnet client,
# plus OTLP_METRICS_URL and OTLP_LOGS_URL. run_id is shell-quoted before being
# exported as RUN_ID; extra args are forwarded as-is.
# Start a light localnet training client with telemetry export enabled.
start-training-localnet-light-client-telemetry run_id="test" *args='':
    OTLP_METRICS_URL={{ OTLP_METRICS_URL }} OTLP_LOGS_URL={{ OTLP_LOGS_URL }} RUN_ID={{ quote(run_id) }} BATCH_SIZE=1 DP=1 ./scripts/train-solana-test.sh {{ args }}
# Solana Devnet HTTP and WebSocket RPC endpoints, exported as RPC / WS_RPC by
# the Devnet recipes in this file.
DEVNET_RPC := "https://api.devnet.solana.com"
DEVNET_WS_RPC := "wss://api.devnet.solana.com"
# Exports RPC / WS_RPC set to the Devnet endpoints and RUN_ID set to run_id
# (shell-quoted so unusual ids survive); extra args are forwarded as-is.
# Deploy coordinator on Devnet and create a "test" run for 1.1b model.
setup-solana-devnet-test-run run_id="test" *args='':
    RUN_ID={{ quote(run_id) }} RPC={{ DEVNET_RPC }} WS_RPC={{ DEVNET_WS_RPC }} ./scripts/deploy-solana-test.sh {{ args }}
# Same as the full-size Devnet setup, but with CONFIG_FILE pointing at the
# light config. run_id is shell-quoted before being exported as RUN_ID;
# extra args are forwarded as-is.
# Deploy coordinator on Devnet and create a "test" run for 20m model.
setup-solana-devnet-light-test-run run_id="test" *args='':
    RUN_ID={{ quote(run_id) }} RPC={{ DEVNET_RPC }} WS_RPC={{ DEVNET_WS_RPC }} CONFIG_FILE=./config/solana-test/light-config.toml ./scripts/deploy-solana-test.sh {{ args }}
# Exports RPC / WS_RPC set to the Devnet endpoints and RUN_ID set to run_id
# (shell-quoted so unusual ids survive); extra args are forwarded as-is.
# Start client for training on Devnet.
start-training-devnet-client run_id="test" *args='':
    RUN_ID={{ quote(run_id) }} RPC={{ DEVNET_RPC }} WS_RPC={{ DEVNET_WS_RPC }} ./scripts/train-solana-test.sh {{ args }}
# Exports BATCH_SIZE=1 and DP=1 together with the Devnet RPC / WS_RPC
# endpoints. run_id is shell-quoted before being exported as RUN_ID; extra
# args are forwarded as-is.
# Start client for training on Devnet without data parallelism features and using light model.
start-training-devnet-light-client run_id="test" *args='':
    RUN_ID={{ quote(run_id) }} RPC={{ DEVNET_RPC }} WS_RPC={{ DEVNET_WS_RPC }} BATCH_SIZE=1 DP=1 ./scripts/train-solana-test.sh {{ args }}
# env_file is required and is shell-quoted so paths containing spaces work;
# extra args are forwarded to the run-manager binary as-is.
# Run the run-manager with an env file
run-manager env_file *args='':
    cargo run --release -p run-manager -- --env-file {{ quote(env_file) }} {{ args }}
# run the psyche-solana-client tests with the solana-localnet-tests feature enabled
solana-client-tests:
    cargo test --package psyche-solana-client --features solana-localnet-tests
# Installs mdbook plus the mermaid and linkcheck plugins via cargo.
# install deps for building mdbook
book_deps:
    cargo install mdbook mdbook-mermaid mdbook-linkcheck
# Depends on generate_cli_docs, so the CLI help pages are regenerated first.
# output-dir is shell-quoted so destination paths containing spaces work.
# build psyche-book with mdbook into output-dir
build_book output-dir="../book": generate_cli_docs
    mdbook build psyche-book -d {{ quote(output-dir) }}
# Regenerates the CLI help pages first (generate_cli_docs dependency), then
# serves the book and opens it in a browser (--open).
# run an interactive development server for psyche-book
serve_book: generate_cli_docs
    mdbook serve psyche-book --open
# Each CLI is run with `print-all-help --markdown` and its stdout is
# redirected to a file named after the package.
# write the Markdown --help output of each CLI into psyche-book/generated/cli/
generate_cli_docs:
    echo "generating CLI --help outputs for mdbook..."
    mkdir -p psyche-book/generated/cli/
    cargo run -p psyche-centralized-client print-all-help --markdown > psyche-book/generated/cli/psyche-centralized-client.md
    cargo run -p psyche-centralized-server print-all-help --markdown > psyche-book/generated/cli/psyche-centralized-server.md
    cargo run -p psyche-centralized-local-testnet print-all-help --markdown > psyche-book/generated/cli/psyche-centralized-local-testnet.md
    cargo run -p psyche-sidecar print-all-help --markdown > psyche-book/generated/cli/psyche-sidecar.md
# ARGS are inserted before `--gpus all`, i.e. they are options to `docker run`.
# build the Solana client image through the nix module, then run it detached with all GPUs
run_docker_client *ARGS:
    just nix build_docker_solana_client
    docker run -d {{ ARGS }} --gpus all psyche-prod-solana-client
# There's no way to do this using the replicas from docker compose file, so we have to do it manually.
# Runs the coordinator address check, builds the test client image through the
# nix module, then hands num_clients to the multi-GPU localnet training script.
# Setup clients assigning one available GPU to each of them.
setup_gpu_clients num_clients="1":
    ./scripts/coordinator-address-check.sh
    just nix build_docker_solana_test_client
    ./scripts/train-multiple-gpu-localnet.sh {{ num_clients }}
# `docker image prune` removes dangling images and succeeds when there are
# none, whereas `docker rmi $(docker images -f dangling=true -q)` exits with a
# usage error on an empty list. --force only skips the confirmation prompt.
# remove dangling (untagged) Docker images
clean_stale_images:
    docker image prune --force
# The image is built through the nix module and then pushed to
# docker.io/nousresearch/psyche-centralized-client.
# Build & push the centralized client Docker image
docker_push_centralized_client:
    just nix docker_build_centralized_client
    docker push docker.io/nousresearch/psyche-centralized-client
# Builds both anchor programs (coordinator and authorizer), then builds the
# no-Python test client image and the test validator image via the nix module.
# Setup the infrastructure for testing locally using Docker (client image without Python).
setup_test_infra:
    cd architectures/decentralized/solana-coordinator && anchor build
    cd architectures/decentralized/solana-authorizer && anchor build
    just nix build_docker_solana_test_client_no_python
    just nix build_docker_solana_test_validator
# Builds both anchor programs (coordinator and authorizer), then builds the
# regular test client image and the test validator image via the nix module.
# This is the variant the decentralized integration tests use when Python
# support is enabled.
# Setup the infrastructure for testing locally using Docker (Python-enabled test runs).
setup_python_test_infra:
    cd architectures/decentralized/solana-coordinator && anchor build
    cd architectures/decentralized/solana-authorizer && anchor build
    just nix build_docker_solana_test_client
    just nix build_docker_solana_test_validator
# The GPU compose overlay is added when nvidia-smi is on PATH and USE_GPU is
# anything other than "0" (including unset). NUM_REPLICAS is set to num_clients.
# start the Docker test stack defined in docker/test
run_test_infra num_clients="1":
    #!/usr/bin/env bash
    # Stop at the first failure: without this a failed `cd` would be ignored
    # and docker compose would run from the wrong directory.
    set -euo pipefail
    cd docker/test
    # USE_GPU may legitimately be unset; default it to empty so `set -u` does
    # not abort and an unset value still counts as "not 0".
    if [ "${USE_GPU:-}" != "0" ] && command -v nvidia-smi &> /dev/null; then
        echo "GPU detected and USE_GPU not set to 0, enabling GPU support"
        NUM_REPLICAS={{ num_clients }} docker compose -f docker-compose.yml -f docker-compose.gpu.yml up -d --force-recreate
    else
        echo "Running without GPU support"
        NUM_REPLICAS={{ num_clients }} docker compose -f docker-compose.yml up -d --force-recreate
    fi
# Layers docker/test/subscriptions_test/docker-compose.yml on top of the base
# test compose file. The GPU overlay is added when nvidia-smi is on PATH and
# USE_GPU is anything other than "0" (including unset). NUM_REPLICAS is set to
# num_clients.
# start the Docker test stack together with the subscriptions_test compose overlay
run_test_infra_with_proxies_validator num_clients="1":
    #!/usr/bin/env bash
    # Strict mode, matching the other bash recipes: abort on the first failure.
    set -euo pipefail
    # Both branches run from this directory, so change into it once up front.
    cd docker/test/subscriptions_test
    # USE_GPU may legitimately be unset; default it to empty so `set -u` does
    # not abort and an unset value still counts as "not 0".
    if [ "${USE_GPU:-}" != "0" ] && command -v nvidia-smi &> /dev/null; then
        echo "GPU detected and USE_GPU not set to 0, enabling GPU support"
        NUM_REPLICAS={{ num_clients }} docker compose -f ../docker-compose.yml -f docker-compose.yml -f ../docker-compose.gpu.yml up -d --force-recreate
    else
        echo "Running without GPU support"
        NUM_REPLICAS={{ num_clients }} docker compose -f ../docker-compose.yml -f docker-compose.yml up -d --force-recreate
    fi
# Passes both the base and the subscriptions_test compose files to
# `docker compose down`, so it covers stacks started by either run recipe.
# stop the Docker test stack defined in docker/test
stop_test_infra:
    cd docker/test && docker compose -f docker-compose.yml -f subscriptions_test/docker-compose.yml down