Compare commits
442 Commits
maintenanc
...
91fa59e397
| Author | SHA1 | Date | |
|---|---|---|---|
|
91fa59e397
|
|||
| f3bfe89f27 | |||
| ee6f981006 | |||
| b040bebd1d | |||
| f69629d2da | |||
| 0668f0db74 | |||
| 5fcd57a061 | |||
| ad1544759f | |||
| e1c950a530 | |||
| f9632c37f8 | |||
| 1f0cb4ae76 | |||
| d49d078bed | |||
| e98fdb89ab | |||
| 6afe05b5fd | |||
| 7d5aebf882 | |||
| 94cbfd38a6 | |||
| 4da7780472 | |||
| a6dfc267fd | |||
| d6126501ba | |||
| ac0deb47b6 | |||
| f7d676de77 | |||
| cf1db201b2 | |||
| e6e4846529 | |||
| 084d556c56 | |||
| ff0fc18d0a | |||
| 19c7e32678 | |||
| 017c19e7d0 | |||
| a36eff8749 | |||
| df17b11458 | |||
| 0dc7b7eb3d | |||
| dff6eaf587 | |||
| 4b6b67b587 | |||
| 20e7d244d1 | |||
| c5d3b8e7f0 | |||
| 6bbfb0d124 | |||
| 46d03d5ca7 | |||
| e366e6ce87 | |||
| e415f70bbb | |||
| 200c727bbf | |||
| 7413021440 | |||
| 20b4805335 | |||
| f7dff9deab | |||
| f569933732 | |||
| ee895d2e4f | |||
| 5ee8623af2 | |||
| a0e4b209b0 | |||
| ce25867421 | |||
| f89bba35a6 | |||
| d591721a61 | |||
| 343b4f155e | |||
| 39a211a846 | |||
| 142985c505 | |||
| 3f3dc2d037 | |||
| 3269d763aa | |||
| f2d8ee8552 | |||
| 8d984a0672 | |||
| f3733418b2 | |||
| ce8b05b142 | |||
| 4a5787e0c6 | |||
| 6c11093033 | |||
| 750504744f | |||
| c26ec1b6f1 | |||
| 2ef32f773c | |||
| fc9fcd602a | |||
| 0e37ab5fe1 | |||
| a1b387e454 | |||
| 380abe9957 | |||
| 37c12783bb | |||
| 7379e84e79 | |||
| b802f88df9 | |||
| bd94c4ad00 | |||
| 570c6e175d | |||
| 96661dd0d4 | |||
| 28db7799ea | |||
| 508059c99e | |||
| b9f9cc7d7a | |||
| eae0c7cb59 | |||
| 2280635cd6 | |||
| 16ada09600 | |||
| 0d291d715c | |||
| 66001f76f7 | |||
| 1e3b85067d | |||
| 36ee1f3adc | |||
| 25e9c071b0 | |||
| 80cee2dbd0 | |||
| ee92934c74 | |||
| db0f3fed91 | |||
| adeaa0484d | |||
| 815810830e | |||
| 7a52e1907c | |||
| 22a2e1b9e8 | |||
| f29461ae32 | |||
| 208197f099 | |||
| 479ca1b671 | |||
| 40529fbdcb | |||
| 9b0d3fb21e | |||
| d8444131d8 | |||
| af540456a6 | |||
| 42d6734da8 | |||
| 071a8084a0 | |||
| 24a0c58592 | |||
| 810a6dfcec | |||
| 47ad89dee1 | |||
| 8af1b259f5 | |||
| 560003d4fd | |||
| 68ff45075c | |||
| fc68d16197 | |||
| f6ec1293f4 | |||
| 4feeff978c | |||
| 7b19292912 | |||
| 0627db0eb9 | |||
| ae2f6dde41 | |||
| 3bf70656dc | |||
| 1cf989d727 | |||
| 19f734e622 | |||
| d6e3d9626c | |||
| 9c32e42dcc | |||
| 61e6d3232b | |||
| d0fd8cde46 | |||
| 5223ea53f6 | |||
| 253426ce00 | |||
| df67b6cd26 | |||
| 766da21097 | |||
| 18461c0d59 | |||
| 028b151c78 | |||
| 7176b066bb | |||
| c3c3614f63 | |||
| e13288fc29 | |||
| e9e3704b67 | |||
| 7d3c7342ae | |||
| 8f80ed2cce | |||
| d00f996f59 | |||
| e40fd24f26 | |||
| 83efd6c876 | |||
| f0c4206ab8 | |||
| 8b43a6ffb6 | |||
| 2bca10b0e4 | |||
| eec3e27d66 | |||
| e51ef52721 | |||
| 9dc67d402f | |||
| 62ec4e014a | |||
| 4d03842f7c | |||
| 8fedc5518e | |||
| 43dc336638 | |||
| 2b08fcd21a | |||
| 557618d43f | |||
| e8ac6cf0f3 | |||
| f8fc391cae | |||
| 6c1afa3fd8 | |||
| 008584b465 | |||
| a22c862192 | |||
| cd0c070439 | |||
| 201ff64b25 | |||
| 9bee145e25 | |||
| 4528b7c2a6 | |||
| 1eac0fcad8 | |||
| dd15f9c943 | |||
| 4048b3327a | |||
| f4229e34f6 | |||
| 5208a3483b | |||
| 92eacfad20 | |||
| 80309d107b | |||
| d0f151595f | |||
| 93f8d3aa89 | |||
| d84645f3e1 | |||
| 55b71d6901 | |||
| 89c65ea578 | |||
| 129273e8d8 | |||
| fdac196c6c | |||
| 3f4b4fb810 | |||
| 2c7211ffa3 | |||
| 18f25307ab | |||
| 7c55d10ceb | |||
| 5c549faaa8 | |||
| 9fd35a9ce4 | |||
| 5487a93972 | |||
| fe16ea373f | |||
| 163434af09 | |||
| 71164400d4 | |||
| f887dacdea | |||
| 4f5c8dbbaf | |||
| 14b192b1d9 | |||
| 2b04812320 | |||
| 2f6f6ba703 | |||
| 371b0c7e76 | |||
| ae34eacf4a | |||
| dab6f08d89 | |||
| 8190523c30 | |||
| d335d69ba6 | |||
| cec49eb5fc | |||
| 22db38c98f | |||
| 0d4eebbb59 | |||
| 025f6a0c0c | |||
| abc74c5445 | |||
| 6942f09f69 | |||
| 56f6855af7 | |||
| 81c822e68e | |||
| 53e80b1f19 | |||
| 21feb01e7b | |||
| 9ea7b2b475 | |||
| fce4d89e1d | |||
| 6b282375f8 | |||
| 260986b9f2 | |||
| 15afbe94bd | |||
| efd35a9cd1 | |||
| 50ad1d637c | |||
| c299d53146 | |||
| 152b71e718 | |||
| 0911d5b92a | |||
| 5ddae068af | |||
| d17be714ec | |||
| 28ce15d74d | |||
| 504f9bb570 | |||
| f158cb63e8 | |||
| 8860f76cad | |||
| b86798cd69 | |||
| 7ed74931cf | |||
| 6e9d33b483 | |||
| 58abaefbc4 | |||
| 5ea7827a8a | |||
| b17e4a13f9 | |||
| 9c4e60c2c2 | |||
| e7376917bd | |||
| 130e191d37 | |||
| 349f69e30a | |||
| 59ab6405c5 | |||
| a0dab66aa5 | |||
| 525cad4117 | |||
| 24ee74d614 | |||
| 15b4b28d2c | |||
| b1ce302e4b | |||
| b8b85f55cd | |||
| 1189626a6f | |||
| dbd95dd7b8 | |||
| 81b680a7d2 | |||
| ba60e121df | |||
| 432e6c8521 | |||
| c8160122b3 | |||
| 3863fc25a5 | |||
| 2b26cd2f46 | |||
| 30f2079f0b | |||
| 366436b6d3 | |||
| 9f1cd02144 | |||
| 82ccae1315 | |||
| 1df80460d2 | |||
| 7f17fe8874 | |||
| 5880a6e5f6 | |||
| ecbb45d6ac | |||
| c564d945d4 | |||
| ed887b0412 | |||
| fe1d3fbb80 | |||
| 5234ca32fd | |||
| cfe0c0e6e6 | |||
| 7afe7344ac | |||
| bd83ca53ab | |||
| 0d9c99a24e | |||
| db98b1f698 | |||
| 84c4b6b81c | |||
| 19e195b894 | |||
| 54c2bd119f | |||
| e5d85c1b38 | |||
| f1486b84c1 | |||
| 472f4b0334 | |||
| 425dca3e00 | |||
| e4080cf931 | |||
| fc9285f89d | |||
| fbe238f5b6 | |||
| 9874da566d | |||
| ebc5c4d84f | |||
| 8634a9e133 | |||
| 0ce79ed79e | |||
| 5f492ee1d7 | |||
| 9071a4de8b | |||
| 3040a803b2 | |||
| 70a9e855cf | |||
| aa64e9ef24 | |||
| ba2b74fd5a | |||
| 1ae5d9e25e | |||
| ff98ba47c4 | |||
| 599b23ef52 | |||
| 8dbee06d1d | |||
| d522113cb9 | |||
| 7bfd786c01 | |||
| 5a5f4672cd | |||
| 2646ad4b70 | |||
| b120a7ca85 | |||
| 2a0254b684 | |||
| e3e6e7662d | |||
| 868f825e26 | |||
| f231dc81f1 | |||
| a758eef354 | |||
| 9c9c41fb57 | |||
| 1a1708f16f | |||
| efe1b7e399 | |||
| eb9876aff6 | |||
| 8d31c552f5 | |||
| 68f4d54dd1 | |||
| 2042d58b72 | |||
| 2c8c90e6e4 | |||
| 208dcb7dde | |||
| e2f82a6383 | |||
| d704816de9 | |||
| 74ec4eb22a | |||
| 0a5f9b55f5 | |||
| 900de39e2f | |||
| 1e466d07df | |||
| 13807c5e8f | |||
| d8d6d6d421 | |||
| a242ddd39c | |||
| a2c5fe1f5e | |||
| 2c52ef9ff0 | |||
| acb91695ac | |||
| 9d93760e6f | |||
| aad67b9d99 | |||
| e1d406023d | |||
| db6bb90af8 | |||
| 1266c8f04e | |||
| 2b7823788c | |||
| 86eacdd3e5 | |||
| 4fa074f893 | |||
| a260a1bc1b | |||
| 8912d2b9bc | |||
| b4015ded86 | |||
| 0f54d63a46 | |||
| 6c656182f1 | |||
| be4187de3c | |||
| 0b22a1b8a4 | |||
| f18f1937ae | |||
| 4b78ec9134 | |||
| 6c0c26b3aa | |||
| fb1744306d | |||
| 394c7ecd7b | |||
| 3276f54e86 | |||
| 4c806b8ae9 | |||
| 832866cbfa | |||
| 9fc393bb6a | |||
| d81d9d58e1 | |||
| d54dcc8d8f | |||
| a5fae4a289 | |||
| a355926cf0 | |||
| d7a4420205 | |||
| 0b55ce3d02 | |||
| 0ce574800e | |||
| a7e09e55df | |||
| 1622b3e7fc | |||
| 3424cac761 | |||
| f98af9aeef | |||
| 8c14b75e44 | |||
| e497e1b88b | |||
| 07411beb49 | |||
| e8bab9928d | |||
| 544d5a3d69 | |||
| 312f2cb368 | |||
| 45ac6e95e9 | |||
| e6bb6e735d | |||
| cfbfcdbe8c | |||
| c31bfd6b4d | |||
| d20fa359d9 | |||
| 9be15fdad2 | |||
| 13e365002c | |||
| a38072762f | |||
| adf1ff29a7 | |||
| 1ec8d7a625 | |||
| f78f4f5822 | |||
| 67a57cb3e5 | |||
| 85896f8546 | |||
| 5e728773c3 | |||
| 0a06cf564b | |||
| db26b2ae37 | |||
| f7d00dec25 | |||
| 2053ec82b7 | |||
| f2434a17c2 | |||
| 1f7045fcfe | |||
| 0c4a1efa27 | |||
| 530958496b | |||
| df378a2933 | |||
| 2a0fe5a137 | |||
| dfbeafa2b2 | |||
| 7d4281a5c1 | |||
| dfea0be2d9 | |||
| df91da8c34 | |||
| 30c21155af | |||
| a43016ebee | |||
| 801bb4ba3c | |||
| a9d740e95a | |||
| 08eaf312f2 | |||
| 0b57bbc6e3 | |||
| 6558a6ab77 | |||
| 0d196af473 | |||
| d35becb663 | |||
| 5421eab09a | |||
| 1c7de2f7c9 | |||
| c7692995f4 | |||
| 0af185afd8 | |||
| 470b3d2512 | |||
| 1bf6747b3a | |||
| 59bf51dfde | |||
| b72d9936a2 | |||
| 5ebb57deff | |||
| 5b82a72647 | |||
| a5c7205481 | |||
| fd1b467a60 | |||
| 882161b21e | |||
| 5e8ff50c98 | |||
| cdb0688ec1 | |||
| ebb5e94416 | |||
| 89049d0b1f | |||
| 6d16772d07 | |||
| e37f9e2b0f | |||
| 9767238c76 | |||
| a5a0fd9b6f | |||
| be69070f61 | |||
| 53f6dcec8d | |||
| 87c4521de3 | |||
| 461d6d2f34 | |||
| ef2ffa61c3 | |||
| c0b23ad450 | |||
| f12ba9f8b0 | |||
| a211e9ebee | |||
| 5dbbb27c43 | |||
| 69bb2128db | |||
| de7cae6208 | |||
| de4ac8cbd6 | |||
| e1dcad50d0 | |||
| 0120be66fb | |||
| 6cb079a44e | |||
| a5449067a7 | |||
| 1009736d81 | |||
| a94765e8ae | |||
| 9630b23ce2 | |||
| ed158ee87f | |||
| 480dd95d9b | |||
| f7b18098b1 | |||
| c580254dde | |||
| 7e6c395ff8 | |||
| 6978677cb5 | |||
| f5b4580dae | |||
| 035becd018 | |||
| a7fb69ab92 | |||
| 733eb93f23 | |||
| b60e821eaa | |||
| f43d549294 |
@@ -1,156 +0,0 @@
|
|||||||
# Maintenance purchase 2025-05
|
|
||||||
|
|
||||||
We need to buy some components to replace broken parts or to have spare ones for
|
|
||||||
when they break. We also need some tools to do basic repairs.
|
|
||||||
|
|
||||||
Here is the list:
|
|
||||||
|
|
||||||
- 11 x Power supply DELTA DPS-750XB A (700 W) (this is critical)
|
|
||||||
- 57.69€/unit, 634.59€ total <https://es.aliexpress.com/item/1005004090017186.html>
|
|
||||||
|
|
||||||
- 8 x RAM DDR4 2400MHz PC4-19200 ECC Registered
|
|
||||||
- 128.85€/pair, 515.40€ total <https://www.amazon.es/PC4-19200-REGISTERED-MEMORY-WORKSTATIONS-MOTHERBOARDS/dp/B06W9P3RKF>
|
|
||||||
|
|
||||||
- 1 x Set of screwdrivers
|
|
||||||
- 23.99€ <https://www.amazon.es/BLOSTM-Juego-Destornilladores-Profesionales-Destornillador/dp/B09W9R8J3S>
|
|
||||||
|
|
||||||
- 1 x UART adaptor
|
|
||||||
- 14.99€ <https://www.amazon.es/DSD-TECH-SH-U09C5-convertidor-Soporte/dp/B07WX2DSVB>
|
|
||||||
|
|
||||||
- 1 x SSD SATA disk of 2 TB
|
|
||||||
- 135.99€ <https://www.amazon.es/Crucial-BX500-pulgadas-interno-CT2000BX500SSD101/dp/B0CCN9QWKT>
|
|
||||||
|
|
||||||
Total: 1324.96 €
|
|
||||||
|
|
||||||
# Rationale
|
|
||||||
|
|
||||||
Below is the search procedure I followed to come up with that list.
|
|
||||||
|
|
||||||
## Power supplies
|
|
||||||
|
|
||||||
They are the first components to fail. We already have some problems with the
|
|
||||||
monitoring of some power supplies. They will soon stop being manufactured, so we
|
|
||||||
should increase our stock.
|
|
||||||
|
|
||||||
Most Xeon nodes use the DELTA DPS-750XB A:
|
|
||||||
|
|
||||||
hut% sudo ipmitool fru
|
|
||||||
...
|
|
||||||
FRU Device Description : Pwr Supply 1 FRU (ID 2)
|
|
||||||
Product Manufacturer : DELTA
|
|
||||||
Product Name : DPS-750XB A
|
|
||||||
Product Part Number : E98791-010
|
|
||||||
Product Version : 05
|
|
||||||
Product Serial : XXXXXXXXXXXXXXXXX
|
|
||||||
|
|
||||||
And we only have one per node. We should make the power supply redundant so we
|
|
||||||
can tolerate a failure without bringing down the node.
|
|
||||||
|
|
||||||
They are available on Amazon, but they are very expensive (287.54 €):
|
|
||||||
|
|
||||||
<https://www.amazon.es/DPS-750XB-E98791-010-alimentaci%C3%B3n-conmutada-Platinum/dp/B0DB65G4VT>
|
|
||||||
|
|
||||||
On Aliexpress they are much cheaper (57.69 €):
|
|
||||||
|
|
||||||
<https://es.aliexpress.com/item/1005004090017186.html>
|
|
||||||
|
|
||||||
We have 11 nodes plus the login, but I'm not able to figure out which power
|
|
||||||
supply the login is using.
|
|
||||||
|
|
||||||
The login uses another one, AXX1100PCRPS, and only has one slot populated. We
|
|
||||||
may want to also buy another one, but I would need to reset the FRU and I don't
|
|
||||||
have access to the login node. So I will leave this for Operations to deal with.
|
|
||||||
We can live without the login if needed.
|
|
||||||
|
|
||||||
## RAM DIMM
|
|
||||||
|
|
||||||
The DIMM modules also experience errors, which are monitored by Linux. In some
|
|
||||||
nodes we see non-recoverable errors that are no longer corrected by the ECC. We
|
|
||||||
need to replace the bad modules.
|
|
||||||
|
|
||||||
Having two spare modules per node would be enough to cover most problems in the
|
|
||||||
future.
|
|
||||||
|
|
||||||
> 16 GB, 2400 MHz RDIMM
|
|
||||||
|
|
||||||
The module from dmidecode:
|
|
||||||
|
|
||||||
Handle 0x0026, DMI type 17, 40 bytes
|
|
||||||
Memory Device
|
|
||||||
Array Handle: 0x0020
|
|
||||||
Error Information Handle: Not Provided
|
|
||||||
Total Width: 72 bits
|
|
||||||
Data Width: 64 bits
|
|
||||||
Size: 16 GB
|
|
||||||
Form Factor: DIMM
|
|
||||||
Set: None
|
|
||||||
Locator: DIMM_B1
|
|
||||||
Bank Locator: NODE 1
|
|
||||||
Type: DDR4
|
|
||||||
Type Detail: Synchronous
|
|
||||||
Speed: 2400 MT/s
|
|
||||||
Manufacturer: Micron
|
|
||||||
Serial Number: XXXXXXXX
|
|
||||||
Asset Tag:
|
|
||||||
Part Number: 36ASF2G72PZ-2G3B1
|
|
||||||
Rank: 2
|
|
||||||
Configured Memory Speed: 2400 MT/s
|
|
||||||
Minimum Voltage: Unknown
|
|
||||||
Maximum Voltage: Unknown
|
|
||||||
Configured Voltage: Unknown
|
|
||||||
|
|
||||||
Which is this module:
|
|
||||||
|
|
||||||
<https://www.amazon.com/Micron-PC4-19200-DDR4-2400MHz-Registered-MTA36ASF2G72PZ-2G3B1/dp/B01KBCNEGI>
|
|
||||||
|
|
||||||
But they have only one in stock. Here are more details:
|
|
||||||
|
|
||||||
> 16GB PC4-19200 DDR4-2400MHz
|
|
||||||
|
|
||||||
They must have the following features:
|
|
||||||
|
|
||||||
- 16 GB
|
|
||||||
- DDR4
|
|
||||||
- Speed at least 2400 MT/s
|
|
||||||
- ECC
|
|
||||||
- Registered
|
|
||||||
- Best if from Micron
|
|
||||||
|
|
||||||
I would say having 8 spare modules would be enough for now, as we only have a
|
|
||||||
few that are currently failing. We could upgrade the modules later, as they
|
|
||||||
don't have much risk of stopping being manufactured like the power supplies.
|
|
||||||
|
|
||||||
These may work:
|
|
||||||
|
|
||||||
- 1 x 16GB, 69,11€ <https://www.amazon.es/PC4-19200-REGISTRADO-SERVIDORES-Estaciones-CHIPKILL/dp/B06X42HC9N>
|
|
||||||
|
|
||||||
- 2 x 16GB, 128,85€ <https://www.amazon.es/PC4-19200-REGISTERED-MEMORY-WORKSTATIONS-MOTHERBOARDS/dp/B06W9P3RKF>
|
|
||||||
|
|
||||||
It is cheaper to buy them in pairs, so let's use the last one.
|
|
||||||
|
|
||||||
## Screwdriver set
|
|
||||||
|
|
||||||
In order to change and replace the machine parts we need a set of screwdrivers.
|
|
||||||
Instead of having to bring my own from home, I want to have one at BSC. These
|
|
||||||
are enough and come in a nice box so I don't lose them:
|
|
||||||
|
|
||||||
<https://www.amazon.es/BLOSTM-Juego-Destornilladores-Profesionales-Destornillador/dp/B09W9R8J3S>
|
|
||||||
|
|
||||||
## Serial port adaptor
|
|
||||||
|
|
||||||
In order to debug problems with several components, we need to be able to plug
|
|
||||||
to the serial port of the CPU. As we may deal with different voltages and
|
|
||||||
pinouts, the most versatile option is to just be able to select the voltage and
|
|
||||||
expose a pin interface.
|
|
||||||
|
|
||||||
This one would do:
|
|
||||||
|
|
||||||
<https://www.amazon.es/DSD-TECH-SH-U09C5-convertidor-Soporte/dp/B07WX2DSVB>
|
|
||||||
|
|
||||||
## Storage for raccoon
|
|
||||||
|
|
||||||
Given that we are currently using raccoon for builds too, we would need to
|
|
||||||
increase its current storage. We only have 270 GB available, so we can benefit
|
|
||||||
from another disk. Using 2 TB would be plenty. This one seems enough:
|
|
||||||
|
|
||||||
- 135,99€ <https://www.amazon.es/Crucial-BX500-pulgadas-interno-CT2000BX500SSD101/dp/B0CCN9QWKT>
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
doc/bsc-ssf.pdf
BIN
doc/bsc-ssf.pdf
Binary file not shown.
46
doc/trim.sh
Executable file
46
doc/trim.sh
Executable file
@@ -0,0 +1,46 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# Trims the jungle repository by moving the website to its own repository and
|
||||||
|
# removing it from jungle. It also removes big pdf files and kernel
|
||||||
|
# configurations so the jungle repository is small.
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
if [ -e oldjungle -o -e newjungle -o -e website ]; then
|
||||||
|
echo "remove oldjungle/, newjungle/ and website/ first"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Clone the old jungle repo
|
||||||
|
git clone gitea@tent:rarias/jungle.git oldjungle
|
||||||
|
|
||||||
|
# First split the website into a new repository
|
||||||
|
mkdir website && git -C website init -b master
|
||||||
|
git-filter-repo \
|
||||||
|
--path web \
|
||||||
|
--subdirectory-filter web \
|
||||||
|
--source oldjungle \
|
||||||
|
--target website
|
||||||
|
|
||||||
|
# Then remove the website, pdf files and big kernel configs
|
||||||
|
mkdir newjungle && git -C newjungle init -b master
|
||||||
|
git-filter-repo \
|
||||||
|
--invert-paths \
|
||||||
|
--path web \
|
||||||
|
--path-glob 'doc*.pdf' \
|
||||||
|
--path-glob '**/kernel/configs/lockdep' \
|
||||||
|
--path-glob '**/kernel/configs/defconfig' \
|
||||||
|
--source oldjungle \
|
||||||
|
--target newjungle
|
||||||
|
|
||||||
|
set -x
|
||||||
|
|
||||||
|
du -sh oldjungle newjungle website
|
||||||
|
# 57M oldjungle
|
||||||
|
# 2,3M newjungle
|
||||||
|
# 6,4M website
|
||||||
|
|
||||||
|
du -sh --exclude=.git oldjungle newjungle website
|
||||||
|
# 30M oldjungle
|
||||||
|
# 700K newjungle
|
||||||
|
# 3,5M website
|
||||||
34
flake.lock
generated
34
flake.lock
generated
@@ -10,11 +10,11 @@
|
|||||||
"systems": "systems"
|
"systems": "systems"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1723293904,
|
"lastModified": 1750173260,
|
||||||
"narHash": "sha256-b+uqzj+Wa6xgMS9aNbX4I+sXeb5biPDi39VgvSFqFvU=",
|
"narHash": "sha256-9P1FziAwl5+3edkfFcr5HeGtQUtrSdk/MksX39GieoA=",
|
||||||
"owner": "ryantm",
|
"owner": "ryantm",
|
||||||
"repo": "agenix",
|
"repo": "agenix",
|
||||||
"rev": "f6291c5935fdc4e0bef208cfc0dcab7e3f7a1c41",
|
"rev": "531beac616433bac6f9e2a19feb8e99a22a66baf",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@@ -30,11 +30,11 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1732868163,
|
"lastModified": 1749650500,
|
||||||
"narHash": "sha256-qck4h298AgcNI6BnGhEwl26MTLXjumuJVr+9kak7uPo=",
|
"narHash": "sha256-2MHfVPV6RA7qPSCtXh4+KK0F0UjN+J4z8//+n6NK7Xs=",
|
||||||
"ref": "refs/heads/master",
|
"ref": "refs/heads/master",
|
||||||
"rev": "6782fc6c5b5a29e84a7f2c2d1064f4bcb1288c0f",
|
"rev": "9d1944c658929b6f98b3f3803fead4d1b91c4405",
|
||||||
"revCount": 952,
|
"revCount": 961,
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
"url": "https://git.sr.ht/~rodarima/bscpkgs"
|
||||||
},
|
},
|
||||||
@@ -51,11 +51,11 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1700795494,
|
"lastModified": 1744478979,
|
||||||
"narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=",
|
"narHash": "sha256-dyN+teG9G82G+m+PX/aSAagkC+vUv0SgUw3XkPhQodQ=",
|
||||||
"owner": "lnl7",
|
"owner": "lnl7",
|
||||||
"repo": "nix-darwin",
|
"repo": "nix-darwin",
|
||||||
"rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d",
|
"rev": "43975d782b418ebf4969e9ccba82466728c2851b",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@@ -73,11 +73,11 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1703113217,
|
"lastModified": 1745494811,
|
||||||
"narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=",
|
"narHash": "sha256-YZCh2o9Ua1n9uCvrvi5pRxtuVNml8X2a03qIFfRKpFs=",
|
||||||
"owner": "nix-community",
|
"owner": "nix-community",
|
||||||
"repo": "home-manager",
|
"repo": "home-manager",
|
||||||
"rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1",
|
"rev": "abfad3d2958c9e6300a883bd443512c55dfeb1be",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
@@ -88,16 +88,16 @@
|
|||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1736867362,
|
"lastModified": 1752436162,
|
||||||
"narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=",
|
"narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc",
|
"rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"ref": "nixos-24.11",
|
"ref": "nixos-25.05",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
inputs = {
|
inputs = {
|
||||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
|
||||||
agenix.url = "github:ryantm/agenix";
|
agenix.url = "github:ryantm/agenix";
|
||||||
agenix.inputs.nixpkgs.follows = "nixpkgs";
|
agenix.inputs.nixpkgs.follows = "nixpkgs";
|
||||||
bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
|
bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
|
||||||
@@ -18,6 +18,7 @@ in
|
|||||||
{
|
{
|
||||||
nixosConfigurations = {
|
nixosConfigurations = {
|
||||||
hut = mkConf "hut";
|
hut = mkConf "hut";
|
||||||
|
tent = mkConf "tent";
|
||||||
owl1 = mkConf "owl1";
|
owl1 = mkConf "owl1";
|
||||||
owl2 = mkConf "owl2";
|
owl2 = mkConf "owl2";
|
||||||
eudy = mkConf "eudy";
|
eudy = mkConf "eudy";
|
||||||
@@ -26,6 +27,8 @@ in
|
|||||||
lake2 = mkConf "lake2";
|
lake2 = mkConf "lake2";
|
||||||
raccoon = mkConf "raccoon";
|
raccoon = mkConf "raccoon";
|
||||||
fox = mkConf "fox";
|
fox = mkConf "fox";
|
||||||
|
apex = mkConf "apex";
|
||||||
|
weasel = mkConf "weasel";
|
||||||
};
|
};
|
||||||
|
|
||||||
packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {
|
packages.x86_64-linux = self.nixosConfigurations.hut.pkgs // {
|
||||||
|
|||||||
33
keys.nix
33
keys.nix
@@ -2,29 +2,36 @@
|
|||||||
# here all the public keys
|
# here all the public keys
|
||||||
rec {
|
rec {
|
||||||
hosts = {
|
hosts = {
|
||||||
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
|
hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut";
|
||||||
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
|
owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1";
|
||||||
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
|
owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2";
|
||||||
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
|
eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy";
|
||||||
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro";
|
||||||
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay";
|
||||||
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2";
|
||||||
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDa9lId4rB/EKGkkCCVOy0cuId2SYLs+8W8kx0kmpO1y fox";
|
fox = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDwItIk5uOJcQEVPoy/CVGRzfmE1ojrdDcI06FrU4NFT fox";
|
||||||
|
tent = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFAtTpHtdYoelbknD/IcfBlThwLKJv/dSmylOgpg3FRM tent";
|
||||||
|
apex = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBvUFjSfoxXnKwXhEFXx5ckRKJ0oewJ82mRitSMNMKjh apex";
|
||||||
|
weasel = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFLJrQ8BF6KcweQV8pLkSbFT+tbDxSG9qxrdQE65zJZp weasel";
|
||||||
|
raccoon = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGNQttFvL0dNEyy7klIhLoK4xXOeM2/K9R7lPMTG3qvK raccoon";
|
||||||
};
|
};
|
||||||
|
|
||||||
hostGroup = with hosts; rec {
|
hostGroup = with hosts; rec {
|
||||||
compute = [ owl1 owl2 fox ];
|
compute = [ owl1 owl2 fox raccoon ];
|
||||||
playground = [ eudy koro ];
|
playground = [ eudy koro weasel ];
|
||||||
storage = [ bay lake2 ];
|
storage = [ bay lake2 ];
|
||||||
monitor = [ hut ];
|
monitor = [ hut ];
|
||||||
|
login = [ apex ];
|
||||||
|
|
||||||
system = storage ++ monitor;
|
system = storage ++ monitor ++ login;
|
||||||
safe = system ++ compute;
|
safe = system ++ compute;
|
||||||
all = safe ++ playground;
|
all = safe ++ playground;
|
||||||
};
|
};
|
||||||
|
|
||||||
admins = {
|
admins = {
|
||||||
rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
|
"rarias@hut" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut";
|
||||||
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
|
"rarias@tent" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIwlWSBTZi74WTz5xn6gBvTmCoVltmtIAeM3RMmkh4QZ rarias@tent";
|
||||||
|
"rarias@fox" = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIDSbw3REAKECV7E2c/e2XJITudJQWq2qDSe2N1JHqHZd rarias@fox";
|
||||||
|
root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut";
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
69
m/apex/configuration.nix
Normal file
69
m/apex/configuration.nix
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
{ lib, config, pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
../common/xeon.nix
|
||||||
|
../common/ssf/hosts.nix
|
||||||
|
../module/ceph.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
|
../module/slurm-server.nix
|
||||||
|
./nfs.nix
|
||||||
|
./wireguard.nix
|
||||||
|
];
|
||||||
|
|
||||||
|
# Don't install grub MBR for now
|
||||||
|
boot.loader.grub.device = "nodev";
|
||||||
|
|
||||||
|
boot.initrd.kernelModules = [
|
||||||
|
"megaraid_sas" # For HW RAID
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
storcli # To manage HW RAID
|
||||||
|
];
|
||||||
|
|
||||||
|
fileSystems."/home" = {
|
||||||
|
device = "/dev/disk/by-label/home";
|
||||||
|
fsType = "ext4";
|
||||||
|
};
|
||||||
|
|
||||||
|
# No swap, there is plenty of RAM
|
||||||
|
swapDevices = lib.mkForce [];
|
||||||
|
|
||||||
|
networking = {
|
||||||
|
hostName = "apex";
|
||||||
|
defaultGateway = "84.88.53.233";
|
||||||
|
nameservers = [ "8.8.8.8" ];
|
||||||
|
|
||||||
|
# Public facing interface
|
||||||
|
interfaces.eno1.ipv4.addresses = [ {
|
||||||
|
address = "84.88.53.236";
|
||||||
|
prefixLength = 29;
|
||||||
|
} ];
|
||||||
|
|
||||||
|
# Internal LAN to our Ethernet switch
|
||||||
|
interfaces.eno2.ipv4.addresses = [ {
|
||||||
|
address = "10.0.40.30";
|
||||||
|
prefixLength = 24;
|
||||||
|
} ];
|
||||||
|
|
||||||
|
# Infiniband over Omnipath switch (disconnected for now)
|
||||||
|
# interfaces.ibp5s0 = {};
|
||||||
|
|
||||||
|
nat = {
|
||||||
|
enable = true;
|
||||||
|
internalInterfaces = [ "eno2" ];
|
||||||
|
externalInterface = "eno1";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.firewall = {
|
||||||
|
extraCommands = ''
|
||||||
|
# Blackhole BSC vulnerability scanner (OpenVAS) as it is spamming our
|
||||||
|
# logs. Insert as first position so we also protect SSH.
|
||||||
|
iptables -I nixos-fw 1 -p tcp -s 192.168.8.16 -j nixos-fw-refuse
|
||||||
|
# Same with opsmonweb01.bsc.es which seems to be trying to access via SSH
|
||||||
|
iptables -I nixos-fw 2 -p tcp -s 84.88.52.176 -j nixos-fw-refuse
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
}
|
||||||
48
m/apex/nfs.nix
Normal file
48
m/apex/nfs.nix
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
services.nfs.server = {
|
||||||
|
enable = true;
|
||||||
|
lockdPort = 4001;
|
||||||
|
mountdPort = 4002;
|
||||||
|
statdPort = 4000;
|
||||||
|
exports = ''
|
||||||
|
/home 10.0.40.0/24(rw,async,no_subtree_check,no_root_squash)
|
||||||
|
/home 10.106.0.0/24(rw,async,no_subtree_check,no_root_squash)
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
networking.firewall = {
|
||||||
|
# Check with `rpcinfo -p`
|
||||||
|
extraCommands = ''
|
||||||
|
# Accept NFS traffic from compute nodes but not from the outside
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
|
||||||
|
# Same but UDP
|
||||||
|
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 111 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 2049 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4000 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4001 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 4002 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -s 10.0.40.0/24 --dport 20048 -j nixos-fw-accept
|
||||||
|
|
||||||
|
# Accept NFS traffic from wg0
|
||||||
|
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
|
||||||
|
# Same but UDP
|
||||||
|
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 111 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 2049 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4000 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4001 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 4002 -j nixos-fw-accept
|
||||||
|
iptables -A nixos-fw -p udp -i wg0 -s 10.106.0.0/24 --dport 20048 -j nixos-fw-accept
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
}
|
||||||
42
m/apex/wireguard.nix
Normal file
42
m/apex/wireguard.nix
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
{ config, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
networking.firewall = {
|
||||||
|
allowedUDPPorts = [ 666 ];
|
||||||
|
};
|
||||||
|
|
||||||
|
age.secrets.wgApex.file = ../../secrets/wg-apex.age;
|
||||||
|
|
||||||
|
# Enable WireGuard
|
||||||
|
networking.wireguard.enable = true;
|
||||||
|
networking.wireguard.interfaces = {
|
||||||
|
# "wg0" is the network interface name. You can name the interface arbitrarily.
|
||||||
|
wg0 = {
|
||||||
|
ips = [ "10.106.0.30/24" ];
|
||||||
|
listenPort = 666;
|
||||||
|
privateKeyFile = config.age.secrets.wgApex.path;
|
||||||
|
# Public key: VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=
|
||||||
|
peers = [
|
||||||
|
{
|
||||||
|
name = "fox";
|
||||||
|
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
|
||||||
|
allowedIPs = [ "10.106.0.1/32" ];
|
||||||
|
endpoint = "fox.ac.upc.edu:666";
|
||||||
|
# Send keepalives every 25 seconds. Important to keep NAT tables alive.
|
||||||
|
persistentKeepalive = 25;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "raccoon";
|
||||||
|
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
|
||||||
|
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.hosts = {
|
||||||
|
"10.106.0.1" = [ "fox" ];
|
||||||
|
"10.106.0.236" = [ "raccoon" ];
|
||||||
|
"10.0.44.4" = [ "tent" ];
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -2,7 +2,8 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/ssf.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
../module/monitoring.nix
|
../module/monitoring.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
# Includes the basic configuration for an Intel server.
|
# Includes the basic configuration for an Intel server.
|
||||||
imports = [
|
imports = [
|
||||||
./base/agenix.nix
|
./base/agenix.nix
|
||||||
|
./base/always-power-on.nix
|
||||||
./base/august-shutdown.nix
|
./base/august-shutdown.nix
|
||||||
./base/boot.nix
|
./base/boot.nix
|
||||||
./base/env.nix
|
./base/env.nix
|
||||||
@@ -10,6 +11,7 @@
|
|||||||
./base/hw.nix
|
./base/hw.nix
|
||||||
./base/net.nix
|
./base/net.nix
|
||||||
./base/nix.nix
|
./base/nix.nix
|
||||||
|
./base/nosv.nix
|
||||||
./base/ntp.nix
|
./base/ntp.nix
|
||||||
./base/rev.nix
|
./base/rev.nix
|
||||||
./base/ssh.nix
|
./base/ssh.nix
|
||||||
|
|||||||
8
m/common/base/always-power-on.nix
Normal file
8
m/common/base/always-power-on.nix
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
imports = [
|
||||||
|
../../module/power-policy.nix
|
||||||
|
];
|
||||||
|
|
||||||
|
# Turn on as soon as we have power
|
||||||
|
power.policy = "always-on";
|
||||||
|
}
|
||||||
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
# Shutdown all machines on August 2nd at 11:00 AM, so we can protect the
|
# Shutdown all machines on August 3rd at 22:00, so we can protect the
|
||||||
# hardware from spurious electrical peaks on the yearly electrical cut for
|
# hardware from spurious electrical peaks on the yearly electrical cut for
|
||||||
# maintenance that starts on August 4th.
|
# maintenance that starts on August 4th.
|
||||||
systemd.timers.august-shutdown = {
|
systemd.timers.august-shutdown = {
|
||||||
description = "Shutdown on August 2nd for maintenance";
|
description = "Shutdown on August 3rd for maintenance";
|
||||||
wantedBy = [ "timers.target" ];
|
wantedBy = [ "timers.target" ];
|
||||||
timerConfig = {
|
timerConfig = {
|
||||||
OnCalendar = "*-08-02 11:00:00";
|
OnCalendar = "*-08-03 22:00:00";
|
||||||
RandomizedDelaySec = "10min";
|
RandomizedDelaySec = "10min";
|
||||||
Unit = "systemd-poweroff.service";
|
Unit = "systemd-poweroff.service";
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -3,8 +3,8 @@
|
|||||||
{
|
{
|
||||||
environment.systemPackages = with pkgs; [
|
environment.systemPackages = with pkgs; [
|
||||||
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
|
vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
|
||||||
nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
|
nix-diff ipmitool freeipmi ethtool lm_sensors cmake gnumake file tree
|
||||||
ncdu config.boot.kernelPackages.perf ldns
|
ncdu config.boot.kernelPackages.perf ldns pv
|
||||||
# From bscpkgs overlay
|
# From bscpkgs overlay
|
||||||
osumb
|
osumb
|
||||||
];
|
];
|
||||||
@@ -21,6 +21,8 @@
|
|||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
|
environment.enableAllTerminfo = true;
|
||||||
|
|
||||||
environment.variables = {
|
environment.variables = {
|
||||||
EDITOR = "vim";
|
EDITOR = "vim";
|
||||||
VISUAL = "vim";
|
VISUAL = "vim";
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
{ pkgs, ... }:
|
{ pkgs, lib, ... }:
|
||||||
|
|
||||||
{
|
{
|
||||||
networking = {
|
networking = {
|
||||||
@@ -10,10 +10,14 @@
|
|||||||
allowedTCPPorts = [ 22 ];
|
allowedTCPPorts = [ 22 ];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Make sure we use iptables
|
||||||
|
nftables.enable = lib.mkForce false;
|
||||||
|
|
||||||
hosts = {
|
hosts = {
|
||||||
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
|
"84.88.53.236" = [ "ssfhead.bsc.es" "ssfhead" ];
|
||||||
"84.88.51.152" = [ "raccoon" ];
|
|
||||||
"84.88.51.142" = [ "raccoon-ipmi" ];
|
"84.88.51.142" = [ "raccoon-ipmi" ];
|
||||||
|
"192.168.11.12" = [ "bscpm04.bsc.es" ];
|
||||||
|
"192.168.11.15" = [ "gitlab-internal.bsc.es" ];
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,8 @@
|
|||||||
(import ../../../pkgs/overlay.nix)
|
(import ../../../pkgs/overlay.nix)
|
||||||
];
|
];
|
||||||
|
|
||||||
|
nixpkgs.config.allowUnfree = true;
|
||||||
|
|
||||||
nix = {
|
nix = {
|
||||||
nixPath = [
|
nixPath = [
|
||||||
"nixpkgs=${nixpkgs}"
|
"nixpkgs=${nixpkgs}"
|
||||||
|
|||||||
9
m/common/base/nosv.nix
Normal file
9
m/common/base/nosv.nix
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{ ... }:
|
||||||
|
{
|
||||||
|
nix.settings.system-features = [ "nosv" ];
|
||||||
|
programs.nix-required-mounts.enable = true;
|
||||||
|
programs.nix-required-mounts.allowedPatterns.nosv.paths = [
|
||||||
|
"/sys/devices/system/cpu"
|
||||||
|
"/sys/devices/system/node"
|
||||||
|
];
|
||||||
|
}
|
||||||
@@ -8,17 +8,6 @@ in
|
|||||||
# Enable the OpenSSH daemon.
|
# Enable the OpenSSH daemon.
|
||||||
services.openssh.enable = true;
|
services.openssh.enable = true;
|
||||||
|
|
||||||
# Connect to intranet git hosts via proxy
|
|
||||||
programs.ssh.extraConfig = ''
|
|
||||||
Host bscpm02.bsc.es bscpm03.bsc.es bscpm04.bsc.es gitlab-internal.bsc.es alya.gitlab.bsc.es
|
|
||||||
User git
|
|
||||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
|
||||||
|
|
||||||
# Connect to BSC machines via hut proxy too
|
|
||||||
Host amdlogin1.bsc.es armlogin1.bsc.es hualogin1.bsc.es glogin1.bsc.es glogin2.bsc.es fpgalogin1.bsc.es
|
|
||||||
ProxyCommand nc -X connect -x hut:23080 %h %p
|
|
||||||
'';
|
|
||||||
|
|
||||||
programs.ssh.knownHosts = hostsKeys // {
|
programs.ssh.knownHosts = hostsKeys // {
|
||||||
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
|
"gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3";
|
||||||
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
|
"bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS";
|
||||||
|
|||||||
@@ -20,6 +20,7 @@
|
|||||||
rarias = {
|
rarias = {
|
||||||
uid = 1880;
|
uid = 1880;
|
||||||
isNormalUser = true;
|
isNormalUser = true;
|
||||||
|
linger = true;
|
||||||
home = "/home/Computational/rarias";
|
home = "/home/Computational/rarias";
|
||||||
description = "Rodrigo Arias";
|
description = "Rodrigo Arias";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
@@ -39,7 +40,7 @@
|
|||||||
home = "/home/Computational/arocanon";
|
home = "/home/Computational/arocanon";
|
||||||
description = "Aleix Roca";
|
description = "Aleix Roca";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
extraGroups = [ "wheel" ];
|
extraGroups = [ "wheel" "tracing" ];
|
||||||
hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/";
|
hashedPassword = "$6$hliZiW4tULC/tH7p$pqZarwJkNZ7vS0G5llWQKx08UFG9DxDYgad7jplMD8WkZh5k58i4dfPoWtnEShfjTO6JHiIin05ny5lmSXzGM/";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF3zeB5KSimMBAjvzsp1GCkepVaquVZGPYwRIzyzaCba aleix@bsc"
|
||||||
@@ -55,7 +56,7 @@
|
|||||||
home = "/home/Computational/rpenacob";
|
home = "/home/Computational/rpenacob";
|
||||||
description = "Raúl Peñacoba";
|
description = "Raúl Peñacoba";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
hosts = [ "owl1" "owl2" "hut" ];
|
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "fox" ];
|
||||||
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
|
hashedPassword = "$6$TZm3bDIFyPrMhj1E$uEDXoYYd1z2Wd5mMPfh3DZAjP7ztVjJ4ezIcn82C0ImqafPA.AnTmcVftHEzLB3tbe2O4SxDyPSDEQgJ4GOtj/";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc"
|
||||||
@@ -68,10 +69,10 @@
|
|||||||
home = "/home/Computational/anavarro";
|
home = "/home/Computational/anavarro";
|
||||||
description = "Antoni Navarro";
|
description = "Antoni Navarro";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
hosts = [ "hut" "raccoon" "fox" ];
|
hosts = [ "apex" "hut" "tent" "raccoon" "fox" "weasel" ];
|
||||||
hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31";
|
hashedPassword = "$6$EgturvVYXlKgP43g$gTN78LLHIhaF8hsrCXD.O6mKnZSASWSJmCyndTX8QBWT6wTlUhcWVAKz65lFJPXjlJA4u7G1ydYQ0GG6Wk07b1";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMsbM21uepnJwPrRe6jYFz8zrZ6AYMtSEvvt4c9spmFP toni@delltoni"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -81,7 +82,7 @@
|
|||||||
home = "/home/Computational/abonerib";
|
home = "/home/Computational/abonerib";
|
||||||
description = "Aleix Boné";
|
description = "Aleix Boné";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
hosts = [ "owl1" "owl2" "hut" "raccoon" "fox" ];
|
hosts = [ "apex" "owl1" "owl2" "hut" "tent" "raccoon" "fox" "weasel" ];
|
||||||
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
|
hashedPassword = "$6$V1EQWJr474whv7XJ$OfJ0wueM2l.dgiJiiah0Tip9ITcJ7S7qDvtSycsiQ43QBFyP4lU0e0HaXWps85nqB4TypttYR4hNLoz3bz662/";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIIFiqXqt88VuUfyANkZyLJNiuroIITaGlOOTMhVDKjf abonerib@bsc"
|
||||||
@@ -94,7 +95,7 @@
|
|||||||
home = "/home/Computational/vlopez";
|
home = "/home/Computational/vlopez";
|
||||||
description = "Victor López";
|
description = "Victor López";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
hosts = [ "koro" ];
|
hosts = [ "apex" "koro" ];
|
||||||
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
|
hashedPassword = "$6$0ZBkgIYE/renVqtt$1uWlJsb0FEezRVNoETTzZMx4X2SvWiOsKvi0ppWCRqI66S6TqMBXBdP4fcQyvRRBt0e4Z7opZIvvITBsEtO0f0";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGMwlUZRf9jfG666Qa5Sb+KtEhXqkiMlBV2su3x/dXHq victor@arch"
|
||||||
@@ -107,7 +108,7 @@
|
|||||||
home = "/home/Computational/dbautist";
|
home = "/home/Computational/dbautist";
|
||||||
description = "Dylan Bautista Cases";
|
description = "Dylan Bautista Cases";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
hosts = [ "hut" ];
|
hosts = [ "apex" "hut" "tent" "raccoon" ];
|
||||||
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
|
hashedPassword = "$6$a2lpzMRVkG9nSgIm$12G6.ka0sFX1YimqJkBAjbvhRKZ.Hl090B27pdbnQOW0wzyxVWySWhyDDCILjQELky.HKYl9gqOeVXW49nW7q/";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAb+EQBoS98zrCwnGKkHKwMLdYABMTqv7q9E0+T0QmkS dbautist@bsc-848818791"
|
||||||
@@ -120,7 +121,7 @@
|
|||||||
home = "/home/Computational/dalvare1";
|
home = "/home/Computational/dalvare1";
|
||||||
description = "David Álvarez";
|
description = "David Álvarez";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
hosts = [ "hut" "fox" ];
|
hosts = [ "apex" "hut" "tent" "fox" ];
|
||||||
hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
|
hashedPassword = "$6$mpyIsV3mdq.rK8$FvfZdRH5OcEkUt5PnIUijWyUYZvB1SgeqxpJ2p91TTe.3eQIDTcLEQ5rxeg.e5IEXAZHHQ/aMsR5kPEujEghx0";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGEfy6F4rF80r4Cpo2H5xaWqhuUZzUsVsILSKGJzt5jF dalvare1@ssfhead"
|
||||||
@@ -133,16 +134,57 @@
|
|||||||
home = "/home/Computational/varcila";
|
home = "/home/Computational/varcila";
|
||||||
description = "Vincent Arcila";
|
description = "Vincent Arcila";
|
||||||
group = "Computational";
|
group = "Computational";
|
||||||
hosts = [ "hut" "fox" ];
|
hosts = [ "apex" "hut" "tent" "fox" ];
|
||||||
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
|
hashedPassword = "$6$oB0Tcn99DcM4Ch$Vn1A0ulLTn/8B2oFPi9wWl/NOsJzaFAWjqekwcuC9sMC7cgxEVb.Nk5XSzQ2xzYcNe5MLtmzkVYnRS1CqP39Y0";
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKGt0ESYxekBiHJQowmKpfdouw0hVm3N7tUMtAaeLejK vincent@varch"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
pmartin1 = {
|
||||||
|
# Arbitrary UID but large so it doesn't collide with other users on ssfhead.
|
||||||
|
uid = 9652;
|
||||||
|
isNormalUser = true;
|
||||||
|
home = "/home/Computational/pmartin1";
|
||||||
|
description = "Pedro J. Martinez-Ferrer";
|
||||||
|
group = "Computational";
|
||||||
|
hosts = [ "fox" ];
|
||||||
|
hashedPassword = "$6$nIgDMGnt4YIZl3G.$.JQ2jXLtDPRKsbsJfJAXdSvjDIzRrg7tNNjPkLPq3KJQhMjfDXRUvzagUHUU2TrE2hHM8/6uq8ex0UdxQ0ysl.";
|
||||||
|
openssh.authorizedKeys.keys = [
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIV5LEAII5rfe1hYqDYIIrhb1gOw7RcS1p2mhOTqG+zc pedro@pedro-ThinkPad-P14s-Gen-2a"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
csiringo = {
|
||||||
|
uid = 9653;
|
||||||
|
isNormalUser = true;
|
||||||
|
home = "/home/Computational/csiringo";
|
||||||
|
description = "Cesare Siringo";
|
||||||
|
group = "Computational";
|
||||||
|
hosts = [ ];
|
||||||
|
hashedPassword = "$6$0IsZlju8jFukLlAw$VKm0FUXbS.mVmPm3rcJeizTNU4IM5Nmmy21BvzFL.cQwvlGwFI1YWRQm6gsbd4nbg47mPDvYkr/ar0SlgF6GO1";
|
||||||
|
openssh.authorizedKeys.keys = [
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHA65zvvG50iuFEMf+guRwZB65jlGXfGLF4HO+THFaed csiringo@bsc.es"
|
||||||
|
];
|
||||||
|
};
|
||||||
|
|
||||||
|
acinca = {
|
||||||
|
uid = 9654;
|
||||||
|
isNormalUser = true;
|
||||||
|
home = "/home/Computational/acinca";
|
||||||
|
description = "Arnau Cinca";
|
||||||
|
group = "Computational";
|
||||||
|
hosts = [ "apex" "hut" "fox" "owl1" "owl2" ];
|
||||||
|
hashedPassword = "$6$S6PUeRpdzYlidxzI$szyvWejQ4hEN76yBYhp1diVO5ew1FFg.cz4lKiXt2Idy4XdpifwrFTCIzLTs5dvYlR62m7ekA5MrhcVxR5F/q/";
|
||||||
|
openssh.authorizedKeys.keys = [
|
||||||
|
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFmMqKqPg4uocNOr3O41kLbZMOMJn3m2ZdN1JvTR96z3 bsccns@arnau-bsc"
|
||||||
|
];
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
groups = {
|
groups = {
|
||||||
Computational = { gid = 564; };
|
Computational = { gid = 564; };
|
||||||
|
tracing = { };
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
10
m/common/ssf.nix
Normal file
10
m/common/ssf.nix
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
# Provides the base system for a xeon node in the SSF rack.
|
||||||
|
imports = [
|
||||||
|
./xeon.nix
|
||||||
|
./ssf/fs.nix
|
||||||
|
./ssf/hosts.nix
|
||||||
|
./ssf/hosts-remote.nix
|
||||||
|
./ssf/net.nix
|
||||||
|
];
|
||||||
|
}
|
||||||
9
m/common/ssf/hosts-remote.nix
Normal file
9
m/common/ssf/hosts-remote.nix
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{ pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
networking.hosts = {
|
||||||
|
# Remote hosts visible from compute nodes
|
||||||
|
"10.106.0.236" = [ "raccoon" ];
|
||||||
|
"10.0.44.4" = [ "tent" ];
|
||||||
|
};
|
||||||
|
}
|
||||||
23
m/common/ssf/hosts.nix
Normal file
23
m/common/ssf/hosts.nix
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{ pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
networking.hosts = {
|
||||||
|
# Login
|
||||||
|
"10.0.40.30" = [ "apex" ];
|
||||||
|
|
||||||
|
# Storage
|
||||||
|
"10.0.40.40" = [ "bay" ]; "10.0.42.40" = [ "bay-ib" ]; "10.0.40.141" = [ "bay-ipmi" ];
|
||||||
|
"10.0.40.41" = [ "oss01" ]; "10.0.42.41" = [ "oss01-ib0" ]; "10.0.40.142" = [ "oss01-ipmi" ];
|
||||||
|
"10.0.40.42" = [ "lake2" ]; "10.0.42.42" = [ "lake2-ib" ]; "10.0.40.143" = [ "lake2-ipmi" ];
|
||||||
|
|
||||||
|
# Xeon compute
|
||||||
|
"10.0.40.1" = [ "owl1" ]; "10.0.42.1" = [ "owl1-ib" ]; "10.0.40.101" = [ "owl1-ipmi" ];
|
||||||
|
"10.0.40.2" = [ "owl2" ]; "10.0.42.2" = [ "owl2-ib" ]; "10.0.40.102" = [ "owl2-ipmi" ];
|
||||||
|
"10.0.40.3" = [ "xeon03" ]; "10.0.42.3" = [ "xeon03-ib" ]; "10.0.40.103" = [ "xeon03-ipmi" ];
|
||||||
|
#"10.0.40.4" = [ "tent" ]; "10.0.42.4" = [ "tent-ib" ]; "10.0.40.104" = [ "tent-ipmi" ];
|
||||||
|
"10.0.40.5" = [ "koro" ]; "10.0.42.5" = [ "koro-ib" ]; "10.0.40.105" = [ "koro-ipmi" ];
|
||||||
|
"10.0.40.6" = [ "weasel" ]; "10.0.42.6" = [ "weasel-ib" ]; "10.0.40.106" = [ "weasel-ipmi" ];
|
||||||
|
"10.0.40.7" = [ "hut" ]; "10.0.42.7" = [ "hut-ib" ]; "10.0.40.107" = [ "hut-ipmi" ];
|
||||||
|
"10.0.40.8" = [ "eudy" ]; "10.0.42.8" = [ "eudy-ib" ]; "10.0.40.108" = [ "eudy-ipmi" ];
|
||||||
|
};
|
||||||
|
}
|
||||||
23
m/common/ssf/net.nix
Normal file
23
m/common/ssf/net.nix
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{ pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
# Infiniband (IPoIB)
|
||||||
|
environment.systemPackages = [ pkgs.rdma-core ];
|
||||||
|
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
|
||||||
|
|
||||||
|
networking = {
|
||||||
|
defaultGateway = "10.0.40.30";
|
||||||
|
nameservers = ["8.8.8.8"];
|
||||||
|
|
||||||
|
firewall = {
|
||||||
|
extraCommands = ''
|
||||||
|
# Prevent ssfhead from contacting our slurmd daemon
|
||||||
|
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
|
||||||
|
# But accept traffic to slurm ports from any other node in the subnet
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
|
||||||
|
# We also need to open the srun port range
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -1,9 +1,7 @@
|
|||||||
{
|
{
|
||||||
# Provides the base system for a xeon node.
|
# Provides the base system for a xeon node, not necessarily in the SSF rack.
|
||||||
imports = [
|
imports = [
|
||||||
./base.nix
|
./base.nix
|
||||||
./xeon/fs.nix
|
|
||||||
./xeon/console.nix
|
./xeon/console.nix
|
||||||
./xeon/net.nix
|
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,94 +0,0 @@
|
|||||||
{ pkgs, ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
# Infiniband (IPoIB)
|
|
||||||
environment.systemPackages = [ pkgs.rdma-core ];
|
|
||||||
boot.kernelModules = [ "ib_umad" "ib_ipoib" ];
|
|
||||||
|
|
||||||
networking = {
|
|
||||||
defaultGateway = "10.0.40.30";
|
|
||||||
nameservers = ["8.8.8.8"];
|
|
||||||
|
|
||||||
proxy = {
|
|
||||||
default = "http://hut:23080/";
|
|
||||||
noProxy = "127.0.0.1,localhost,internal.domain,10.0.40.40,hut";
|
|
||||||
# Don't set all_proxy as go complains and breaks the gitlab runner, see:
|
|
||||||
# https://github.com/golang/go/issues/16715
|
|
||||||
allProxy = null;
|
|
||||||
};
|
|
||||||
|
|
||||||
firewall = {
|
|
||||||
extraCommands = ''
|
|
||||||
# Prevent ssfhead from contacting our slurmd daemon
|
|
||||||
iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-refuse
|
|
||||||
# But accept traffic to slurm ports from any other node in the subnet
|
|
||||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept
|
|
||||||
# We also need to open the srun port range
|
|
||||||
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
extraHosts = ''
|
|
||||||
10.0.40.30 ssfhead
|
|
||||||
|
|
||||||
# Node Entry for node: mds01 (ID=72)
|
|
||||||
10.0.40.40 bay mds01 mds01-eth0
|
|
||||||
10.0.42.40 bay-ib mds01-ib0
|
|
||||||
10.0.40.141 bay-ipmi mds01-ipmi0 mds01-ipmi
|
|
||||||
|
|
||||||
# Node Entry for node: oss01 (ID=73)
|
|
||||||
10.0.40.41 oss01 oss01-eth0
|
|
||||||
10.0.42.41 oss01-ib0
|
|
||||||
10.0.40.142 oss01-ipmi0 oss01-ipmi
|
|
||||||
|
|
||||||
# Node Entry for node: oss02 (ID=74)
|
|
||||||
10.0.40.42 lake2 oss02 oss02-eth0
|
|
||||||
10.0.42.42 lake2-ib oss02-ib0
|
|
||||||
10.0.40.143 lake2-ipmi oss02-ipmi0 oss02-ipmi
|
|
||||||
|
|
||||||
# Node Entry for node: xeon01 (ID=15)
|
|
||||||
10.0.40.1 owl1 xeon01 xeon01-eth0
|
|
||||||
10.0.42.1 owl1-ib xeon01-ib0
|
|
||||||
10.0.40.101 owl1-ipmi xeon01-ipmi0 xeon01-ipmi
|
|
||||||
|
|
||||||
# Node Entry for node: xeon02 (ID=16)
|
|
||||||
10.0.40.2 owl2 xeon02 xeon02-eth0
|
|
||||||
10.0.42.2 owl2-ib xeon02-ib0
|
|
||||||
10.0.40.102 owl2-ipmi xeon02-ipmi0 xeon02-ipmi
|
|
||||||
|
|
||||||
# Node Entry for node: xeon03 (ID=17)
|
|
||||||
10.0.40.3 xeon03 xeon03-eth0
|
|
||||||
10.0.42.3 xeon03-ib0
|
|
||||||
10.0.40.103 xeon03-ipmi0 xeon03-ipmi
|
|
||||||
|
|
||||||
# Node Entry for node: xeon04 (ID=18)
|
|
||||||
10.0.40.4 xeon04 xeon04-eth0
|
|
||||||
10.0.42.4 xeon04-ib0
|
|
||||||
10.0.40.104 xeon04-ipmi0 xeon04-ipmi
|
|
||||||
|
|
||||||
# Node Entry for node: xeon05 (ID=19)
|
|
||||||
10.0.40.5 koro xeon05 xeon05-eth0
|
|
||||||
10.0.42.5 koro-ib xeon05-ib0
|
|
||||||
10.0.40.105 koro-ipmi xeon05-ipmi0
|
|
||||||
|
|
||||||
# Node Entry for node: xeon06 (ID=20)
|
|
||||||
10.0.40.6 xeon06 xeon06-eth0
|
|
||||||
10.0.42.6 xeon06-ib0
|
|
||||||
10.0.40.106 xeon06-ipmi0 xeon06-ipmi
|
|
||||||
|
|
||||||
# Node Entry for node: xeon07 (ID=21)
|
|
||||||
10.0.40.7 hut xeon07 xeon07-eth0
|
|
||||||
10.0.42.7 hut-ib xeon07-ib0
|
|
||||||
10.0.40.107 hut-ipmi xeon07-ipmi0 xeon07-ipmi
|
|
||||||
|
|
||||||
# Node Entry for node: xeon08 (ID=22)
|
|
||||||
10.0.40.8 eudy xeon08 xeon08-eth0
|
|
||||||
10.0.42.8 eudy-ib xeon08-ib0
|
|
||||||
10.0.40.108 eudy-ipmi xeon08-ipmi0 xeon08-ipmi
|
|
||||||
|
|
||||||
# fox
|
|
||||||
10.0.40.26 fox
|
|
||||||
10.0.40.126 fox-ipmi
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -2,13 +2,14 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/ssf.nix
|
||||||
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
||||||
|
|
||||||
./kernel/kernel.nix
|
./kernel/kernel.nix
|
||||||
./cpufreq.nix
|
./cpufreq.nix
|
||||||
./fs.nix
|
./fs.nix
|
||||||
./users.nix
|
./users.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
../module/debuginfod.nix
|
../module/debuginfod.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -2,13 +2,20 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/base.nix
|
||||||
../module/ceph.nix
|
../common/xeon/console.nix
|
||||||
|
../module/amd-uprof.nix
|
||||||
../module/emulation.nix
|
../module/emulation.nix
|
||||||
|
../module/nvidia.nix
|
||||||
../module/slurm-client.nix
|
../module/slurm-client.nix
|
||||||
../module/slurm-firewall.nix
|
../module/hut-substituter.nix
|
||||||
|
./wireguard.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
|
# Don't turn off in August as UPC has different dates.
|
||||||
|
# Fox works fine on power cuts.
|
||||||
|
systemd.timers.august-shutdown.enable = false;
|
||||||
|
|
||||||
# Select the disk using the ID to avoid mismatches
|
# Select the disk using the ID to avoid mismatches
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x500a07514b0c1103";
|
||||||
|
|
||||||
@@ -16,30 +23,60 @@
|
|||||||
swapDevices = lib.mkForce [];
|
swapDevices = lib.mkForce [];
|
||||||
|
|
||||||
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ];
|
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "nvme" "usbhid" "usb_storage" "sd_mod" ];
|
||||||
boot.kernelModules = [ "kvm-amd" ];
|
boot.kernelModules = [ "kvm-amd" "amd_uncore" "amd_hsmp" ];
|
||||||
|
|
||||||
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
|
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
|
||||||
hardware.cpu.intel.updateMicrocode = lib.mkForce false;
|
hardware.cpu.intel.updateMicrocode = lib.mkForce false;
|
||||||
|
|
||||||
|
# Use performance for benchmarks
|
||||||
|
powerManagement.cpuFreqGovernor = "performance";
|
||||||
|
|
||||||
|
services.amd-uprof.enable = true;
|
||||||
|
|
||||||
|
# Disable NUMA balancing
|
||||||
|
boot.kernel.sysctl."kernel.numa_balancing" = 0;
|
||||||
|
|
||||||
|
# Expose kernel addresses
|
||||||
|
boot.kernel.sysctl."kernel.kptr_restrict" = 0;
|
||||||
|
|
||||||
|
# Disable NMI watchdog to save one hw counter (for AMD uProf)
|
||||||
|
boot.kernel.sysctl."kernel.nmi_watchdog" = 0;
|
||||||
|
|
||||||
|
services.openssh.settings.X11Forwarding = true;
|
||||||
|
|
||||||
|
services.fail2ban.enable = true;
|
||||||
|
|
||||||
networking = {
|
networking = {
|
||||||
|
timeServers = [ "ntp1.upc.edu" "ntp2.upc.edu" ];
|
||||||
hostName = "fox";
|
hostName = "fox";
|
||||||
interfaces.enp1s0f0np0.ipv4.addresses = [ {
|
# UPC network (may change over time, use DHCP)
|
||||||
address = "10.0.40.26";
|
# Public IP configuration:
|
||||||
prefixLength = 24;
|
# - Hostname: fox.ac.upc.edu
|
||||||
} ];
|
# - IP: 147.83.30.141
|
||||||
|
# - Gateway: 147.83.30.130
|
||||||
|
# - NetMask: 255.255.255.192
|
||||||
|
# Private IP configuration for BMC:
|
||||||
|
# - Hostname: fox-ipmi.ac.upc.edu
|
||||||
|
# - IP: 147.83.35.27
|
||||||
|
# - Gateway: 147.83.35.2
|
||||||
|
# - NetMask: 255.255.255.0
|
||||||
|
interfaces.enp1s0f0np0.useDHCP = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
# Configure Nvidia driver to use with CUDA
|
# Recommended for new graphics cards
|
||||||
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
|
hardware.nvidia.open = true;
|
||||||
hardware.graphics.enable = true;
|
|
||||||
nixpkgs.config.allowUnfree = true;
|
|
||||||
nixpkgs.config.nvidia.acceptLicense = true;
|
|
||||||
services.xserver.videoDrivers = [ "nvidia" ];
|
|
||||||
|
|
||||||
# Mount NVME disks
|
# Mount NVME disks
|
||||||
fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
|
fileSystems."/nvme0" = { device = "/dev/disk/by-label/nvme0"; fsType = "ext4"; };
|
||||||
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };
|
fileSystems."/nvme1" = { device = "/dev/disk/by-label/nvme1"; fsType = "ext4"; };
|
||||||
|
|
||||||
|
# Mount the NFS home
|
||||||
|
fileSystems."/nfs/home" = {
|
||||||
|
device = "10.106.0.30:/home";
|
||||||
|
fsType = "nfs";
|
||||||
|
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
|
||||||
|
};
|
||||||
|
|
||||||
# Make a /nvme{0,1}/$USER directory for each user.
|
# Make a /nvme{0,1}/$USER directory for each user.
|
||||||
systemd.services.create-nvme-dirs = let
|
systemd.services.create-nvme-dirs = let
|
||||||
# Take only normal users in fox
|
# Take only normal users in fox
|
||||||
|
|||||||
53
m/fox/wireguard.nix
Normal file
53
m/fox/wireguard.nix
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
{ config, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
networking.firewall = {
|
||||||
|
allowedUDPPorts = [ 666 ];
|
||||||
|
};
|
||||||
|
|
||||||
|
age.secrets.wgFox.file = ../../secrets/wg-fox.age;
|
||||||
|
|
||||||
|
networking.wireguard.enable = true;
|
||||||
|
networking.wireguard.interfaces = {
|
||||||
|
# "wg0" is the network interface name. You can name the interface arbitrarily.
|
||||||
|
wg0 = {
|
||||||
|
# Determines the IP address and subnet of the server's end of the tunnel interface.
|
||||||
|
ips = [ "10.106.0.1/24" ];
|
||||||
|
|
||||||
|
# The port that WireGuard listens to. Must be accessible by the client.
|
||||||
|
listenPort = 666;
|
||||||
|
|
||||||
|
# Path to the private key file.
|
||||||
|
privateKeyFile = config.age.secrets.wgFox.path;
|
||||||
|
# Public key: VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=
|
||||||
|
|
||||||
|
peers = [
|
||||||
|
# List of allowed peers.
|
||||||
|
{
|
||||||
|
name = "apex";
|
||||||
|
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
|
||||||
|
# List of IPs assigned to this peer within the tunnel subnet. Used to configure routing.
|
||||||
|
allowedIPs = [ "10.106.0.30/32" ];
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "raccoon";
|
||||||
|
publicKey = "QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=";
|
||||||
|
allowedIPs = [ "10.106.0.236/32" "192.168.0.0/16" "10.0.44.0/24" ];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.hosts = {
|
||||||
|
"10.106.0.30" = [ "apex" ];
|
||||||
|
"10.106.0.236" = [ "raccoon" ];
|
||||||
|
"10.0.44.4" = [ "tent" ];
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.firewall = {
|
||||||
|
extraCommands = ''
|
||||||
|
# Accept slurm connections to slurmd from apex (via wireguard)
|
||||||
|
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.30/32 -d 10.106.0.1/32 --dport 6818 -j nixos-fw-accept
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -3,160 +3,12 @@ modules:
|
|||||||
prober: http
|
prober: http
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
http:
|
http:
|
||||||
proxy_url: "http://127.0.0.1:23080"
|
|
||||||
skip_resolve_phase_with_proxy: true
|
|
||||||
follow_redirects: true
|
|
||||||
valid_status_codes: [] # Defaults to 2xx
|
|
||||||
method: GET
|
|
||||||
http_with_proxy:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
proxy_url: "http://127.0.0.1:3128"
|
|
||||||
skip_resolve_phase_with_proxy: true
|
|
||||||
http_with_proxy_and_headers:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
proxy_url: "http://127.0.0.1:3128"
|
|
||||||
proxy_connect_header:
|
|
||||||
Proxy-Authorization:
|
|
||||||
- Bearer token
|
|
||||||
http_post_2xx:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
method: POST
|
|
||||||
headers:
|
|
||||||
Content-Type: application/json
|
|
||||||
body: '{}'
|
|
||||||
http_post_body_file:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
method: POST
|
|
||||||
body_file: "/files/body.txt"
|
|
||||||
http_basic_auth_example:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
method: POST
|
|
||||||
headers:
|
|
||||||
Host: "login.example.com"
|
|
||||||
basic_auth:
|
|
||||||
username: "username"
|
|
||||||
password: "mysecret"
|
|
||||||
http_2xx_oauth_client_credentials:
|
|
||||||
prober: http
|
|
||||||
timeout: 5s
|
|
||||||
http:
|
|
||||||
valid_http_versions: ["HTTP/1.1", "HTTP/2"]
|
|
||||||
follow_redirects: true
|
follow_redirects: true
|
||||||
preferred_ip_protocol: "ip4"
|
preferred_ip_protocol: "ip4"
|
||||||
valid_status_codes:
|
valid_status_codes: [] # Defaults to 2xx
|
||||||
- 200
|
|
||||||
- 201
|
|
||||||
oauth2:
|
|
||||||
client_id: "client_id"
|
|
||||||
client_secret: "client_secret"
|
|
||||||
token_url: "https://api.example.com/token"
|
|
||||||
endpoint_params:
|
|
||||||
grant_type: "client_credentials"
|
|
||||||
http_custom_ca_example:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
method: GET
|
method: GET
|
||||||
tls_config:
|
|
||||||
ca_file: "/certs/my_cert.crt"
|
|
||||||
http_gzip:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
method: GET
|
|
||||||
compression: gzip
|
|
||||||
http_gzip_with_accept_encoding:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
method: GET
|
|
||||||
compression: gzip
|
|
||||||
headers:
|
|
||||||
Accept-Encoding: gzip
|
|
||||||
tls_connect:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
tcp:
|
|
||||||
tls: true
|
|
||||||
tcp_connect_example:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
imap_starttls:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
tcp:
|
|
||||||
query_response:
|
|
||||||
- expect: "OK.*STARTTLS"
|
|
||||||
- send: ". STARTTLS"
|
|
||||||
- expect: "OK"
|
|
||||||
- starttls: true
|
|
||||||
- send: ". capability"
|
|
||||||
- expect: "CAPABILITY IMAP4rev1"
|
|
||||||
smtp_starttls:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
tcp:
|
|
||||||
query_response:
|
|
||||||
- expect: "^220 ([^ ]+) ESMTP (.+)$"
|
|
||||||
- send: "EHLO prober\r"
|
|
||||||
- expect: "^250-STARTTLS"
|
|
||||||
- send: "STARTTLS\r"
|
|
||||||
- expect: "^220"
|
|
||||||
- starttls: true
|
|
||||||
- send: "EHLO prober\r"
|
|
||||||
- expect: "^250-AUTH"
|
|
||||||
- send: "QUIT\r"
|
|
||||||
irc_banner_example:
|
|
||||||
prober: tcp
|
|
||||||
timeout: 5s
|
|
||||||
tcp:
|
|
||||||
query_response:
|
|
||||||
- send: "NICK prober"
|
|
||||||
- send: "USER prober prober prober :prober"
|
|
||||||
- expect: "PING :([^ ]+)"
|
|
||||||
send: "PONG ${1}"
|
|
||||||
- expect: "^:[^ ]+ 001"
|
|
||||||
icmp:
|
icmp:
|
||||||
prober: icmp
|
prober: icmp
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
icmp:
|
icmp:
|
||||||
preferred_ip_protocol: "ip4"
|
preferred_ip_protocol: "ip4"
|
||||||
dns_udp_example:
|
|
||||||
prober: dns
|
|
||||||
timeout: 5s
|
|
||||||
dns:
|
|
||||||
query_name: "www.prometheus.io"
|
|
||||||
query_type: "A"
|
|
||||||
valid_rcodes:
|
|
||||||
- NOERROR
|
|
||||||
validate_answer_rrs:
|
|
||||||
fail_if_matches_regexp:
|
|
||||||
- ".*127.0.0.1"
|
|
||||||
fail_if_all_match_regexp:
|
|
||||||
- ".*127.0.0.1"
|
|
||||||
fail_if_not_matches_regexp:
|
|
||||||
- "www.prometheus.io.\t300\tIN\tA\t127.0.0.1"
|
|
||||||
fail_if_none_matches_regexp:
|
|
||||||
- "127.0.0.1"
|
|
||||||
validate_authority_rrs:
|
|
||||||
fail_if_matches_regexp:
|
|
||||||
- ".*127.0.0.1"
|
|
||||||
validate_additional_rrs:
|
|
||||||
fail_if_matches_regexp:
|
|
||||||
- ".*127.0.0.1"
|
|
||||||
dns_soa:
|
|
||||||
prober: dns
|
|
||||||
dns:
|
|
||||||
query_name: "prometheus.io"
|
|
||||||
query_type: "SOA"
|
|
||||||
dns_tcp_example:
|
|
||||||
prober: dns
|
|
||||||
dns:
|
|
||||||
transport_protocol: "tcp" # defaults to "udp"
|
|
||||||
preferred_ip_protocol: "ip4" # defaults to "ip6"
|
|
||||||
query_name: "www.prometheus.io"
|
|
||||||
|
|||||||
@@ -2,16 +2,14 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/ssf.nix
|
||||||
|
|
||||||
../module/ceph.nix
|
../module/ceph.nix
|
||||||
../module/debuginfod.nix
|
../module/debuginfod.nix
|
||||||
../module/emulation.nix
|
../module/emulation.nix
|
||||||
../module/slurm-client.nix
|
|
||||||
./gitlab-runner.nix
|
./gitlab-runner.nix
|
||||||
./monitoring.nix
|
./monitoring.nix
|
||||||
./nfs.nix
|
./nfs.nix
|
||||||
./slurm-server.nix
|
|
||||||
./nix-serve.nix
|
./nix-serve.nix
|
||||||
./public-inbox.nix
|
./public-inbox.nix
|
||||||
./gitea.nix
|
./gitea.nix
|
||||||
|
|||||||
@@ -3,7 +3,10 @@
|
|||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../module/slurm-exporter.nix
|
../module/slurm-exporter.nix
|
||||||
|
../module/meteocat-exporter.nix
|
||||||
|
../module/upc-qaire-exporter.nix
|
||||||
./gpfs-probe.nix
|
./gpfs-probe.nix
|
||||||
|
../module/nix-daemon-exporter.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
age.secrets.grafanaJungleRobotPassword = {
|
age.secrets.grafanaJungleRobotPassword = {
|
||||||
@@ -108,6 +111,9 @@
|
|||||||
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
|
"127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"
|
||||||
"127.0.0.1:9341" # Slurm exporter
|
"127.0.0.1:9341" # Slurm exporter
|
||||||
"127.0.0.1:9966" # GPFS custom exporter
|
"127.0.0.1:9966" # GPFS custom exporter
|
||||||
|
"127.0.0.1:9999" # Nix-daemon custom exporter
|
||||||
|
"127.0.0.1:9929" # Meteocat custom exporter
|
||||||
|
"127.0.0.1:9928" # UPC Qaire custom exporter
|
||||||
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
|
"127.0.0.1:${toString config.services.prometheus.exporters.blackbox.port}"
|
||||||
];
|
];
|
||||||
}];
|
}];
|
||||||
@@ -163,6 +169,9 @@
|
|||||||
"8.8.8.8"
|
"8.8.8.8"
|
||||||
"ssfhead"
|
"ssfhead"
|
||||||
"anella-bsc.cesca.cat"
|
"anella-bsc.cesca.cat"
|
||||||
|
"upc-anella.cesca.cat"
|
||||||
|
"fox.ac.upc.edu"
|
||||||
|
"arenys5.ac.upc.edu"
|
||||||
];
|
];
|
||||||
}];
|
}];
|
||||||
relabel_configs = [
|
relabel_configs = [
|
||||||
@@ -258,17 +267,6 @@
|
|||||||
}
|
}
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
{
|
|
||||||
job_name = "ipmi-fox";
|
|
||||||
metrics_path = "/ipmi";
|
|
||||||
static_configs = [
|
|
||||||
{ targets = [ "127.0.0.1:9290" ]; }
|
|
||||||
];
|
|
||||||
params = {
|
|
||||||
target = [ "fox-ipmi" ];
|
|
||||||
module = [ "fox" ];
|
|
||||||
};
|
|
||||||
}
|
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,10 +2,13 @@
|
|||||||
let
|
let
|
||||||
website = pkgs.stdenv.mkDerivation {
|
website = pkgs.stdenv.mkDerivation {
|
||||||
name = "jungle-web";
|
name = "jungle-web";
|
||||||
src = theFlake;
|
src = pkgs.fetchgit {
|
||||||
|
url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
|
||||||
|
rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
|
||||||
|
hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
|
||||||
|
};
|
||||||
buildInputs = [ pkgs.hugo ];
|
buildInputs = [ pkgs.hugo ];
|
||||||
buildPhase = ''
|
buildPhase = ''
|
||||||
cd web
|
|
||||||
rm -rf public/
|
rm -rf public/
|
||||||
hugo
|
hugo
|
||||||
'';
|
'';
|
||||||
|
|||||||
@@ -1,7 +0,0 @@
|
|||||||
{ ... }:
|
|
||||||
|
|
||||||
{
|
|
||||||
services.slurm = {
|
|
||||||
server.enable = true;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
- xeon03-ipmi
|
- xeon03-ipmi
|
||||||
- xeon04-ipmi
|
- xeon04-ipmi
|
||||||
- koro-ipmi
|
- koro-ipmi
|
||||||
- xeon06-ipmi
|
- weasel-ipmi
|
||||||
- hut-ipmi
|
- hut-ipmi
|
||||||
- eudy-ipmi
|
- eudy-ipmi
|
||||||
# Storage
|
# Storage
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/ssf.nix
|
||||||
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
#(modulesPath + "/installer/netboot/netboot-minimal.nix")
|
||||||
|
|
||||||
../eudy/cpufreq.nix
|
../eudy/cpufreq.nix
|
||||||
|
|||||||
@@ -2,8 +2,9 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/ssf.nix
|
||||||
../module/monitoring.nix
|
../module/monitoring.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a";
|
||||||
|
|||||||
70
m/map.nix
Normal file
70
m/map.nix
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
{
|
||||||
|
# In physical order from top to bottom (see note below)
|
||||||
|
ssf = {
|
||||||
|
# Switches for Ethernet and OmniPath
|
||||||
|
switch-C6-S1A-05 = { pos=42; size=1; model="Dell S3048-ON"; };
|
||||||
|
switch-opa = { pos=41; size=1; };
|
||||||
|
|
||||||
|
# SSF login
|
||||||
|
apex = { pos=39; size=2; label="SSFHEAD"; board="R2208WTTYSR"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
|
||||||
|
# Storage
|
||||||
|
bay = { pos=38; size=1; label="MDS01"; board="S2600WT2R"; sn="BQWL64850303"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
lake1 = { pos=37; size=1; label="OSS01"; board="S2600WT2R"; sn="BQWL64850234"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
lake2 = { pos=36; size=1; label="OSS02"; board="S2600WT2R"; sn="BQWL64850266"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
|
||||||
|
# Compute xeon
|
||||||
|
owl1 = { pos=35; size=1; label="SSF-XEON01"; board="S2600WTTR"; sn="BQWL64954172"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
owl2 = { pos=34; size=1; label="SSF-XEON02"; board="S2600WTTR"; sn="BQWL64756560"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
xeon03 = { pos=33; size=1; label="SSF-XEON03"; board="S2600WTTR"; sn="BQWL64750826"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
# Slot 32 empty
|
||||||
|
koro = { pos=31; size=1; label="SSF-XEON05"; board="S2600WTTR"; sn="BQWL64954293"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
weasel = { pos=30; size=1; label="SSF-XEON06"; board="S2600WTTR"; sn="BQWL64750846"; contact="antoni.navarro@bsc.es"; };
|
||||||
|
hut = { pos=29; size=1; label="SSF-XEON07"; board="S2600WTTR"; sn="BQWL64751184"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
eudy = { pos=28; size=1; label="SSF-XEON08"; board="S2600WTTR"; sn="BQWL64756586"; contact="aleix.rocanonell@bsc.es"; };
|
||||||
|
|
||||||
|
# 16 KNL nodes, 4 per chassis
|
||||||
|
knl01_04 = { pos=26; size=2; label="KNL01..KNL04"; board="HNS7200APX"; };
|
||||||
|
knl05_08 = { pos=24; size=2; label="KNL05..KNL18"; board="HNS7200APX"; };
|
||||||
|
knl09_12 = { pos=22; size=2; label="KNL09..KNL12"; board="HNS7200APX"; };
|
||||||
|
knl13_16 = { pos=20; size=2; label="KNL13..KNL16"; board="HNS7200APX"; };
|
||||||
|
|
||||||
|
# Slot 19 empty
|
||||||
|
|
||||||
|
# EPI (hw team, guessed order)
|
||||||
|
epi01 = { pos=18; size=1; contact="joan.cabre@bsc.es"; };
|
||||||
|
epi02 = { pos=17; size=1; contact="joan.cabre@bsc.es"; };
|
||||||
|
epi03 = { pos=16; size=1; contact="joan.cabre@bsc.es"; };
|
||||||
|
anon = { pos=14; size=2; }; # Unlabeled machine. Operative
|
||||||
|
|
||||||
|
# These are old and decommissioned (off)
|
||||||
|
power8 = { pos=12; size=2; label="BSCPOWER8N3"; decommissioned=true; };
|
||||||
|
powern1 = { pos=8; size=4; label="BSCPOWERN1"; decommissioned=true; };
|
||||||
|
gustafson = { pos=7; size=1; label="gustafson"; decommissioned=true; };
|
||||||
|
odap01 = { pos=3; size=4; label="ODAP01"; decommissioned=true; };
|
||||||
|
amhdal = { pos=2; size=1; label="AMHDAL"; decommissioned=true; }; # sic
|
||||||
|
moore = { pos=1; size=1; label="moore (earth)"; decommissioned=true; };
|
||||||
|
};
|
||||||
|
|
||||||
|
bsc2218 = {
|
||||||
|
raccoon = { board="W2600CR"; sn="QSIP22500829"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
tent = { label="SSF-XEON04"; board="S2600WTTR"; sn="BQWL64751229"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
};
|
||||||
|
|
||||||
|
upc = {
|
||||||
|
fox = { board="H13DSG-O-CPU"; sn="UM24CS600392"; prod="AS-4125GS-TNRT"; prod_sn="E508839X5103339"; contact="rodrigo.arias@bsc.es"; };
|
||||||
|
};
|
||||||
|
|
||||||
|
# NOTE: Position is specified in "U" units (44.45 mm) and starts at 1 from the
|
||||||
|
# bottom. Example:
|
||||||
|
#
|
||||||
|
# | ... | - [pos+size] <--- Label in chassis
|
||||||
|
# +--------+
|
||||||
|
# | node | - [pos+1]
|
||||||
|
# | 2U | - [pos]
|
||||||
|
# +--------+
|
||||||
|
# | ... | - [pos-1]
|
||||||
|
#
|
||||||
|
# NOTE: The board and sn refer to the FRU information (Board Product and
|
||||||
|
# Board Serial) via `ipmitool fru print 0`.
|
||||||
|
}
|
||||||
49
m/module/amd-uprof.nix
Normal file
49
m/module/amd-uprof.nix
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
{ config, lib, pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
options = {
|
||||||
|
services.amd-uprof = {
|
||||||
|
enable = lib.mkOption {
|
||||||
|
type = lib.types.bool;
|
||||||
|
default = false;
|
||||||
|
description = "Whether to enable AMD uProf.";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
# Only setup amd-uprof if enabled
|
||||||
|
config = lib.mkIf config.services.amd-uprof.enable {
|
||||||
|
|
||||||
|
# First make sure that we add the module to the list of available modules
|
||||||
|
# in the kernel matching the same kernel version of this configuration.
|
||||||
|
boot.extraModulePackages = with config.boot.kernelPackages; [ amd-uprof-driver ];
|
||||||
|
boot.kernelModules = [ "AMDPowerProfiler" ];
|
||||||
|
|
||||||
|
# Make the userspace tools available in $PATH.
|
||||||
|
environment.systemPackages = with pkgs; [ amd-uprof ];
|
||||||
|
|
||||||
|
# The AMDPowerProfiler module doesn't create the /dev device, nor does it emit
|
||||||
|
# any uevents, so we cannot use udev rules to automatically create the
|
||||||
|
# device. Instead, we run a systemd unit that does it after loading the
|
||||||
|
# modules.
|
||||||
|
systemd.services.amd-uprof-device = {
|
||||||
|
description = "Create /dev/AMDPowerProfiler device";
|
||||||
|
after = [ "systemd-modules-load.service" ];
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
unitConfig.ConditionPathExists = [
|
||||||
|
"/proc/AMDPowerProfiler/device"
|
||||||
|
"!/dev/AMDPowerProfiler"
|
||||||
|
];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
RemainAfterExit = true;
|
||||||
|
ExecStart = pkgs.writeShellScript "add-amd-uprof-dev.sh" ''
|
||||||
|
mknod /dev/AMDPowerProfiler -m 666 c $(< /proc/AMDPowerProfiler/device) 0
|
||||||
|
'';
|
||||||
|
ExecStop = pkgs.writeShellScript "remove-amd-uprof-dev.sh" ''
|
||||||
|
rm -f /dev/AMDPowerProfiler
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -4,7 +4,10 @@
|
|||||||
# Don't add hut as a cache to itself
|
# Don't add hut as a cache to itself
|
||||||
assert config.networking.hostName != "hut";
|
assert config.networking.hostName != "hut";
|
||||||
{
|
{
|
||||||
substituters = [ "http://hut/cache" ];
|
extra-substituters = [ "http://hut/cache" ];
|
||||||
trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
extra-trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
||||||
|
|
||||||
|
# Set a low timeout in case hut is down
|
||||||
|
connect-timeout = 3; # seconds
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
17
m/module/meteocat-exporter.nix
Normal file
17
m/module/meteocat-exporter.nix
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
{ config, lib, pkgs, ... }:
|
||||||
|
|
||||||
|
with lib;
|
||||||
|
|
||||||
|
{
|
||||||
|
systemd.services."prometheus-meteocat-exporter" = {
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Restart = mkDefault "always";
|
||||||
|
PrivateTmp = mkDefault true;
|
||||||
|
WorkingDirectory = mkDefault "/tmp";
|
||||||
|
DynamicUser = mkDefault true;
|
||||||
|
ExecStart = "${pkgs.meteocat-exporter}/bin/meteocat-exporter";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
26
m/module/nix-daemon-builds.sh
Executable file
26
m/module/nix-daemon-builds.sh
Executable file
@@ -0,0 +1,26 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# Locate nix daemon pid
|
||||||
|
nd=$(pgrep -o nix-daemon)
|
||||||
|
|
||||||
|
# Locate children of nix-daemon
|
||||||
|
pids1=$(tr ' ' '\n' < "/proc/$nd/task/$nd/children")
|
||||||
|
|
||||||
|
# For each child, locate its 2nd-level children
|
||||||
|
pids2=$(echo "$pids1" | xargs -I @ /bin/sh -c 'cat /proc/@/task/*/children' | tr ' ' '\n')
|
||||||
|
|
||||||
|
cat <<EOF
|
||||||
|
HTTP/1.1 200 OK
|
||||||
|
Content-Type: text/plain; version=0.0.4; charset=utf-8; escaping=values
|
||||||
|
|
||||||
|
# HELP nix_daemon_build Nix daemon derivation build state.
|
||||||
|
# TYPE nix_daemon_build gauge
|
||||||
|
EOF
|
||||||
|
|
||||||
|
for pid in $pids2; do
|
||||||
|
name=$(cat /proc/$pid/environ 2>/dev/null | tr '\0' '\n' | rg "^name=(.+)" - --replace '$1' | tr -dc ' [:alnum:]_\-\.')
|
||||||
|
user=$(ps -o uname= -p "$pid")
|
||||||
|
if [ -n "$name" -a -n "$user" ]; then
|
||||||
|
printf 'nix_daemon_build{user="%s",name="%s"} 1\n' "$user" "$name"
|
||||||
|
fi
|
||||||
|
done
|
||||||
23
m/module/nix-daemon-exporter.nix
Normal file
23
m/module/nix-daemon-exporter.nix
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{ pkgs, config, lib, ... }:
|
||||||
|
let
|
||||||
|
script = pkgs.runCommand "nix-daemon-exporter.sh" { }
|
||||||
|
''
|
||||||
|
cp ${./nix-daemon-builds.sh} $out;
|
||||||
|
chmod +x $out
|
||||||
|
''
|
||||||
|
;
|
||||||
|
in
|
||||||
|
{
|
||||||
|
systemd.services.nix-daemon-exporter = {
|
||||||
|
description = "Daemon to export nix-daemon metrics";
|
||||||
|
path = [ pkgs.procps pkgs.ripgrep ];
|
||||||
|
wantedBy = [ "default.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
ExecStart = "${pkgs.socat}/bin/socat TCP4-LISTEN:9999,fork EXEC:${script}";
|
||||||
|
# Root is needed to read the environment; potentially unsafe
|
||||||
|
User = "root";
|
||||||
|
Group = "root";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
20
m/module/nvidia.nix
Normal file
20
m/module/nvidia.nix
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
{ lib, config, pkgs, ... }:
|
||||||
|
{
|
||||||
|
# Configure Nvidia driver to use with CUDA
|
||||||
|
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
|
||||||
|
hardware.nvidia.open = lib.mkDefault (builtins.abort "hardware.nvidia.open not set");
|
||||||
|
hardware.graphics.enable = true;
|
||||||
|
nixpkgs.config.nvidia.acceptLicense = true;
|
||||||
|
services.xserver.videoDrivers = [ "nvidia" ];
|
||||||
|
|
||||||
|
# enable support for derivations which require nvidia-gpu to be available
|
||||||
|
# > requiredSystemFeatures = [ "cuda" ];
|
||||||
|
programs.nix-required-mounts.enable = true;
|
||||||
|
programs.nix-required-mounts.presets.nvidia-gpu.enable = true;
|
||||||
|
# They forgot to add the symlink
|
||||||
|
programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [
|
||||||
|
config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = [ pkgs.cudainfo ];
|
||||||
|
}
|
||||||
68
m/module/p.nix
Normal file
68
m/module/p.nix
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
{ config, lib, pkgs, ... }:
|
||||||
|
|
||||||
|
let
|
||||||
|
cfg = config.services.p;
|
||||||
|
in
|
||||||
|
{
|
||||||
|
options = {
|
||||||
|
services.p = {
|
||||||
|
enable = lib.mkOption {
|
||||||
|
type = lib.types.bool;
|
||||||
|
default = false;
|
||||||
|
description = "Whether to enable the p service.";
|
||||||
|
};
|
||||||
|
path = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "/var/lib/p";
|
||||||
|
description = "Where to save the pasted files on disk.";
|
||||||
|
};
|
||||||
|
url = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "https://jungle.bsc.es/p";
|
||||||
|
description = "URL prefix for the printed file.";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
config = lib.mkIf cfg.enable {
|
||||||
|
environment.systemPackages = let
|
||||||
|
p = pkgs.writeShellScriptBin "p" ''
|
||||||
|
set -e
|
||||||
|
pastedir="${cfg.path}/$USER"
|
||||||
|
cd "$pastedir"
|
||||||
|
|
||||||
|
ext="txt"
|
||||||
|
if [ -n "$1" ]; then
|
||||||
|
ext="$1"
|
||||||
|
fi
|
||||||
|
|
||||||
|
out=$(mktemp "XXXXXXXX.$ext")
|
||||||
|
cat > "$out"
|
||||||
|
chmod go+r "$out"
|
||||||
|
echo "${cfg.url}/$USER/$out"
|
||||||
|
'';
|
||||||
|
in [ p ];
|
||||||
|
|
||||||
|
systemd.services.p = let
|
||||||
|
# Take only normal users
|
||||||
|
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||||
|
# Create a directory for each user
|
||||||
|
commands = lib.concatLists (lib.mapAttrsToList (_: user: [
|
||||||
|
"install -d -o ${user.name} -g ${user.group} -m 0755 ${cfg.path}/${user.name}"
|
||||||
|
]) users);
|
||||||
|
in {
|
||||||
|
description = "P service setup";
|
||||||
|
requires = [ "network-online.target" ];
|
||||||
|
#wants = [ "remote-fs.target" ];
|
||||||
|
#after = [ "remote-fs.target" ];
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
ExecStart = pkgs.writeShellScript "p-init.sh" (''
|
||||||
|
|
||||||
|
install -d -o root -g root -m 0755 ${cfg.path}
|
||||||
|
|
||||||
|
'' + (lib.concatLines commands));
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
33
m/module/power-policy.nix
Normal file
33
m/module/power-policy.nix
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{ config, lib, pkgs, ... }:
|
||||||
|
|
||||||
|
with lib;
|
||||||
|
|
||||||
|
let
|
||||||
|
cfg = config.power.policy;
|
||||||
|
in
|
||||||
|
{
|
||||||
|
options = {
|
||||||
|
power.policy = mkOption {
|
||||||
|
type = types.nullOr (types.enum [ "always-on" "previous" "always-off" ]);
|
||||||
|
default = null;
|
||||||
|
description = "Set power policy to use via IPMI.";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
config = mkIf (cfg != null) {
|
||||||
|
systemd.services."power-policy" = {
|
||||||
|
description = "Set power policy to use via IPMI";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
unitConfig = {
|
||||||
|
StartLimitBurst = "10";
|
||||||
|
StartLimitIntervalSec = "10m";
|
||||||
|
};
|
||||||
|
serviceConfig = {
|
||||||
|
ExecStart = "${pkgs.ipmitool}/bin/ipmitool chassis policy ${cfg}";
|
||||||
|
Type = "oneshot";
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = "5s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
@@ -1,33 +1,10 @@
|
|||||||
{ config, pkgs, lib, ... }:
|
{ lib, ... }:
|
||||||
|
|
||||||
let
|
{
|
||||||
suspendProgram = pkgs.writeScript "suspend.sh" ''
|
imports = [
|
||||||
#!/usr/bin/env bash
|
./slurm-common.nix
|
||||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
];
|
||||||
set -x
|
|
||||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
|
||||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
|
||||||
hosts=$(scontrol show hostnames $1)
|
|
||||||
for host in $hosts; do
|
|
||||||
echo Shutting down host: $host
|
|
||||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
|
|
||||||
done
|
|
||||||
'';
|
|
||||||
|
|
||||||
resumeProgram = pkgs.writeScript "resume.sh" ''
|
|
||||||
#!/usr/bin/env bash
|
|
||||||
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
|
||||||
set -x
|
|
||||||
export "PATH=/run/current-system/sw/bin:$PATH"
|
|
||||||
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
|
||||||
hosts=$(scontrol show hostnames $1)
|
|
||||||
for host in $hosts; do
|
|
||||||
echo Starting host: $host
|
|
||||||
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
|
|
||||||
done
|
|
||||||
'';
|
|
||||||
|
|
||||||
in {
|
|
||||||
systemd.services.slurmd.serviceConfig = {
|
systemd.services.slurmd.serviceConfig = {
|
||||||
# Kill all processes in the control group on stop/restart. This will kill
|
# Kill all processes in the control group on stop/restart. This will kill
|
||||||
# all the jobs running, so ensure that we only upgrade when the nodes are
|
# all the jobs running, so ensure that we only upgrade when the nodes are
|
||||||
@@ -35,94 +12,13 @@ in {
|
|||||||
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
|
# https://github.com/NixOS/nixpkgs/commit/ae93ed0f0d4e7be0a286d1fca86446318c0c6ffb
|
||||||
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
# https://bugs.schedmd.com/show_bug.cgi?id=2095#c24
|
||||||
KillMode = lib.mkForce "control-group";
|
KillMode = lib.mkForce "control-group";
|
||||||
|
|
||||||
|
# If slurmd fails to contact the control server it will fail, causing the
|
||||||
|
# node to remain out of service until manually restarted. Always try to
|
||||||
|
# restart it.
|
||||||
|
Restart = "always";
|
||||||
|
RestartSec = "30s";
|
||||||
};
|
};
|
||||||
|
|
||||||
services.slurm = {
|
services.slurm.client.enable = true;
|
||||||
client.enable = true;
|
|
||||||
controlMachine = "hut";
|
|
||||||
clusterName = "jungle";
|
|
||||||
nodeName = [
|
|
||||||
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
|
|
||||||
"fox Sockets=2 CoresPerSocket=96 ThreadsPerCore=1 Feature=fox"
|
|
||||||
"hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
|
|
||||||
];
|
|
||||||
|
|
||||||
partitionName = [
|
|
||||||
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
|
||||||
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
|
||||||
];
|
|
||||||
|
|
||||||
# See slurm.conf(5) for more details about these options.
|
|
||||||
extraConfig = ''
|
|
||||||
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
|
|
||||||
# not with Intel MPI. For that use the compatibility shim libpmi.so
|
|
||||||
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
|
|
||||||
# library in SLURM (--mpi=pmix). See more details here:
|
|
||||||
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
|
|
||||||
MpiDefault=pmix
|
|
||||||
|
|
||||||
# When a node reboots return that node to the slurm queue as soon as it
|
|
||||||
# becomes operative again.
|
|
||||||
ReturnToService=2
|
|
||||||
|
|
||||||
# Track all processes by using a cgroup
|
|
||||||
ProctrackType=proctrack/cgroup
|
|
||||||
|
|
||||||
# Enable task/affinity to allow the jobs to run in a specified subset of
|
|
||||||
# the resources. Use the task/cgroup plugin to enable process containment.
|
|
||||||
TaskPlugin=task/affinity,task/cgroup
|
|
||||||
|
|
||||||
# Power off unused nodes until they are requested
|
|
||||||
SuspendProgram=${suspendProgram}
|
|
||||||
SuspendTimeout=60
|
|
||||||
ResumeProgram=${resumeProgram}
|
|
||||||
ResumeTimeout=300
|
|
||||||
SuspendExcNodes=hut,fox
|
|
||||||
|
|
||||||
# Turn the nodes off after 1 hour of inactivity
|
|
||||||
SuspendTime=3600
|
|
||||||
|
|
||||||
# Reduce port range so we can allow only this range in the firewall
|
|
||||||
SrunPortRange=60000-61000
|
|
||||||
|
|
||||||
# Use cores as consumable resources. In SLURM terms, a core may have
|
|
||||||
# multiple hardware threads (or CPUs).
|
|
||||||
SelectType=select/cons_tres
|
|
||||||
|
|
||||||
# Ignore memory constraints and only use unused cores to share a node with
|
|
||||||
# other jobs.
|
|
||||||
SelectTypeParameters=CR_Core
|
|
||||||
|
|
||||||
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
|
|
||||||
# This sets up the "extern" step into which ssh-launched processes will be
|
|
||||||
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
|
|
||||||
# when a task runs (srun) so we can ssh early.
|
|
||||||
PrologFlags=Alloc,Contain,X11
|
|
||||||
|
|
||||||
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
|
|
||||||
# adopted by the external step, similar to tasks running in regular steps
|
|
||||||
# LaunchParameters=ulimit_pam_adopt
|
|
||||||
SlurmdDebug=debug5
|
|
||||||
#DebugFlags=Protocol,Cgroup
|
|
||||||
'';
|
|
||||||
|
|
||||||
extraCgroupConfig = ''
|
|
||||||
CgroupPlugin=cgroup/v2
|
|
||||||
#ConstrainCores=yes
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
# Place the slurm config in /etc as this will be required by PAM
|
|
||||||
environment.etc.slurm.source = config.services.slurm.etcSlurm;
|
|
||||||
|
|
||||||
age.secrets.mungeKey = {
|
|
||||||
file = ../../secrets/munge-key.age;
|
|
||||||
owner = "munge";
|
|
||||||
group = "munge";
|
|
||||||
};
|
|
||||||
|
|
||||||
services.munge = {
|
|
||||||
enable = true;
|
|
||||||
password = config.age.secrets.mungeKey.path;
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|||||||
115
m/module/slurm-common.nix
Normal file
115
m/module/slurm-common.nix
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
|
let
|
||||||
|
suspendProgram = pkgs.writeShellScript "suspend.sh" ''
|
||||||
|
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||||
|
set -x
|
||||||
|
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||||
|
echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log
|
||||||
|
hosts=$(scontrol show hostnames $1)
|
||||||
|
for host in $hosts; do
|
||||||
|
echo Shutting down host: $host
|
||||||
|
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off
|
||||||
|
done
|
||||||
|
'';
|
||||||
|
|
||||||
|
resumeProgram = pkgs.writeShellScript "resume.sh" ''
|
||||||
|
exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log
|
||||||
|
set -x
|
||||||
|
export "PATH=/run/current-system/sw/bin:$PATH"
|
||||||
|
echo "$(date) Resume invoked $0 $*" >> /var/log/power_save.log
|
||||||
|
hosts=$(scontrol show hostnames $1)
|
||||||
|
for host in $hosts; do
|
||||||
|
echo Starting host: $host
|
||||||
|
ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on
|
||||||
|
done
|
||||||
|
'';
|
||||||
|
|
||||||
|
in {
|
||||||
|
services.slurm = {
|
||||||
|
controlMachine = "apex";
|
||||||
|
clusterName = "jungle";
|
||||||
|
nodeName = [
|
||||||
|
"owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl"
|
||||||
|
"fox Sockets=8 CoresPerSocket=24 ThreadsPerCore=1"
|
||||||
|
];
|
||||||
|
|
||||||
|
partitionName = [
|
||||||
|
"owl Nodes=owl[1-2] Default=YES DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||||
|
"fox Nodes=fox Default=NO DefaultTime=01:00:00 MaxTime=INFINITE State=UP"
|
||||||
|
];
|
||||||
|
|
||||||
|
# See slurm.conf(5) for more details about these options.
|
||||||
|
extraConfig = ''
|
||||||
|
# Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
|
||||||
|
# not with Intel MPI. For that use the compatibility shim libpmi.so
|
||||||
|
# setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
|
||||||
|
# library in SLURM (--mpi=pmix). See more details here:
|
||||||
|
# https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
|
||||||
|
MpiDefault=pmix
|
||||||
|
|
||||||
|
# When a node reboots return that node to the slurm queue as soon as it
|
||||||
|
# becomes operative again.
|
||||||
|
ReturnToService=2
|
||||||
|
|
||||||
|
# Track all processes by using a cgroup
|
||||||
|
ProctrackType=proctrack/cgroup
|
||||||
|
|
||||||
|
# Enable task/affinity to allow the jobs to run in a specified subset of
|
||||||
|
# the resources. Use the task/cgroup plugin to enable process containment.
|
||||||
|
TaskPlugin=task/affinity,task/cgroup
|
||||||
|
|
||||||
|
# Power off unused nodes until they are requested
|
||||||
|
SuspendProgram=${suspendProgram}
|
||||||
|
SuspendTimeout=60
|
||||||
|
ResumeProgram=${resumeProgram}
|
||||||
|
ResumeTimeout=300
|
||||||
|
SuspendExcNodes=fox
|
||||||
|
|
||||||
|
# Turn the nodes off after 1 hour of inactivity
|
||||||
|
SuspendTime=3600
|
||||||
|
|
||||||
|
# Reduce port range so we can allow only this range in the firewall
|
||||||
|
SrunPortRange=60000-61000
|
||||||
|
|
||||||
|
# Use cores as consumable resources. In SLURM terms, a core may have
|
||||||
|
# multiple hardware threads (or CPUs).
|
||||||
|
SelectType=select/cons_tres
|
||||||
|
|
||||||
|
# Ignore memory constraints and only use unused cores to share a node with
|
||||||
|
# other jobs.
|
||||||
|
SelectTypeParameters=CR_Core
|
||||||
|
|
||||||
|
# Required for pam_slurm_adopt, see https://slurm.schedmd.com/pam_slurm_adopt.html
|
||||||
|
# This sets up the "extern" step into which ssh-launched processes will be
|
||||||
|
# adopted. Alloc runs the prolog at job allocation (salloc) rather than
|
||||||
|
# when a task runs (srun) so we can ssh early.
|
||||||
|
PrologFlags=Alloc,Contain,X11
|
||||||
|
|
||||||
|
# LaunchParameters=ulimit_pam_adopt will set RLIMIT_RSS in processes
|
||||||
|
# adopted by the external step, similar to tasks running in regular steps
|
||||||
|
# LaunchParameters=ulimit_pam_adopt
|
||||||
|
SlurmdDebug=debug5
|
||||||
|
#DebugFlags=Protocol,Cgroup
|
||||||
|
'';
|
||||||
|
|
||||||
|
extraCgroupConfig = ''
|
||||||
|
CgroupPlugin=cgroup/v2
|
||||||
|
#ConstrainCores=yes
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
# Place the slurm config in /etc as this will be required by PAM
|
||||||
|
environment.etc.slurm.source = config.services.slurm.etcSlurm;
|
||||||
|
|
||||||
|
age.secrets.mungeKey = {
|
||||||
|
file = ../../secrets/munge-key.age;
|
||||||
|
owner = "munge";
|
||||||
|
group = "munge";
|
||||||
|
};
|
||||||
|
|
||||||
|
services.munge = {
|
||||||
|
enable = true;
|
||||||
|
password = config.age.secrets.mungeKey.path;
|
||||||
|
};
|
||||||
|
}
|
||||||
23
m/module/slurm-server.nix
Normal file
23
m/module/slurm-server.nix
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{ ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
./slurm-common.nix
|
||||||
|
];
|
||||||
|
|
||||||
|
services.slurm.server.enable = true;
|
||||||
|
|
||||||
|
networking.firewall = {
|
||||||
|
extraCommands = ''
|
||||||
|
# Accept slurm connections to controller from compute nodes
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817 -j nixos-fw-accept
|
||||||
|
# Accept slurm connections from compute nodes for srun
|
||||||
|
iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept
|
||||||
|
|
||||||
|
# Accept slurm connections to controller from fox (via wireguard)
|
||||||
|
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 6817 -j nixos-fw-accept
|
||||||
|
# Accept slurm connections from fox for srun (via wireguard)
|
||||||
|
iptables -A nixos-fw -p tcp -i wg0 -s 10.106.0.1/32 --dport 60000:61000 -j nixos-fw-accept
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
}
|
||||||
17
m/module/upc-qaire-exporter.nix
Normal file
17
m/module/upc-qaire-exporter.nix
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
{ config, lib, pkgs, ... }:
|
||||||
|
|
||||||
|
with lib;
|
||||||
|
|
||||||
|
{
|
||||||
|
systemd.services."prometheus-upc-qaire-exporter" = {
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Restart = mkDefault "always";
|
||||||
|
PrivateTmp = mkDefault true;
|
||||||
|
WorkingDirectory = mkDefault "/tmp";
|
||||||
|
DynamicUser = mkDefault true;
|
||||||
|
ExecStart = "${pkgs.upc-qaire-exporter}/bin/upc-qaire-exporter";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
35
m/module/vpn-dac.nix
Normal file
35
m/module/vpn-dac.nix
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
{config, ...}:
|
||||||
|
{
|
||||||
|
age.secrets.vpn-dac-login.file = ../../secrets/vpn-dac-login.age;
|
||||||
|
age.secrets.vpn-dac-client-key.file = ../../secrets/vpn-dac-client-key.age;
|
||||||
|
|
||||||
|
services.openvpn.servers = {
|
||||||
|
# systemctl status openvpn-dac.service
|
||||||
|
dac = {
|
||||||
|
config = ''
|
||||||
|
client
|
||||||
|
dev tun
|
||||||
|
proto tcp
|
||||||
|
remote vpn.ac.upc.edu 1194
|
||||||
|
remote vpn.ac.upc.edu 80
|
||||||
|
resolv-retry infinite
|
||||||
|
nobind
|
||||||
|
persist-key
|
||||||
|
persist-tun
|
||||||
|
ca ${./vpn-dac/ca.crt}
|
||||||
|
cert ${./vpn-dac/client.crt}
|
||||||
|
# Only key needs to be secret
|
||||||
|
key ${config.age.secrets.vpn-dac-client-key.path}
|
||||||
|
remote-cert-tls server
|
||||||
|
comp-lzo
|
||||||
|
verb 3
|
||||||
|
auth-user-pass ${config.age.secrets.vpn-dac-login.path}
|
||||||
|
reneg-sec 0
|
||||||
|
|
||||||
|
# Only route fox-ipmi
|
||||||
|
pull-filter ignore "route "
|
||||||
|
route 147.83.35.27 255.255.255.255
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
31
m/module/vpn-dac/ca.crt
Normal file
31
m/module/vpn-dac/ca.crt
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIFUjCCBDqgAwIBAgIJAJH118PApk5hMA0GCSqGSIb3DQEBCwUAMIHLMQswCQYD
|
||||||
|
VQQGEwJFUzESMBAGA1UECBMJQmFyY2Vsb25hMRIwEAYDVQQHEwlCYXJjZWxvbmEx
|
||||||
|
LTArBgNVBAoTJFVuaXZlcnNpdGF0IFBvbGl0ZWNuaWNhIGRlIENhdGFsdW55YTEk
|
||||||
|
MCIGA1UECxMbQXJxdWl0ZWN0dXJhIGRlIENvbXB1dGFkb3JzMRAwDgYDVQQDEwdM
|
||||||
|
Q0FDIENBMQ0wCwYDVQQpEwRMQ0FDMR4wHAYJKoZIhvcNAQkBFg9sY2FjQGFjLnVw
|
||||||
|
Yy5lZHUwHhcNMTYwMTEyMTI0NDIxWhcNNDYwMTEyMTI0NDIxWjCByzELMAkGA1UE
|
||||||
|
BhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0w
|
||||||
|
KwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAi
|
||||||
|
BgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENB
|
||||||
|
QyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMu
|
||||||
|
ZWR1MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0CteSeof7Xwi51kC
|
||||||
|
F0nQ4E9iR5Lq7wtfRuVPn6JJcIxJJ6+F9gr4R/HIHTztW4XAzReE36DYfexupx3D
|
||||||
|
6UgQIkMLlVyGqRbulNF+RnCx20GosF7Dm4RGBVvOxBP1PGjYq/A+XhaaDAFd0cOF
|
||||||
|
LMNkzuYP7PF0bnBEaHnxmN8bPmuyDyas7fK9AAc3scyWT2jSBPbOVFvCJwPg8MH9
|
||||||
|
V/h+hKwL/7hRt1MVfVv2qyIuKwTki8mUt0RcVbP7oJoRY5K1+R52phIz/GL/b4Fx
|
||||||
|
L6MKXlQxLi8vzP4QZXgCMyV7oFNdU3VqCEXBA11YIRvsOZ4QS19otIk/ZWU5x+HH
|
||||||
|
LAIJ7wIDAQABo4IBNTCCATEwHQYDVR0OBBYEFNyezX1cH1N4QR14ebBpljqmtE7q
|
||||||
|
MIIBAAYDVR0jBIH4MIH1gBTcns19XB9TeEEdeHmwaZY6prRO6qGB0aSBzjCByzEL
|
||||||
|
MAkGA1UEBhMCRVMxEjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vs
|
||||||
|
b25hMS0wKwYDVQQKEyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVu
|
||||||
|
eWExJDAiBgNVBAsTG0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UE
|
||||||
|
AxMHTENBQyBDQTENMAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0Bh
|
||||||
|
Yy51cGMuZWR1ggkAkfXXw8CmTmEwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsF
|
||||||
|
AAOCAQEAUAmOvVXIQrR+aZVO0bOTeugKBHB75eTIZSIHIn2oDUvDbAP5GXIJ56A1
|
||||||
|
6mZXxemSMY8/9k+pRcwJhfat3IgvAN159XSqf9kRv0NHgc3FWUI1Qv/BsAn0vJO/
|
||||||
|
oK0dbmbbRWqt86qNrCN+cUfz5aovvxN73jFfnvfDQFBk/8enj9wXxYfokjjLPR1Q
|
||||||
|
+oTkH8dY68qf71oaUB9MndppPEPSz0K1S6h1XxvJoSu9MVSXOQHiq1cdZdxRazI3
|
||||||
|
4f7q9sTCL+khwDAuZxAYzlEYxFFa/NN8PWU6xPw6V+t/aDhOiXUPJQB/O/K7mw3Z
|
||||||
|
TQQx5NqM7B5jjak5fauR3/oRD8XXsA==
|
||||||
|
-----END CERTIFICATE-----
|
||||||
100
m/module/vpn-dac/client.crt
Normal file
100
m/module/vpn-dac/client.crt
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
Certificate:
|
||||||
|
Data:
|
||||||
|
Version: 3 (0x2)
|
||||||
|
Serial Number: 2 (0x2)
|
||||||
|
Signature Algorithm: sha256WithRSAEncryption
|
||||||
|
Issuer: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
|
||||||
|
Validity
|
||||||
|
Not Before: Jan 12 12:45:41 2016 GMT
|
||||||
|
Not After : Jan 12 12:45:41 2046 GMT
|
||||||
|
Subject: C=ES, ST=Barcelona, L=Barcelona, O=Universitat Politecnica de Catalunya, OU=Arquitectura de Computadors, CN=client/name=LCAC/emailAddress=lcac@ac.upc.edu
|
||||||
|
Subject Public Key Info:
|
||||||
|
Public Key Algorithm: rsaEncryption
|
||||||
|
Public-Key: (2048 bit)
|
||||||
|
Modulus:
|
||||||
|
00:97:99:fa:7a:0e:4d:e2:1d:a5:b1:a8:14:18:64:
|
||||||
|
c7:66:bf:de:99:1d:92:3b:86:82:4d:95:39:f7:a6:
|
||||||
|
56:49:97:14:4f:e3:37:00:6c:f4:d0:1d:56:79:e7:
|
||||||
|
19:b5:dd:36:15:8e:1d:57:7b:59:29:d2:11:bf:58:
|
||||||
|
48:e0:f7:41:3d:16:64:8d:a2:0b:4a:ac:fa:c6:83:
|
||||||
|
dc:10:2a:2c:d9:97:48:ee:11:2a:bc:4b:60:dd:b9:
|
||||||
|
2e:8f:45:ca:87:0b:38:65:1c:f8:a2:1d:f9:50:aa:
|
||||||
|
6e:60:f9:48:df:57:12:23:e1:e7:0c:81:5c:9f:c5:
|
||||||
|
b2:e6:99:99:95:30:6d:57:36:06:8c:fd:fb:f9:4f:
|
||||||
|
60:d2:3c:ba:ae:28:56:2f:da:58:5c:e8:c5:7b:ec:
|
||||||
|
76:d9:28:6e:fb:8c:07:f9:d7:23:c3:72:76:3c:fa:
|
||||||
|
dc:20:67:8f:cc:16:e0:91:07:d5:68:f9:20:4d:7d:
|
||||||
|
5c:2d:02:04:16:76:52:f3:53:be:a3:dc:0d:d5:fb:
|
||||||
|
6b:55:29:f3:52:35:c8:7d:99:d1:4a:94:be:b1:8e:
|
||||||
|
fd:85:18:25:eb:41:e9:56:da:af:62:84:20:0a:00:
|
||||||
|
17:94:92:94:91:6a:f8:54:37:17:ee:1e:bb:fb:93:
|
||||||
|
71:91:d9:e4:e9:b8:3b:18:7d:6d:7d:4c:ce:58:55:
|
||||||
|
f9:41
|
||||||
|
Exponent: 65537 (0x10001)
|
||||||
|
X509v3 extensions:
|
||||||
|
X509v3 Basic Constraints:
|
||||||
|
CA:FALSE
|
||||||
|
Netscape Comment:
|
||||||
|
Easy-RSA Generated Certificate
|
||||||
|
X509v3 Subject Key Identifier:
|
||||||
|
1B:88:06:D5:33:1D:5C:48:46:B5:DE:78:89:36:96:91:3A:74:43:18
|
||||||
|
X509v3 Authority Key Identifier:
|
||||||
|
keyid:DC:9E:CD:7D:5C:1F:53:78:41:1D:78:79:B0:69:96:3A:A6:B4:4E:EA
|
||||||
|
DirName:/C=ES/ST=Barcelona/L=Barcelona/O=Universitat Politecnica de Catalunya/OU=Arquitectura de Computadors/CN=LCAC CA/name=LCAC/emailAddress=lcac@ac.upc.edu
|
||||||
|
serial:91:F5:D7:C3:C0:A6:4E:61
|
||||||
|
|
||||||
|
X509v3 Extended Key Usage:
|
||||||
|
TLS Web Client Authentication
|
||||||
|
X509v3 Key Usage:
|
||||||
|
Digital Signature
|
||||||
|
X509v3 Subject Alternative Name:
|
||||||
|
DNS:client
|
||||||
|
Signature Algorithm: sha256WithRSAEncryption
|
||||||
|
42:e8:50:b2:e7:88:75:86:0b:bb:29:e3:aa:c6:0e:4c:e8:ea:
|
||||||
|
3d:0c:02:31:7f:3b:80:0c:3f:80:af:45:d6:62:27:a0:0e:e7:
|
||||||
|
26:09:12:97:95:f8:d9:9b:89:b5:ef:56:64:f1:de:82:74:e0:
|
||||||
|
31:0a:cc:90:0a:bd:50:b8:54:95:0a:ae:3b:40:df:76:b6:d1:
|
||||||
|
01:2e:f3:96:9f:52:d4:e9:14:6d:b7:14:9d:45:99:33:36:2a:
|
||||||
|
01:0b:15:1a:ed:55:dc:64:83:65:1a:06:42:d9:c7:dc:97:d4:
|
||||||
|
02:81:c2:58:2b:ea:e4:b7:ae:84:3a:e4:3f:f1:2e:fa:ec:f3:
|
||||||
|
40:5d:b8:6a:d5:5e:e1:e8:2f:e2:2f:48:a4:38:a1:4f:22:e3:
|
||||||
|
4f:66:94:aa:02:78:9a:2b:7a:5d:aa:aa:51:a5:e3:d0:91:e9:
|
||||||
|
1d:f9:08:ed:8b:51:c9:a6:af:46:85:b5:1c:ed:12:a1:28:33:
|
||||||
|
75:36:00:d8:5c:14:65:96:c0:28:7d:47:50:a4:89:5f:b0:72:
|
||||||
|
1a:4b:13:17:26:0f:f0:b8:65:3c:e9:96:36:f9:bf:90:59:33:
|
||||||
|
87:1f:01:03:25:f8:f0:3a:9b:33:02:d0:0a:43:b5:0a:cf:62:
|
||||||
|
a1:45:38:37:07:9d:9c:94:0b:31:c6:3c:34:b7:fc:5a:0c:e4:
|
||||||
|
bf:23:f6:7d
|
||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIFqjCCBJKgAwIBAgIBAjANBgkqhkiG9w0BAQsFADCByzELMAkGA1UEBhMCRVMx
|
||||||
|
EjAQBgNVBAgTCUJhcmNlbG9uYTESMBAGA1UEBxMJQmFyY2Vsb25hMS0wKwYDVQQK
|
||||||
|
EyRVbml2ZXJzaXRhdCBQb2xpdGVjbmljYSBkZSBDYXRhbHVueWExJDAiBgNVBAsT
|
||||||
|
G0FycXVpdGVjdHVyYSBkZSBDb21wdXRhZG9yczEQMA4GA1UEAxMHTENBQyBDQTEN
|
||||||
|
MAsGA1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MB4X
|
||||||
|
DTE2MDExMjEyNDU0MVoXDTQ2MDExMjEyNDU0MVowgcoxCzAJBgNVBAYTAkVTMRIw
|
||||||
|
EAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNlbG9uYTEtMCsGA1UEChMk
|
||||||
|
VW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1bnlhMSQwIgYDVQQLExtB
|
||||||
|
cnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxDzANBgNVBAMTBmNsaWVudDENMAsG
|
||||||
|
A1UEKRMETENBQzEeMBwGCSqGSIb3DQEJARYPbGNhY0BhYy51cGMuZWR1MIIBIjAN
|
||||||
|
BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAl5n6eg5N4h2lsagUGGTHZr/emR2S
|
||||||
|
O4aCTZU596ZWSZcUT+M3AGz00B1WeecZtd02FY4dV3tZKdIRv1hI4PdBPRZkjaIL
|
||||||
|
Sqz6xoPcECos2ZdI7hEqvEtg3bkuj0XKhws4ZRz4oh35UKpuYPlI31cSI+HnDIFc
|
||||||
|
n8Wy5pmZlTBtVzYGjP37+U9g0jy6rihWL9pYXOjFe+x22Shu+4wH+dcjw3J2PPrc
|
||||||
|
IGePzBbgkQfVaPkgTX1cLQIEFnZS81O+o9wN1ftrVSnzUjXIfZnRSpS+sY79hRgl
|
||||||
|
60HpVtqvYoQgCgAXlJKUkWr4VDcX7h67+5Nxkdnk6bg7GH1tfUzOWFX5QQIDAQAB
|
||||||
|
o4IBljCCAZIwCQYDVR0TBAIwADAtBglghkgBhvhCAQ0EIBYeRWFzeS1SU0EgR2Vu
|
||||||
|
ZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQWBBQbiAbVMx1cSEa13niJNpaROnRD
|
||||||
|
GDCCAQAGA1UdIwSB+DCB9YAU3J7NfVwfU3hBHXh5sGmWOqa0TuqhgdGkgc4wgcsx
|
||||||
|
CzAJBgNVBAYTAkVTMRIwEAYDVQQIEwlCYXJjZWxvbmExEjAQBgNVBAcTCUJhcmNl
|
||||||
|
bG9uYTEtMCsGA1UEChMkVW5pdmVyc2l0YXQgUG9saXRlY25pY2EgZGUgQ2F0YWx1
|
||||||
|
bnlhMSQwIgYDVQQLExtBcnF1aXRlY3R1cmEgZGUgQ29tcHV0YWRvcnMxEDAOBgNV
|
||||||
|
BAMTB0xDQUMgQ0ExDTALBgNVBCkTBExDQUMxHjAcBgkqhkiG9w0BCQEWD2xjYWNA
|
||||||
|
YWMudXBjLmVkdYIJAJH118PApk5hMBMGA1UdJQQMMAoGCCsGAQUFBwMCMAsGA1Ud
|
||||||
|
DwQEAwIHgDARBgNVHREECjAIggZjbGllbnQwDQYJKoZIhvcNAQELBQADggEBAELo
|
||||||
|
ULLniHWGC7sp46rGDkzo6j0MAjF/O4AMP4CvRdZiJ6AO5yYJEpeV+NmbibXvVmTx
|
||||||
|
3oJ04DEKzJAKvVC4VJUKrjtA33a20QEu85afUtTpFG23FJ1FmTM2KgELFRrtVdxk
|
||||||
|
g2UaBkLZx9yX1AKBwlgr6uS3roQ65D/xLvrs80BduGrVXuHoL+IvSKQ4oU8i409m
|
||||||
|
lKoCeJorel2qqlGl49CR6R35CO2LUcmmr0aFtRztEqEoM3U2ANhcFGWWwCh9R1Ck
|
||||||
|
iV+wchpLExcmD/C4ZTzpljb5v5BZM4cfAQMl+PA6mzMC0ApDtQrPYqFFODcHnZyU
|
||||||
|
CzHGPDS3/FoM5L8j9n0=
|
||||||
|
-----END CERTIFICATE-----
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/ssf.nix
|
||||||
../module/ceph.nix
|
../module/ceph.nix
|
||||||
../module/emulation.nix
|
../module/emulation.nix
|
||||||
../module/slurm-client.nix
|
../module/slurm-client.nix
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/xeon.nix
|
../common/ssf.nix
|
||||||
../module/ceph.nix
|
../module/ceph.nix
|
||||||
../module/emulation.nix
|
../module/emulation.nix
|
||||||
../module/slurm-client.nix
|
../module/slurm-client.nix
|
||||||
|
|||||||
@@ -3,6 +3,13 @@
|
|||||||
{
|
{
|
||||||
imports = [
|
imports = [
|
||||||
../common/base.nix
|
../common/base.nix
|
||||||
|
../common/ssf/hosts.nix
|
||||||
|
../module/emulation.nix
|
||||||
|
../module/debuginfod.nix
|
||||||
|
../module/nvidia.nix
|
||||||
|
../eudy/kernel/perf.nix
|
||||||
|
./wireguard.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
];
|
];
|
||||||
|
|
||||||
# Don't install Grub on the disk yet
|
# Don't install Grub on the disk yet
|
||||||
@@ -23,19 +30,41 @@
|
|||||||
address = "84.88.51.152";
|
address = "84.88.51.152";
|
||||||
prefixLength = 25;
|
prefixLength = 25;
|
||||||
} ];
|
} ];
|
||||||
|
interfaces.enp5s0f1.ipv4.addresses = [ {
|
||||||
|
address = "10.0.44.1";
|
||||||
|
prefixLength = 24;
|
||||||
|
} ];
|
||||||
|
nat = {
|
||||||
|
enable = true;
|
||||||
|
internalInterfaces = [ "enp5s0f1" ];
|
||||||
|
externalInterface = "eno0";
|
||||||
|
};
|
||||||
|
hosts = {
|
||||||
|
"10.0.44.4" = [ "tent" ];
|
||||||
|
"84.88.53.236" = [ "apex" ];
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
nix.settings = {
|
# Mount the NFS home
|
||||||
substituters = [ "https://jungle.bsc.es/cache" ];
|
fileSystems."/nfs/home" = {
|
||||||
trusted-public-keys = [ "jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=" ];
|
device = "10.106.0.30:/home";
|
||||||
|
fsType = "nfs";
|
||||||
|
options = [ "nfsvers=3" "rsize=1024" "wsize=1024" "cto" "nofail" ];
|
||||||
};
|
};
|
||||||
|
|
||||||
# Configure Nvidia driver to use with CUDA
|
# Enable performance governor
|
||||||
hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production;
|
powerManagement.cpuFreqGovernor = "performance";
|
||||||
hardware.graphics.enable = true;
|
|
||||||
nixpkgs.config.allowUnfree = true;
|
hardware.nvidia.open = false; # Maxwell is older than Turing architecture
|
||||||
nixpkgs.config.nvidia.acceptLicense = true;
|
|
||||||
services.xserver.videoDrivers = [ "nvidia" ];
|
services.openssh.settings.X11Forwarding = true;
|
||||||
|
|
||||||
|
services.prometheus.exporters.node = {
|
||||||
|
enable = true;
|
||||||
|
enabledCollectors = [ "systemd" ];
|
||||||
|
port = 9002;
|
||||||
|
listenAddress = "127.0.0.1";
|
||||||
|
};
|
||||||
|
|
||||||
users.motd = ''
|
users.motd = ''
|
||||||
⠀⠀⠀⠀⠀⠀⠀⣀⣀⣄⣠⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
|
⠀⠀⠀⠀⠀⠀⠀⣀⣀⣄⣠⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
|
||||||
|
|||||||
48
m/raccoon/wireguard.nix
Normal file
48
m/raccoon/wireguard.nix
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
{ config, pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
networking.nat = {
|
||||||
|
enable = true;
|
||||||
|
enableIPv6 = false;
|
||||||
|
externalInterface = "eno0";
|
||||||
|
internalInterfaces = [ "wg0" ];
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.firewall = {
|
||||||
|
allowedUDPPorts = [ 666 ];
|
||||||
|
};
|
||||||
|
|
||||||
|
age.secrets.wgRaccoon.file = ../../secrets/wg-raccoon.age;
|
||||||
|
|
||||||
|
# Enable WireGuard
|
||||||
|
networking.wireguard.enable = true;
|
||||||
|
networking.wireguard.interfaces = {
|
||||||
|
wg0 = {
|
||||||
|
ips = [ "10.106.0.236/24" ];
|
||||||
|
listenPort = 666;
|
||||||
|
privateKeyFile = config.age.secrets.wgRaccoon.path;
|
||||||
|
# Public key: QUfnGXSMEgu2bviglsaSdCjidB51oEDBFpnSFcKGfDI=
|
||||||
|
peers = [
|
||||||
|
{
|
||||||
|
name = "fox";
|
||||||
|
publicKey = "VfMPBQLQTKeyXJSwv8wBhc6OV0j2qAxUpX3kLHunK2Y=";
|
||||||
|
allowedIPs = [ "10.106.0.1/32" ];
|
||||||
|
endpoint = "fox.ac.upc.edu:666";
|
||||||
|
persistentKeepalive = 25;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
name = "apex";
|
||||||
|
publicKey = "VwhcN8vSOzdJEotQTpmPHBC52x3Hbv1lkFIyKubrnUA=";
|
||||||
|
allowedIPs = [ "10.106.0.30/32" "10.0.40.0/24" ];
|
||||||
|
endpoint = "ssfhead.bsc.es:666";
|
||||||
|
persistentKeepalive = 25;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
networking.hosts = {
|
||||||
|
"10.106.0.1" = [ "fox.wg" ];
|
||||||
|
"10.106.0.30" = [ "apex.wg" ];
|
||||||
|
};
|
||||||
|
}
|
||||||
14
m/tent/blackbox.yml
Normal file
14
m/tent/blackbox.yml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
modules:
|
||||||
|
http_2xx:
|
||||||
|
prober: http
|
||||||
|
timeout: 5s
|
||||||
|
http:
|
||||||
|
preferred_ip_protocol: "ip4"
|
||||||
|
follow_redirects: true
|
||||||
|
valid_status_codes: [] # Defaults to 2xx
|
||||||
|
method: GET
|
||||||
|
icmp:
|
||||||
|
prober: icmp
|
||||||
|
timeout: 5s
|
||||||
|
icmp:
|
||||||
|
preferred_ip_protocol: "ip4"
|
||||||
85
m/tent/configuration.nix
Normal file
85
m/tent/configuration.nix
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
{ config, pkgs, lib, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
../common/xeon.nix
|
||||||
|
../common/ssf/hosts.nix
|
||||||
|
../module/emulation.nix
|
||||||
|
../module/debuginfod.nix
|
||||||
|
./monitoring.nix
|
||||||
|
./nginx.nix
|
||||||
|
./nix-serve.nix
|
||||||
|
./gitlab-runner.nix
|
||||||
|
./gitea.nix
|
||||||
|
../hut/public-inbox.nix
|
||||||
|
../hut/msmtp.nix
|
||||||
|
../module/p.nix
|
||||||
|
../module/vpn-dac.nix
|
||||||
|
../module/hut-substituter.nix
|
||||||
|
];
|
||||||
|
|
||||||
|
# Select the disk using the ID to avoid mismatches
|
||||||
|
boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d537675";
|
||||||
|
|
||||||
|
networking = {
|
||||||
|
hostName = "tent";
|
||||||
|
interfaces.eno1.ipv4.addresses = [
|
||||||
|
{
|
||||||
|
address = "10.0.44.4";
|
||||||
|
prefixLength = 24;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
# Only BSC DNSs seem to be reachable from the office VLAN
|
||||||
|
nameservers = [ "84.88.52.35" "84.88.52.36" ];
|
||||||
|
search = [ "bsc.es" "ac.upc.edu" ];
|
||||||
|
defaultGateway = "10.0.44.1";
|
||||||
|
hosts = {
|
||||||
|
"84.88.53.236" = [ "apex" ];
|
||||||
|
"10.0.44.1" = [ "raccoon" ];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
services.p.enable = true;
|
||||||
|
|
||||||
|
services.prometheus.exporters.node = {
|
||||||
|
enable = true;
|
||||||
|
enabledCollectors = [ "systemd" ];
|
||||||
|
port = 9002;
|
||||||
|
listenAddress = "127.0.0.1";
|
||||||
|
};
|
||||||
|
|
||||||
|
boot.swraid = {
|
||||||
|
enable = true;
|
||||||
|
mdadmConf = ''
|
||||||
|
DEVICE partitions
|
||||||
|
ARRAY /dev/md0 metadata=1.2 UUID=496db1e2:056a92aa:a544543f:40db379d
|
||||||
|
MAILADDR root
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
|
fileSystems."/vault" = {
|
||||||
|
device = "/dev/disk/by-label/vault";
|
||||||
|
fsType = "ext4";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Make a /vault/home/$USER directory for each user.
|
||||||
|
systemd.services.create-vault-dirs = let
|
||||||
|
# Take only normal users in tent
|
||||||
|
users = lib.filterAttrs (_: v: v.isNormalUser) config.users.users;
|
||||||
|
commands = lib.concatLists (lib.mapAttrsToList
|
||||||
|
(_: user: [
|
||||||
|
"install -d -o ${user.name} -g ${user.group} -m 0711 /vault/home/${user.name}"
|
||||||
|
]) users);
|
||||||
|
script = pkgs.writeShellScript "create-vault-dirs.sh" (lib.concatLines commands);
|
||||||
|
in {
|
||||||
|
enable = true;
|
||||||
|
wants = [ "local-fs.target" ];
|
||||||
|
after = [ "local-fs.target" ];
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
serviceConfig.ExecStart = script;
|
||||||
|
};
|
||||||
|
|
||||||
|
# disable automatic garbage collector
|
||||||
|
nix.gc.automatic = lib.mkForce false;
|
||||||
|
}
|
||||||
30
m/tent/gitea.nix
Normal file
30
m/tent/gitea.nix
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
{ config, lib, ... }:
|
||||||
|
{
|
||||||
|
services.gitea = {
|
||||||
|
enable = true;
|
||||||
|
appName = "Gitea in the jungle";
|
||||||
|
|
||||||
|
settings = {
|
||||||
|
server = {
|
||||||
|
ROOT_URL = "https://jungle.bsc.es/git/";
|
||||||
|
LOCAL_ROOT_URL = "https://jungle.bsc.es/git/";
|
||||||
|
LANDING_PAGE = "explore";
|
||||||
|
};
|
||||||
|
metrics.ENABLED = true;
|
||||||
|
service = {
|
||||||
|
DISABLE_REGISTRATION = true;
|
||||||
|
REGISTER_MANUAL_CONFIRM = true;
|
||||||
|
ENABLE_NOTIFY_MAIL = true;
|
||||||
|
};
|
||||||
|
log.LEVEL = "Warn";
|
||||||
|
|
||||||
|
mailer = {
|
||||||
|
ENABLED = true;
|
||||||
|
FROM = "jungle-robot@bsc.es";
|
||||||
|
PROTOCOL = "sendmail";
|
||||||
|
SENDMAIL_PATH = "/run/wrappers/bin/sendmail";
|
||||||
|
SENDMAIL_ARGS = "--";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
93
m/tent/gitlab-runner.nix
Normal file
93
m/tent/gitlab-runner.nix
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
{ pkgs, lib, config, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
age.secrets.tent-gitlab-runner-pm-shell.file = ../../secrets/tent-gitlab-runner-pm-shell-token.age;
|
||||||
|
age.secrets.tent-gitlab-runner-pm-docker.file = ../../secrets/tent-gitlab-runner-pm-docker-token.age;
|
||||||
|
age.secrets.tent-gitlab-runner-bsc-docker.file = ../../secrets/tent-gitlab-runner-bsc-docker-token.age;
|
||||||
|
|
||||||
|
services.gitlab-runner = let sec = config.age.secrets; in {
|
||||||
|
enable = true;
|
||||||
|
settings.concurrent = 5;
|
||||||
|
services = {
|
||||||
|
# For gitlab.pm.bsc.es
|
||||||
|
gitlab-pm-shell = {
|
||||||
|
executor = "shell";
|
||||||
|
environmentVariables = {
|
||||||
|
SHELL = "${pkgs.bash}/bin/bash";
|
||||||
|
};
|
||||||
|
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-shell.path;
|
||||||
|
preGetSourcesScript = pkgs.writeScript "setup" ''
|
||||||
|
echo "This is the preGetSources script running, brace for impact"
|
||||||
|
env
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
gitlab-pm-docker = {
|
||||||
|
authenticationTokenConfigFile = sec.tent-gitlab-runner-pm-docker.path;
|
||||||
|
executor = "docker";
|
||||||
|
dockerImage = "debian:stable";
|
||||||
|
};
|
||||||
|
|
||||||
|
# For gitlab.bsc.es
|
||||||
|
gitlab-bsc-docker = {
|
||||||
|
# gitlab.bsc.es still uses the old token mechanism
|
||||||
|
registrationConfigFile = sec.tent-gitlab-runner-bsc-docker.path;
|
||||||
|
tagList = [ "docker" "tent" "nix" ];
|
||||||
|
executor = "docker";
|
||||||
|
dockerImage = "alpine";
|
||||||
|
dockerVolumes = [
|
||||||
|
"/nix/store:/nix/store:ro"
|
||||||
|
"/nix/var/nix/db:/nix/var/nix/db:ro"
|
||||||
|
"/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket:ro"
|
||||||
|
];
|
||||||
|
dockerDisableCache = true;
|
||||||
|
registrationFlags = [
|
||||||
|
# Increase build log length to 64 MiB
|
||||||
|
"--output-limit 65536"
|
||||||
|
];
|
||||||
|
preBuildScript = pkgs.writeScript "setup-container" ''
|
||||||
|
mkdir -p -m 0755 /nix/var/log/nix/drvs
|
||||||
|
mkdir -p -m 0755 /nix/var/nix/gcroots
|
||||||
|
mkdir -p -m 0755 /nix/var/nix/profiles
|
||||||
|
mkdir -p -m 0755 /nix/var/nix/temproots
|
||||||
|
mkdir -p -m 0755 /nix/var/nix/userpool
|
||||||
|
mkdir -p -m 1777 /nix/var/nix/gcroots/per-user
|
||||||
|
mkdir -p -m 1777 /nix/var/nix/profiles/per-user
|
||||||
|
mkdir -p -m 0755 /nix/var/nix/profiles/per-user/root
|
||||||
|
mkdir -p -m 0700 "$HOME/.nix-defexpr"
|
||||||
|
mkdir -p -m 0700 "$HOME/.ssh"
|
||||||
|
cat >> "$HOME/.ssh/known_hosts" << EOF
|
||||||
|
bscpm04.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPx4mC0etyyjYUT2Ztc/bs4ZXSbVMrogs1ZTP924PDgT
|
||||||
|
gitlab-internal.bsc.es ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3
|
||||||
|
EOF
|
||||||
|
. ${pkgs.nix}/etc/profile.d/nix-daemon.sh
|
||||||
|
# Required to load SSL certificate paths
|
||||||
|
. ${pkgs.cacert}/nix-support/setup-hook
|
||||||
|
'';
|
||||||
|
environmentVariables = {
|
||||||
|
ENV = "/etc/profile";
|
||||||
|
USER = "root";
|
||||||
|
NIX_REMOTE = "daemon";
|
||||||
|
PATH = "${config.system.path}/bin:/bin:/sbin:/usr/bin:/usr/sbin";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.gitlab-runner.serviceConfig = {
|
||||||
|
DynamicUser = lib.mkForce false;
|
||||||
|
User = "gitlab-runner";
|
||||||
|
Group = "gitlab-runner";
|
||||||
|
ExecStart = lib.mkForce
|
||||||
|
''${pkgs.gitlab-runner}/bin/gitlab-runner run --config ''${HOME}/.gitlab-runner/config.toml --listen-address "127.0.0.1:9252" --working-directory ''${HOME}'';
|
||||||
|
};
|
||||||
|
|
||||||
|
users.users.gitlab-runner = {
|
||||||
|
uid = config.ids.uids.gitlab-runner;
|
||||||
|
home = "/var/lib/gitlab-runner";
|
||||||
|
description = "Gitlab Runner";
|
||||||
|
group = "gitlab-runner";
|
||||||
|
extraGroups = [ "docker" ];
|
||||||
|
createHome = true;
|
||||||
|
};
|
||||||
|
users.groups.gitlab-runner.gid = config.ids.gids.gitlab-runner;
|
||||||
|
}
|
||||||
217
m/tent/monitoring.nix
Normal file
217
m/tent/monitoring.nix
Normal file
@@ -0,0 +1,217 @@
|
|||||||
|
{ config, lib, pkgs, ... }:
|
||||||
|
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
../module/meteocat-exporter.nix
|
||||||
|
../module/upc-qaire-exporter.nix
|
||||||
|
../module/nix-daemon-exporter.nix
|
||||||
|
];
|
||||||
|
|
||||||
|
age.secrets.grafanaJungleRobotPassword = {
|
||||||
|
file = ../../secrets/jungle-robot-password.age;
|
||||||
|
owner = "grafana";
|
||||||
|
mode = "400";
|
||||||
|
};
|
||||||
|
|
||||||
|
services.grafana = {
|
||||||
|
enable = true;
|
||||||
|
settings = {
|
||||||
|
server = {
|
||||||
|
domain = "jungle.bsc.es";
|
||||||
|
root_url = "%(protocol)s://%(domain)s/grafana";
|
||||||
|
serve_from_sub_path = true;
|
||||||
|
http_port = 2342;
|
||||||
|
http_addr = "127.0.0.1";
|
||||||
|
};
|
||||||
|
smtp = {
|
||||||
|
enabled = true;
|
||||||
|
from_address = "jungle-robot@bsc.es";
|
||||||
|
user = "jungle-robot";
|
||||||
|
# Read the password from a file, which is only readable by grafana user
|
||||||
|
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#file-provider
|
||||||
|
password = "$__file{${config.age.secrets.grafanaJungleRobotPassword.path}}";
|
||||||
|
host = "mail.bsc.es:465";
|
||||||
|
startTLS_policy = "NoStartTLS";
|
||||||
|
};
|
||||||
|
feature_toggles.publicDashboards = true;
|
||||||
|
"auth.anonymous".enabled = true;
|
||||||
|
log.level = "warn";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
services.prometheus = {
|
||||||
|
enable = true;
|
||||||
|
port = 9001;
|
||||||
|
retentionTime = "5y";
|
||||||
|
listenAddress = "127.0.0.1";
|
||||||
|
};
|
||||||
|
|
||||||
|
# We need access to the devices to monitor the disk space
|
||||||
|
systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false;
|
||||||
|
systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only";
|
||||||
|
|
||||||
|
# Credentials for IPMI exporter
|
||||||
|
age.secrets.ipmiYml = {
|
||||||
|
file = ../../secrets/ipmi.yml.age;
|
||||||
|
owner = "ipmi-exporter";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Create an IPMI group and assign the ipmi0 device
|
||||||
|
users.groups.ipmi = {};
|
||||||
|
services.udev.extraRules = ''
|
||||||
|
SUBSYSTEM=="ipmi", KERNEL=="ipmi0", GROUP="ipmi", MODE="0660"
|
||||||
|
'';
|
||||||
|
|
||||||
|
# Add a new ipmi-exporter user that can read the ipmi0 device
|
||||||
|
users.users.ipmi-exporter = {
|
||||||
|
isSystemUser = true;
|
||||||
|
group = "ipmi";
|
||||||
|
};
|
||||||
|
|
||||||
|
# Disable dynamic user so we have the ipmi-exporter user available for the credentials
|
||||||
|
systemd.services.prometheus-ipmi-exporter.serviceConfig = {
|
||||||
|
DynamicUser = lib.mkForce false;
|
||||||
|
PrivateDevices = lib.mkForce false;
|
||||||
|
User = lib.mkForce "ipmi-exporter";
|
||||||
|
Group = lib.mkForce "ipmi";
|
||||||
|
RestrictNamespaces = lib.mkForce false;
|
||||||
|
# Fake uid to 0 so it shuts up
|
||||||
|
ExecStart = let
|
||||||
|
cfg = config.services.prometheus.exporters.ipmi;
|
||||||
|
in lib.mkForce (lib.concatStringsSep " " ([
|
||||||
|
"${pkgs.util-linux}/bin/unshare --map-user 0"
|
||||||
|
"${pkgs.prometheus-ipmi-exporter}/bin/ipmi_exporter"
|
||||||
|
"--web.listen-address ${cfg.listenAddress}:${toString cfg.port}"
|
||||||
|
"--config.file ${lib.escapeShellArg cfg.configFile}"
|
||||||
|
] ++ cfg.extraFlags));
|
||||||
|
};
|
||||||
|
|
||||||
|
# Prometheus exporters running on this host and the scrape jobs that read them.
services.prometheus =
  let
    exporterCfg = config.services.prometheus.exporters;

    # "address:port" where a locally running exporter listens. It is read from
    # the exporter options so the scrape targets cannot drift away from the
    # exporter settings.
    listenOn = exporter: "${exporter.listenAddress}:${toString exporter.port}";

    # Relabeling shared by every blackbox job: the configured targets are the
    # hosts to probe, while the scrape itself goes to the blackbox exporter.
    blackboxRelabel = [
      {
        # Takes the address and sets it in the "target=<xyz>" URL parameter
        source_labels = [ "__address__" ];
        target_label = "__param_target";
      }
      {
        # Sets the "instance" label with the remote host we are querying
        source_labels = [ "__param_target" ];
        target_label = "instance";
      }
      {
        # Scrape the blackbox exporter itself; the probed host travels in the
        # "target" URL parameter set above
        target_label = "__address__";
        replacement = listenOn exporterCfg.blackbox;
      }
    ];

    # Scrape job that probes `targets` through the given blackbox module.
    mkBlackboxJob = job_name: module: targets: {
      inherit job_name;
      metrics_path = "/probe";
      params = { module = [ module ]; };
      static_configs = [{ inherit targets; }];
      relabel_configs = blackboxRelabel;
    };

    # Scrape job that asks the local IPMI exporter to query the remote BMC
    # `target` using the credentials module `module` of the exporter config.
    mkIpmiJob = job_name: module: target: {
      inherit job_name;
      metrics_path = "/ipmi";
      static_configs = [
        { targets = [ (listenOn exporterCfg.ipmi) ]; }
      ];
      params = {
        target = [ target ];
        module = [ module ];
      };
    };
  in
  {
    exporters = {
      ipmi = {
        enable = true;
        configFile = config.age.secrets.ipmiYml.path;
        #extraFlags = [ "--log.level=debug" ];
        listenAddress = "127.0.0.1";
      };
      node = {
        enable = true;
        enabledCollectors = [ "logind" ];
        port = 9002;
        listenAddress = "127.0.0.1";
      };
      blackbox = {
        enable = true;
        listenAddress = "127.0.0.1";
        configFile = ./blackbox.yml;
      };
    };

    scrapeConfigs = [
      {
        job_name = "local";
        static_configs = [{
          targets = [
            (listenOn exporterCfg.node) # Node exporter
            #(listenOn exporterCfg.blackbox) # Blackbox exporter
            (listenOn exporterCfg.ipmi) # IPMI exporter for local node
            "127.0.0.1:9928" # UPC Qaire custom exporter
            "127.0.0.1:9929" # Meteocat custom exporter
            "127.0.0.1:9999" # Nix-daemon custom exporter
          ];
        }];
      }
      (mkBlackboxJob "blackbox-http" "http_2xx" [
        "https://www.google.com/robots.txt"
        "https://pm.bsc.es/"
        "https://pm.bsc.es/gitlab/"
        "https://jungle.bsc.es/"
        "https://gitlab.bsc.es/"
      ])
      (mkBlackboxJob "blackbox-icmp" "icmp" [
        "1.1.1.1"
        "8.8.8.8"
        "ssfhead"
        "raccoon"
        "anella-bsc.cesca.cat"
        "upc-anella.cesca.cat"
        "fox.ac.upc.edu"
        "fox-ipmi.ac.upc.edu"
        "arenys5.ac.upc.edu"
        "arenys0-2.ac.upc.edu"
        "epi01.bsc.es"
        "axle.bsc.es"
      ])
      (mkIpmiJob "ipmi-raccoon" "raccoon" "raccoon-ipmi")
      (mkIpmiJob "ipmi-fox" "fox" "fox-ipmi.ac.upc.edu")
    ];
  };
|
||||||
|
}
|
||||||
79
m/tent/nginx.nix
Normal file
79
m/tent/nginx.nix
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
# NixOS module: builds the jungle website with Hugo and serves it with nginx
# on port 80, together with reverse proxies for the services listening on
# localhost and a few static directories.
{ theFlake, pkgs, ... }:

let
  # Static site generated by Hugo from the jungle-website repository
  website = pkgs.stdenv.mkDerivation {
    name = "jungle-web";
    src = pkgs.fetchgit {
      url = "https://jungle.bsc.es/git/rarias/jungle-website.git";
      rev = "739bf0175a7f05380fe7ad7023ff1d60db1710e1";
      hash = "sha256-ea5DzhYTzZ9TmqD+x95rdNdLbxPnBluqlYH2NmBYmc4=";
    };
    # hugo only runs while building the site, so it is a build-time tool
    nativeBuildInputs = [ pkgs.hugo ];
    buildPhase = ''
      rm -rf public/
      hugo
    '';
    installPhase = ''
      cp -r public $out
    '';
    # Skip the fixup phase so it doesn't mess with doc/
    dontFixup = true;
  };
in
{
  networking.firewall.allowedTCPPorts = [ 80 ];
  services.nginx = {
    enable = true;
    virtualHosts."jungle.bsc.es" = {
      root = "${website}";
      listen = [
        {
          addr = "0.0.0.0";
          port = 80;
        }
      ];
      # Trust X-Forwarded-For from the listed proxies, then declare the
      # proxied services and the static locations.
      extraConfig = ''
        set_real_ip_from 127.0.0.1;
        set_real_ip_from 84.88.52.107;
        real_ip_recursive on;
        real_ip_header X-Forwarded-For;

        location /git {
          rewrite ^/git$ / break;
          rewrite ^/git/(.*) /$1 break;
          proxy_pass http://127.0.0.1:3000;
          proxy_redirect http:// $scheme://;
        }
        location /cache {
          rewrite ^/cache/(.*) /$1 break;
          proxy_pass http://127.0.0.1:5000;
          proxy_redirect http:// $scheme://;
        }
        location /lists {
          proxy_pass http://127.0.0.1:8081;
          proxy_redirect http:// $scheme://;
        }
        location /grafana {
          proxy_pass http://127.0.0.1:2342;
          proxy_redirect http:// $scheme://;
          proxy_set_header Host $host;
          # Websockets
          proxy_http_version 1.1;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection "upgrade";
        }
        location ~ ^/~(.+?)(/.*)?$ {
          alias /vault/home/$1/public_html$2;
          index index.html index.htm;
          autoindex on;
          absolute_redirect off;
        }
        location /p/ {
          alias /var/lib/p/;
        }
        location /pub/ {
          alias /vault/pub/;
        }
      '';
    };
  };
}
|
||||||
16
m/tent/nix-serve.nix
Normal file
16
m/tent/nix-serve.nix
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# NixOS module: binary cache served by nix-serve, signed with a key kept as
# an age secret.
{ config, ... }:

let
  # Decrypted location of the private signing key
  signingKey = config.age.secrets.nixServe.path;
in
{
  age.secrets.nixServe = { file = ../../secrets/nix-serve.age; };

  services.nix-serve.enable = true;

  # Only listen locally, as we serve it via ssh
  services.nix-serve.bindAddress = "127.0.0.1";
  services.nix-serve.port = 5000;

  # Public key:
  # jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0=
  services.nix-serve.secretKeyFile = signingKey;
}
|
||||||
33
m/weasel/configuration.nix
Normal file
33
m/weasel/configuration.nix
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# NixOS configuration of the "weasel" machine.
{ lib, ... }:

let
  # Single static IPv4 address on a /24 network
  staticAddr = address: [ { inherit address; prefixLength = 24; } ];
in
{
  imports = [
    ../common/ssf.nix
    ../module/hut-substituter.nix
  ];

  # The boot disk is selected by its ID to avoid mismatches
  boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d5356ca";

  # There is plenty of RAM, so no swap
  swapDevices = lib.mkForce [];

  # Users with sudo access
  users.groups.wheel.members = [ "abonerib" "anavarro" ];

  # Run julia installed with juliaup using julia's own libraries:
  # NIX_LD_LIBRARY_PATH=~/.julia/juliaup/${VERS}/lib/julia ~/.juliaup/bin/julia
  programs.nix-ld.enable = true;

  networking.hostName = "weasel";
  networking.interfaces.eno1.ipv4.addresses = staticAddr "10.0.40.6";
  networking.interfaces.ibp5s0.ipv4.addresses = staticAddr "10.0.42.6";
}
|
||||||
89
pkgs/amd-uprof/default.nix
Normal file
89
pkgs/amd-uprof/default.nix
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
# AMD uProf: downloads the prebuilt AMD tarball, unpacks it into $out and
# patches the binaries so they can run on NixOS.
{ stdenv
, lib
, curl
, cacert
, runCommandLocal
, autoPatchelfHook
, elfutils
, glib
, libGL
, ncurses5
, xorg
, zlib
, libxkbcommon
, freetype
, fontconfig
, libGLU
, dbus
, rocmPackages
, libxcrypt-legacy
, numactl
, radare2
}:

let
  version = "5.1.701";
  tarball = "AMDuProf_Linux_x64_${version}.tar.bz2";

  # Fixed-output download of the tarball, sending browser-like request headers.
  # --fail makes an HTTP error abort the download right away instead of
  # storing the error page and failing later with a confusing hash mismatch.
  # NOTE: Remember to update the radare2 patch below if AMDuProfPcm changes.
  uprofSrc = runCommandLocal tarball {
    nativeBuildInputs = [ curl ];
    outputHash = "sha256-j9gxcBcIg6Zhc5FglUXf/VV9bKSo+PAKeootbN7ggYk=";
    SSL_CERT_FILE="${cacert}/etc/ssl/certs/ca-bundle.crt";
  } ''
    curl \
      --fail \
      -o $out \
      'https://download.amd.com/developer/eula/uprof/uprof-5-1/${tarball}' \
      -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0' \
      -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
      -H 'Accept-Language: en-US,en;q=0.5' \
      -H 'Accept-Encoding: gzip, deflate, br, zstd' \
      -H 'Referer: https://www.amd.com/' 2>&1 | tr '\r' '\n'
  '';

in
  stdenv.mkDerivation {
    pname = "AMD-uProf";
    inherit version;
    src = uprofSrc;
    dontStrip = true;
    phases = [ "installPhase" "fixupPhase" ];
    nativeBuildInputs = [ autoPatchelfHook radare2 ];
    # Libraries made available to autoPatchelfHook for the prebuilt binaries
    buildInputs = [
      stdenv.cc.cc.lib
      ncurses5
      elfutils
      glib
      libGL
      libGLU
      libxcrypt-legacy
      xorg.libX11
      xorg.libXext
      xorg.libXi
      xorg.libXmu
      xorg.libxcb
      xorg.xcbutilwm
      xorg.xcbutilrenderutil
      xorg.xcbutilkeysyms
      xorg.xcbutilimage
      fontconfig.lib
      libxkbcommon
      zlib
      freetype
      dbus
      rocmPackages.rocprofiler
      numactl
    ];
    # The md5sum check guards the radare2 patch, which writes at fixed
    # addresses of AMDuProfPcm: the build stops if that binary ever changes.
    installPhase = ''
      set -x
      mkdir -p $out
      tar -x -v -C $out --strip-components=1 -f $src
      rm $out/bin/AMDPowerProfilerDriverSource.tar.gz
      patchelf --replace-needed libroctracer64.so.1 libroctracer64.so $out/bin/ProfileAgents/x64/libAMDGpuAgent.so
      patchelf --add-needed libcrypt.so.1 --add-needed libstdc++.so.6 $out/bin/AMDuProfSys
      echo "16334a51fcc48668307ad94e20482ca4 $out/bin/AMDuProfPcm" | md5sum -c -
      radare2 -w -q -i ${./libnuma.r2} $out/bin/AMDuProfPcm
      patchelf --add-needed libnuma.so $out/bin/AMDuProfPcm
      set +x
    '';
    meta = {
      description = "AMD uProf performance analysis tool";
      homepage = "https://www.amd.com/es/developer/uprof.html";
      platforms = lib.platforms.linux;
    };
  }
|
||||||
33
pkgs/amd-uprof/driver.nix
Normal file
33
pkgs/amd-uprof/driver.nix
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# Kernel module of the AMD Power Profiler. Its sources are shipped inside
# the AMD uProf tarball, so they are extracted from the amd-uprof source.
{ stdenv
, lib
, amd-uprof
, kernel
, runCommandLocal
}:

let
  # Location of the driver sources inside the AMD uProf tarball
  driverTarball = "AMDuProf_Linux_x64_${amd-uprof.version}/bin/AMDPowerProfilerDriverSource.tar.gz";
in stdenv.mkDerivation {
  pname = "AMDPowerProfilerDriver";
  inherit (amd-uprof) version;

  # Take only the driver source archive out of the full tarball
  src = runCommandLocal "AMDPowerProfilerDriverSource.tar.gz" { } ''
    set -x
    tar -x -f ${amd-uprof.src} ${driverTarball}
    mv ${driverTarball} $out
    set +x
  '';

  hardeningDisable = [ "pic" "format" ];
  nativeBuildInputs = kernel.moduleBuildDependencies;
  patches = [ ./makefile.patch ./hrtimer.patch ];

  # Variables consumed by the patched Makefile
  makeFlags = [
    "KERNEL_VERSION=${kernel.modDirVersion}"
    "KERNEL_DIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"
    "INSTALL_MOD_PATH=$(out)"
  ];

  meta = {
    description = "AMD Power Profiler Driver";
    homepage = "https://www.amd.com/es/developer/uprof.html";
    platforms = lib.platforms.linux;
  };
}
|
||||||
31
pkgs/amd-uprof/hrtimer.patch
Normal file
31
pkgs/amd-uprof/hrtimer.patch
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
--- a/src/PmcTimerConfig.c 2025-09-04 12:17:16.771707049 +0200
|
||||||
|
+++ b/src/PmcTimerConfig.c 2025-09-04 12:17:04.878515468 +0200
|
||||||
|
@@ -99,7 +99,7 @@ static void PmcInitTimer(void* pInfo)
|
||||||
|
|
||||||
|
DRVPRINT("pTimerConfig(%p)", pTimerConfig);
|
||||||
|
|
||||||
|
- hrtimer_init(&pTimerConfig->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||||
|
+ hrtimer_setup(&pTimerConfig->m_hrTimer, PmcTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||||
|
}
|
||||||
|
|
||||||
|
int PmcSetupTimer(ClientContext* pClientCtx)
|
||||||
|
@@ -157,7 +157,6 @@ int PmcSetupTimer(ClientContext* pClient
|
||||||
|
{
|
||||||
|
/* Interval in ms */
|
||||||
|
pTimerConfig->m_time = ktime_set(interval / 1000, interval * 1000000);
|
||||||
|
- pTimerConfig->m_hrTimer.function = PmcTimerCallback;
|
||||||
|
|
||||||
|
DRVPRINT("retVal(%d) m_time(%lld)", retVal, (long long int) pTimerConfig->m_time);
|
||||||
|
}
|
||||||
|
--- a/src/PwrProfTimer.c 2025-09-04 12:18:08.750544327 +0200
|
||||||
|
+++ b/src/PwrProfTimer.c 2025-09-04 12:18:28.557863382 +0200
|
||||||
|
@@ -573,8 +573,7 @@ void InitHrTimer(uint32 cpu)
|
||||||
|
pCoreClientData = &per_cpu(g_coreClientData, cpu);
|
||||||
|
|
||||||
|
// initialize HR timer
|
||||||
|
- hrtimer_init(&pCoreClientData->m_hrTimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||||
|
- pCoreClientData->m_hrTimer.function = &HrTimerCallback;
|
||||||
|
+ hrtimer_setup(&pCoreClientData->m_hrTimer, &HrTimerCallback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
|
||||||
|
|
||||||
|
return;
|
||||||
|
} // InitHrTimer
|
||||||
10
pkgs/amd-uprof/libnuma.r2
Normal file
10
pkgs/amd-uprof/libnuma.r2
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Patch the arguments of the call to std::string::find(char const*, unsigned long, unsigned long)
# so that it matches "NixOS":
|
||||||
|
#
|
||||||
|
# Change OS name to NixOS
|
||||||
|
wz NixOS @ 0x00550a43
|
||||||
|
# And set the length to 5 characters
|
||||||
|
wa mov ecx, 5 @0x00517930
|
||||||
|
#
|
||||||
|
# Then change the argument to dlopen() so it only uses libnuma.so
|
||||||
|
wz libnuma.so @ 0x00562940
|
||||||
66
pkgs/amd-uprof/makefile.patch
Normal file
66
pkgs/amd-uprof/makefile.patch
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
--- a/Makefile 2025-06-19 20:36:49.346693267 +0200
|
||||||
|
+++ b/Makefile 2025-06-19 20:42:29.778088660 +0200
|
||||||
|
@@ -27,7 +27,7 @@ MODULE_VERSION=$(shell cat AMDPowerProfi
|
||||||
|
MODULE_NAME_KO=$(MODULE_NAME).ko
|
||||||
|
|
||||||
|
# check is module inserted
|
||||||
|
-MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
|
||||||
|
+#MODPROBE_OUTPUT=$(shell lsmod | grep $(MODULE_NAME))
|
||||||
|
|
||||||
|
# check pcore dkms status
|
||||||
|
PCORE_DKMS_STATUS=$(shell dkms status | grep $(MODULE_NAME) | grep $(MODULE_VERSION))
|
||||||
|
@@ -50,7 +50,7 @@ endif
|
||||||
|
# “-Wno-missing-attributes” is added for GCC version >= 9.0 and kernel version <= 5.00
|
||||||
|
G_VERSION=9
|
||||||
|
K_VERSION=5
|
||||||
|
-KERNEL_MAJOR_VERSION=$(shell uname -r | cut -f1 -d.)
|
||||||
|
+KERNEL_MAJOR_VERSION=$(shell echo "$(KERNEL_VERSION)" | cut -f1 -d.)
|
||||||
|
GCCVERSION = $(shell gcc -dumpversion | cut -f1 -d.)
|
||||||
|
ifeq ($(G_VERSION),$(firstword $(sort $(GCCVERSION) $(G_VERSION))))
|
||||||
|
ifeq ($(K_VERSION),$(lastword $(sort $(KERNEL_MAJOR_VERSION) $(K_VERSION))))
|
||||||
|
@@ -66,17 +66,7 @@ ${MODULE_NAME}-objs := src/PmcDataBuffe
|
||||||
|
|
||||||
|
# make
|
||||||
|
all:
|
||||||
|
- @chmod a+x ./AMDPPcert.sh
|
||||||
|
- @./AMDPPcert.sh 0 1; echo $$? > $(PWD)/sign_status;
|
||||||
|
- @SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
|
||||||
|
- if [ $$SIGSTATUS1 -eq 1 ]; then \
|
||||||
|
- exit 1; \
|
||||||
|
- fi
|
||||||
|
- @make -C /lib/modules/$(KERNEL_VERSION)/build M=$(PWD) $(MAKE_OPTS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules
|
||||||
|
- @SIGSTATUS3=`cat $(PWD)/sign_status | tr -d '\n'`; \
|
||||||
|
- if [ $$SIGSTATUS3 -eq 0 ]; then \
|
||||||
|
- ./AMDPPcert.sh 1 $(MODULE_NAME_KO); \
|
||||||
|
- fi
|
||||||
|
+ make -C $(KERNEL_DIR) M=$(PWD) $(MAKE_OPTS) CFLAGS_MODULE="$(EXTRA_CFLAGS)" modules
|
||||||
|
|
||||||
|
# make clean
|
||||||
|
clean:
|
||||||
|
@@ -84,23 +74,9 @@ clean:
|
||||||
|
|
||||||
|
# make install
|
||||||
|
install:
|
||||||
|
- @mkdir -p /lib/modules/`uname -r`/kernel/drivers/extra
|
||||||
|
- @rm -f /lib/modules/`uname -r`/kernel/drivers/extra/$(MODULE_NAME_KO)
|
||||||
|
- @cp $(MODULE_NAME_KO) /lib/modules/`uname -r`/kernel/drivers/extra/
|
||||||
|
- @depmod -a
|
||||||
|
- @if [ ! -z "$(MODPROBE_OUTPUT)" ]; then \
|
||||||
|
- echo "Uninstalling AMDPowerProfiler Linux kernel module.";\
|
||||||
|
- rmmod $(MODULE_NAME);\
|
||||||
|
- fi
|
||||||
|
- @modprobe $(MODULE_NAME) 2> $(PWD)/sign_status1; \
|
||||||
|
- cat $(PWD)/sign_status1 | grep "Key was rejected by service"; \
|
||||||
|
- echo $$? > $(PWD)/sign_status; SIGSTATUS1=`cat $(PWD)/sign_status | tr -d '\n'`; \
|
||||||
|
- if [ $$SIGSTATUS1 -eq 0 ]; then \
|
||||||
|
- echo "ERROR: Secure Boot enabled, correct key is not yet enrolled in BIOS key table"; \
|
||||||
|
- exit 1; \
|
||||||
|
- else \
|
||||||
|
- cat $(PWD)/sign_status1; \
|
||||||
|
- fi
|
||||||
|
+ mkdir -p $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
|
||||||
|
+ cp -a $(MODULE_NAME_KO) $(INSTALL_MOD_PATH)/lib/modules/$(KERNEL_VERSION)/kernel/drivers/extra/
|
||||||
|
+
|
||||||
|
# make dkms
|
||||||
|
dkms:
|
||||||
|
@chmod a+x ./AMDPPcert.sh
|
||||||
12
pkgs/cudainfo/Makefile
Normal file
12
pkgs/cudainfo/Makefile
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Builds the cudainfo tool with nvcc, using HOSTCXX as the host compiler.
HOSTCXX ?= g++
NVCC := nvcc -ccbin $(HOSTCXX)
CXXFLAGS := -m64

# "all" and "clean" are commands, not files: always run them, even if a file
# with the same name exists in the directory.
.PHONY: all clean

# Target rules
all: cudainfo

cudainfo: cudainfo.cpp
	$(NVCC) $(CXXFLAGS) -o $@ $<

clean:
	rm -f cudainfo cudainfo.o
|
||||||
600
pkgs/cudainfo/cudainfo.cpp
Normal file
600
pkgs/cudainfo/cudainfo.cpp
Normal file
@@ -0,0 +1,600 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
|
||||||
|
*
|
||||||
|
* Please refer to the NVIDIA end user license agreement (EULA) associated
|
||||||
|
* with this source code for terms and conditions that govern your use of
|
||||||
|
* this software. Any use, reproduction, disclosure, or distribution of
|
||||||
|
* this software and related documentation outside the terms of the EULA
|
||||||
|
* is strictly prohibited.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
/* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
|
||||||
|
|
||||||
|
// Shared Utilities (QA Testing)
|
||||||
|
|
||||||
|
// std::system includes
|
||||||
|
#include <memory>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
|
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
|
||||||
|
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
|
||||||
|
|
||||||
|
// CUDA Runtime error messages
|
||||||
|
#ifdef __DRIVER_TYPES_H__
// Expands to a switch case that returns the enumerator's own name as a string.
#define CUDA_ERROR_NAME_CASE(name) case name: return #name

// Returns the name of a CUDA runtime error code as a string literal, or
// "<unknown>" when the code is not one of the enumerators listed here.
static const char *_cudaGetErrorEnum(cudaError_t error)
{
    switch (error)
    {
        CUDA_ERROR_NAME_CASE(cudaSuccess);
        CUDA_ERROR_NAME_CASE(cudaErrorMissingConfiguration);
        CUDA_ERROR_NAME_CASE(cudaErrorMemoryAllocation);
        CUDA_ERROR_NAME_CASE(cudaErrorInitializationError);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchFailure);
        CUDA_ERROR_NAME_CASE(cudaErrorPriorLaunchFailure);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchTimeout);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchOutOfResources);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidDeviceFunction);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidConfiguration);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidDevice);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidValue);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidPitchValue);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidSymbol);
        CUDA_ERROR_NAME_CASE(cudaErrorMapBufferObjectFailed);
        CUDA_ERROR_NAME_CASE(cudaErrorUnmapBufferObjectFailed);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidHostPointer);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidDevicePointer);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidTexture);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidTextureBinding);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidChannelDescriptor);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidMemcpyDirection);
        CUDA_ERROR_NAME_CASE(cudaErrorAddressOfConstant);
        CUDA_ERROR_NAME_CASE(cudaErrorTextureFetchFailed);
        CUDA_ERROR_NAME_CASE(cudaErrorTextureNotBound);
        CUDA_ERROR_NAME_CASE(cudaErrorSynchronizationError);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidFilterSetting);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidNormSetting);
        CUDA_ERROR_NAME_CASE(cudaErrorMixedDeviceExecution);
        CUDA_ERROR_NAME_CASE(cudaErrorCudartUnloading);
        CUDA_ERROR_NAME_CASE(cudaErrorUnknown);
        CUDA_ERROR_NAME_CASE(cudaErrorNotYetImplemented);
        CUDA_ERROR_NAME_CASE(cudaErrorMemoryValueTooLarge);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidResourceHandle);
        CUDA_ERROR_NAME_CASE(cudaErrorNotReady);
        CUDA_ERROR_NAME_CASE(cudaErrorInsufficientDriver);
        CUDA_ERROR_NAME_CASE(cudaErrorSetOnActiveProcess);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidSurface);
        CUDA_ERROR_NAME_CASE(cudaErrorNoDevice);
        CUDA_ERROR_NAME_CASE(cudaErrorECCUncorrectable);
        CUDA_ERROR_NAME_CASE(cudaErrorSharedObjectSymbolNotFound);
        CUDA_ERROR_NAME_CASE(cudaErrorSharedObjectInitFailed);
        CUDA_ERROR_NAME_CASE(cudaErrorUnsupportedLimit);
        CUDA_ERROR_NAME_CASE(cudaErrorDuplicateVariableName);
        CUDA_ERROR_NAME_CASE(cudaErrorDuplicateTextureName);
        CUDA_ERROR_NAME_CASE(cudaErrorDuplicateSurfaceName);
        CUDA_ERROR_NAME_CASE(cudaErrorDevicesUnavailable);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidKernelImage);
        CUDA_ERROR_NAME_CASE(cudaErrorNoKernelImageForDevice);
        CUDA_ERROR_NAME_CASE(cudaErrorIncompatibleDriverContext);
        CUDA_ERROR_NAME_CASE(cudaErrorPeerAccessAlreadyEnabled);
        CUDA_ERROR_NAME_CASE(cudaErrorPeerAccessNotEnabled);
        CUDA_ERROR_NAME_CASE(cudaErrorDeviceAlreadyInUse);
        CUDA_ERROR_NAME_CASE(cudaErrorProfilerDisabled);
        CUDA_ERROR_NAME_CASE(cudaErrorProfilerNotInitialized);
        CUDA_ERROR_NAME_CASE(cudaErrorProfilerAlreadyStarted);
        CUDA_ERROR_NAME_CASE(cudaErrorProfilerAlreadyStopped);

        /* Since CUDA 4.0 */
        CUDA_ERROR_NAME_CASE(cudaErrorAssert);
        CUDA_ERROR_NAME_CASE(cudaErrorTooManyPeers);
        CUDA_ERROR_NAME_CASE(cudaErrorHostMemoryAlreadyRegistered);
        CUDA_ERROR_NAME_CASE(cudaErrorHostMemoryNotRegistered);

        /* Since CUDA 5.0 */
        CUDA_ERROR_NAME_CASE(cudaErrorOperatingSystem);
        CUDA_ERROR_NAME_CASE(cudaErrorPeerAccessUnsupported);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchMaxDepthExceeded);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchFileScopedTex);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchFileScopedSurf);
        CUDA_ERROR_NAME_CASE(cudaErrorSyncDepthExceeded);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchPendingCountExceeded);
        CUDA_ERROR_NAME_CASE(cudaErrorNotPermitted);
        CUDA_ERROR_NAME_CASE(cudaErrorNotSupported);

        /* Since CUDA 6.0 */
        CUDA_ERROR_NAME_CASE(cudaErrorHardwareStackError);
        CUDA_ERROR_NAME_CASE(cudaErrorIllegalInstruction);
        CUDA_ERROR_NAME_CASE(cudaErrorMisalignedAddress);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidAddressSpace);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidPc);
        CUDA_ERROR_NAME_CASE(cudaErrorIllegalAddress);

        /* Since CUDA 6.5 */
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidPtx);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidGraphicsContext);
        CUDA_ERROR_NAME_CASE(cudaErrorStartupFailure);
        CUDA_ERROR_NAME_CASE(cudaErrorApiFailureBase);
    }

    return "<unknown>";
}

#undef CUDA_ERROR_NAME_CASE
#endif
|
||||||
|
|
||||||
|
template< typename T >
|
||||||
|
void check(T result, char const *const func, const char *const file, int const line)
|
||||||
|
{
|
||||||
|
if (result)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
|
||||||
|
file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
|
||||||
|
cudaDeviceReset();
|
||||||
|
// Make sure we call CUDA Device Reset before exiting
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int *pArgc = NULL;
|
||||||
|
char **pArgv = NULL;
|
||||||
|
|
||||||
|
#if CUDART_VERSION < 5000

// CUDA-C includes
#include <cuda.h>

// Template wrapper around the CUDA Driver API call cuDeviceGetAttribute().
// Stores the requested attribute of `device` into `*attribute`; if the driver
// reports an error, prints it on stderr and terminates the program.
template <class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
{
    const CUresult status = cuDeviceGetAttribute(attribute, device_attribute, device);

    if (status == CUDA_SUCCESS) {
        return;
    }

    fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
            status, __FILE__, __LINE__);

    // Reset the device before exiting: cudaDeviceReset makes the driver clean
    // up all state. It is not mandatory in normal operation, but it is good
    // practice, and it is needed when the application is being profiled, as
    // it flushes all profile data before the application exits.
    cudaDeviceReset();
    exit(EXIT_FAILURE);
}

#endif /* CUDART_VERSION < 5000 */
|
||||||
|
|
||||||
|
// Beginning of GPU Architecture definitions
|
||||||
|
// Returns the number of CUDA cores per multiprocessor (SM) of a GPU with
// compute capability major.minor.
//
// The values come from a fixed table of known architectures. When the
// compute capability is not in the table, a notice is printed on stdout and
// the value of the last table entry is returned, so the caller can continue.
inline int ConvertSMVer2Cores(int major, int minor)
{
    // Maps a GPU architecture (SM version) to its number of cores per SM
    typedef struct {
        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;

    static const sSMtoCores nGpuArchCoresPerSM[] = {
        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
        { 0x53, 128}, // Maxwell Generation (SM 5.3)
        { 0x60, 64 }, // Pascal Generation (SM 6.0)
        { 0x61, 128}, // Pascal Generation (SM 6.1)
        { 0x62, 128}, // Pascal Generation (SM 6.2)
        { 0x70, 64 }, // Volta Generation (SM 7.0)
        { 0x72, 64 }, // Volta Generation (SM 7.2)
        { 0x75, 64 }, // Turing Generation (SM 7.5)
        { 0x80, 64 }, // Ampere Generation (SM 8.0)
        { 0x86, 128}, // Ampere Generation (SM 8.6)
        { 0x87, 128}, // Ampere Generation (SM 8.7)
        { 0x89, 128}, // Ada Generation (SM 8.9)
        { 0x90, 128}, // Hopper Generation (SM 9.0)
    };
    const int count = sizeof(nGpuArchCoresPerSM) / sizeof(nGpuArchCoresPerSM[0]);

    // Same 0xMm encoding as the SM field of the table
    const int wanted = (major << 4) + minor;

    for (int i = 0; i < count; i++) {
        if (nGpuArchCoresPerSM[i].SM == wanted) {
            return nGpuArchCoresPerSM[i].Cores;
        }
    }

    // If we don't find the values, we default to the last known entry to run properly
    const int fallback = nGpuArchCoresPerSM[count - 1].Cores;
    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, fallback);
    return fallback;
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Program main
|
||||||
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
int
|
||||||
|
main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
pArgc = &argc;
|
||||||
|
pArgv = argv;
|
||||||
|
|
||||||
|
printf("%s Starting...\n\n", argv[0]);
|
||||||
|
printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
|
||||||
|
|
||||||
|
int deviceCount = 0;
|
||||||
|
cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
|
||||||
|
|
||||||
|
if (error_id != cudaSuccess) {
|
||||||
|
printf("cudaGetDeviceCount failed: %s (%d)\n",
|
||||||
|
cudaGetErrorString(error_id), (int) error_id);
|
||||||
|
printf("Result = FAIL\n");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function call returns 0 if there are no CUDA capable devices.
|
||||||
|
if (deviceCount == 0)
|
||||||
|
printf("There are no available device(s) that support CUDA\n");
|
||||||
|
else
|
||||||
|
printf("Detected %d CUDA Capable device(s)\n", deviceCount);
|
||||||
|
|
||||||
|
int dev, driverVersion = 0, runtimeVersion = 0;
|
||||||
|
|
||||||
|
for (dev = 0; dev < deviceCount; ++dev) {
|
||||||
|
cudaSetDevice(dev);
|
||||||
|
cudaDeviceProp deviceProp;
|
||||||
|
cudaGetDeviceProperties(&deviceProp, dev);
|
||||||
|
|
||||||
|
printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
|
||||||
|
|
||||||
|
// Console log
|
||||||
|
cudaDriverGetVersion(&driverVersion);
|
||||||
|
cudaRuntimeGetVersion(&runtimeVersion);
|
||||||
|
printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||||
|
printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor);
|
||||||
|
|
||||||
|
printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n",
|
||||||
|
(float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
|
||||||
|
|
||||||
|
printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n",
|
||||||
|
deviceProp.multiProcessorCount,
|
||||||
|
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
|
||||||
|
ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
|
||||||
|
printf(" GPU Max Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
|
||||||
|
|
||||||
|
|
||||||
|
#if CUDART_VERSION >= 5000
|
||||||
|
// This is supported in CUDA 5.0 (runtime API device properties)
|
||||||
|
printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
|
||||||
|
printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
|
||||||
|
|
||||||
|
if (deviceProp.l2CacheSize) {
|
||||||
|
printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
// This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
|
||||||
|
int memoryClock;
|
||||||
|
getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
|
||||||
|
printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f);
|
||||||
|
int memBusWidth;
|
||||||
|
getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
|
||||||
|
printf(" Memory Bus Width: %d-bit\n", memBusWidth);
|
||||||
|
int L2CacheSize;
|
||||||
|
getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
|
||||||
|
|
||||||
|
if (L2CacheSize) {
|
||||||
|
printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
|
||||||
|
deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
|
||||||
|
deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
|
||||||
|
printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n",
|
||||||
|
deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
|
||||||
|
printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n",
|
||||||
|
deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
|
||||||
|
|
||||||
|
|
||||||
|
printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem);
|
||||||
|
printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock);
|
||||||
|
printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
|
||||||
|
printf(" Warp size: %d\n", deviceProp.warpSize);
|
||||||
|
printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor);
|
||||||
|
printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
|
||||||
|
printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
|
||||||
|
deviceProp.maxThreadsDim[0],
|
||||||
|
deviceProp.maxThreadsDim[1],
|
||||||
|
deviceProp.maxThreadsDim[2]);
|
||||||
|
printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n",
|
||||||
|
deviceProp.maxGridSize[0],
|
||||||
|
deviceProp.maxGridSize[1],
|
||||||
|
deviceProp.maxGridSize[2]);
|
||||||
|
printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch);
|
||||||
|
printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment);
|
||||||
|
printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
|
||||||
|
printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
|
||||||
|
printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No");
|
||||||
|
printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
|
||||||
|
printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
|
||||||
|
printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
|
||||||
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||||
|
printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
|
||||||
|
#endif
|
||||||
|
printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
|
||||||
|
printf(" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
|
||||||
|
|
||||||
|
const char *sComputeMode[] = {
|
||||||
|
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
|
||||||
|
"Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
|
||||||
|
"Prohibited (no host thread can use ::cudaSetDevice() with this device)",
|
||||||
|
"Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
|
||||||
|
"Unknown",
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
printf(" Compute Mode:\n");
|
||||||
|
printf(" < %s >\n", sComputeMode[deviceProp.computeMode]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there are 2 or more GPUs, query to determine whether RDMA is supported
|
||||||
|
if (deviceCount >= 2)
|
||||||
|
{
|
||||||
|
cudaDeviceProp prop[64];
|
||||||
|
int gpuid[64]; // we want to find the first two GPU's that can support P2P
|
||||||
|
int gpu_p2p_count = 0;
|
||||||
|
|
||||||
|
for (int i=0; i < deviceCount; i++)
|
||||||
|
{
|
||||||
|
checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
|
||||||
|
|
||||||
|
// Only boards based on Fermi or later can support P2P
|
||||||
|
if ((prop[i].major >= 2)
|
||||||
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||||
|
// on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to supprot this
|
||||||
|
&& prop[i].tccDriver
|
||||||
|
#endif
|
||||||
|
)
|
||||||
|
{
|
||||||
|
// This is an array of P2P capable GPUs
|
||||||
|
gpuid[gpu_p2p_count++] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Show all the combinations of support P2P GPUs
|
||||||
|
int can_access_peer_0_1, can_access_peer_1_0;
|
||||||
|
|
||||||
|
if (gpu_p2p_count >= 2)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < gpu_p2p_count-1; i++)
|
||||||
|
{
|
||||||
|
for (int j = 1; j < gpu_p2p_count; j++)
|
||||||
|
{
|
||||||
|
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j]));
|
||||||
|
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
|
||||||
|
prop[gpuid[j]].name, gpuid[j] ,
|
||||||
|
can_access_peer_0_1 ? "Yes" : "No");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = 1; j < gpu_p2p_count; j++)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < gpu_p2p_count-1; i++)
|
||||||
|
{
|
||||||
|
checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i]));
|
||||||
|
printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j],
|
||||||
|
prop[gpuid[i]].name, gpuid[i] ,
|
||||||
|
can_access_peer_1_0 ? "Yes" : "No");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// csv masterlog info
|
||||||
|
// *****************************
|
||||||
|
// exe and CUDA driver name
|
||||||
|
printf("\n");
|
||||||
|
std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
|
||||||
|
char cTemp[128];
|
||||||
|
|
||||||
|
// driver version
|
||||||
|
sProfileString += ", CUDA Driver Version = ";
|
||||||
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||||
|
sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
|
||||||
|
#else
|
||||||
|
sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
|
||||||
|
#endif
|
||||||
|
sProfileString += cTemp;
|
||||||
|
|
||||||
|
// Runtime version
|
||||||
|
sProfileString += ", CUDA Runtime Version = ";
|
||||||
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||||
|
sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||||
|
#else
|
||||||
|
sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||||
|
#endif
|
||||||
|
sProfileString += cTemp;
|
||||||
|
|
||||||
|
// Device count
|
||||||
|
sProfileString += ", NumDevs = ";
|
||||||
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||||
|
sprintf_s(cTemp, 10, "%d", deviceCount);
|
||||||
|
#else
|
||||||
|
sprintf(cTemp, "%d", deviceCount);
|
||||||
|
#endif
|
||||||
|
sProfileString += cTemp;
|
||||||
|
|
||||||
|
// Print Out all device Names
|
||||||
|
for (dev = 0; dev < deviceCount; ++dev)
|
||||||
|
{
|
||||||
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||||
|
sprintf_s(cTemp, 13, ", Device%d = ", dev);
|
||||||
|
#else
|
||||||
|
sprintf(cTemp, ", Device%d = ", dev);
|
||||||
|
#endif
|
||||||
|
cudaDeviceProp deviceProp;
|
||||||
|
cudaGetDeviceProperties(&deviceProp, dev);
|
||||||
|
sProfileString += cTemp;
|
||||||
|
sProfileString += deviceProp.name;
|
||||||
|
}
|
||||||
|
|
||||||
|
sProfileString += "\n";
|
||||||
|
printf("%s", sProfileString.c_str());
|
||||||
|
|
||||||
|
printf("Result = PASS\n");
|
||||||
|
|
||||||
|
// finish
|
||||||
|
// cudaDeviceReset causes the driver to clean up all state. While
|
||||||
|
// not mandatory in normal operation, it is good practice. It is also
|
||||||
|
// needed to ensure correct operation when the application is being
|
||||||
|
// profiled. Calling cudaDeviceReset causes all profile data to be
|
||||||
|
// flushed before the application exits
|
||||||
|
cudaDeviceReset();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
43
pkgs/cudainfo/default.nix
Normal file
43
pkgs/cudainfo/default.nix
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
# Builds the `cudainfo` binary from the sources in this directory and exposes
# `passthru.gpuCheck`, a separate derivation that runs the binary.
{
  stdenv
, cudatoolkit
, cudaPackages
, autoAddDriverRunpath
, strace
}:

stdenv.mkDerivation (finalAttrs: {
  name = "cudainfo";
  src = ./.;
  buildInputs = [
    cudatoolkit # Required for nvcc
    cudaPackages.cuda_cudart.static # Required for -lcudart_static
    autoAddDriverRunpath
  ];
  installPhase = ''
    mkdir -p $out/bin
    cp -a cudainfo $out/bin
  '';

  # Test derivation: it builds nothing and only runs cudainfo in its check phase.
  passthru.gpuCheck = stdenv.mkDerivation {
    name = "cudainfo-test";
    requiredSystemFeatures = [ "cuda" ];
    dontBuild = true;
    nativeCheckInputs = [
      finalAttrs.finalPackage # The cudainfo package from above
      strace # When it fails, it will show the trace
    ];
    dontUnpack = true;
    doCheck = true;
    checkPhase = ''
      if ! cudainfo; then
        # Print linkage and syscall diagnostics for the failed run.
        set -x
        cudainfo=$(command -v cudainfo)
        ldd $cudainfo
        readelf -d $cudainfo
        strace -f $cudainfo
        set +x
        # The first run already failed, so fail the check explicitly instead
        # of depending on the exit status of the traced re-run above.
        exit 1
      fi
    '';
    installPhase = "touch $out";
  };
})
|
||||||
25
pkgs/meteocat-exporter/default.nix
Normal file
25
pkgs/meteocat-exporter/default.nix
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Python application package for the MeteoCat Prometheus exporter, built from
# the sources in this directory.
{ python3Packages, lib }:

python3Packages.buildPythonApplication {
  pname = "meteocat-exporter";
  version = "1.0";

  src = ./.;

  # The check phase is disabled.
  doCheck = false;

  build-system = [
    python3Packages.setuptools
  ];

  dependencies = [
    python3Packages.beautifulsoup4
    python3Packages.lxml
    python3Packages.prometheus-client
  ];

  meta = {
    description = "MeteoCat Prometheus Exporter";
    platforms = lib.platforms.linux;
  };
}
|
||||||
54
pkgs/meteocat-exporter/meteocat-exporter
Normal file
54
pkgs/meteocat-exporter/meteocat-exporter
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from prometheus_client import start_http_server, Gauge
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib import request
|
||||||
|
|
||||||
|
# ---- Configuration ---------------------------------------
meteo_station = "X8"  # Barcelona - Zona Universitària
listening_port = 9929
update_period = 5 * 60  # seconds between updates (5 min)
# ----------------------------------------------------------

# One gauge per exported value, created in the order: min, max, avg, solar.
metric_tmin, metric_tmax, metric_tavg, metric_srad = (
    Gauge(gauge_name, gauge_help)
    for gauge_name, gauge_help in (
        ('meteocat_temp_min', 'Min temperature'),
        ('meteocat_temp_max', 'Max temperature'),
        ('meteocat_temp_avg', 'Average temperature'),
        ('meteocat_solar_radiation', 'Solar radiation'),
    )
)
|
||||||
|
|
||||||
|
def update(st, timeout=30):
    """Fetch the observation page for station `st` and refresh the gauges.

    The last row of the table with class "tblperiode" is used. Columns 1, 2, 3
    and 10 of that row feed the average, maximum and minimum temperature
    gauges and the solar radiation gauge.

    Args:
        st: Station code appended to the query URL.
        timeout: Seconds to wait on the HTTP request. Without a timeout a
            stalled connection would block the update loop forever.

    Raises:
        Exception: Whatever the download or table lookup raised. The gauges
            are set to NaN before re-raising so they do not keep stale values.
    """
    gauges = (metric_tavg, metric_tmax, metric_tmin, metric_srad)
    nan = float("nan")

    url = 'https://www.meteo.cat/observacions/xema/dades?codi=' + st
    try:
        # The context manager closes the HTTP response once it has been read.
        with request.urlopen(url, timeout=timeout) as response:
            data = response.read()

        soup = BeautifulSoup(data, 'lxml')
        table = soup.find("table", {"class" : "tblperiode"})
        if table is None:
            raise ValueError("table 'tblperiode' not found in page")

        rows = table.find_all('tr')
        if not rows:
            raise ValueError("table 'tblperiode' has no rows")

        row = rows[-1] # Take the last row
        header = row.find('th')
        # Index 0 holds the row header so the data columns start at index 1.
        row_data = [header.text.strip() if header is not None else ""]
        row_data.extend(col.text for col in row.find_all('td'))
    except Exception:
        for gauge in gauges:
            gauge.set(nan)
        raise

    try:
        # Sometimes it will return '(s/d)' and fail to parse
        values = [float(row_data[idx]) for idx in (1, 2, 3, 10)]
    except (ValueError, IndexError):
        print("cannot parse row: {}".format(row))
        values = [nan] * len(gauges)

    for gauge, value in zip(gauges, values):
        gauge.set(value)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Serve the metrics on localhost, then refresh them forever.
    start_http_server(port=listening_port, addr="localhost")
    while True:
        try:
            update(meteo_station)
        except Exception as exc:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit can still stop the exporter. Log the cause.
            print("update failed: {!r}".format(exc))
        time.sleep(update_period)
|
||||||
11
pkgs/meteocat-exporter/setup.py
Normal file
11
pkgs/meteocat-exporter/setup.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
# Package definition: ships any packages found next to this file plus the
# exporter script as an executable.
setup(
    name='meteocat-exporter',
    version='1.0',
    packages=find_packages(),  # modules importable from other scripts
    scripts=["meteocat-exporter"],  # executables
)
|
||||||
@@ -1,36 +0,0 @@
|
|||||||
diff --git a/src/util/mpir_hwtopo.c b/src/util/mpir_hwtopo.c
|
|
||||||
index 33e88bc..ee3641c 100644
|
|
||||||
--- a/src/util/mpir_hwtopo.c
|
|
||||||
+++ b/src/util/mpir_hwtopo.c
|
|
||||||
@@ -200,18 +200,6 @@ int MPII_hwtopo_init(void)
|
|
||||||
#ifdef HAVE_HWLOC
|
|
||||||
bindset = hwloc_bitmap_alloc();
|
|
||||||
hwloc_topology_init(&hwloc_topology);
|
|
||||||
- char *xmlfile = MPIR_pmi_get_jobattr("PMI_hwloc_xmlfile");
|
|
||||||
- if (xmlfile != NULL) {
|
|
||||||
- int rc;
|
|
||||||
- rc = hwloc_topology_set_xml(hwloc_topology, xmlfile);
|
|
||||||
- if (rc == 0) {
|
|
||||||
- /* To have hwloc still actually call OS-specific hooks, the
|
|
||||||
- * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
|
|
||||||
- * file is really the underlying system. */
|
|
||||||
- hwloc_topology_set_flags(hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
|
|
||||||
- }
|
|
||||||
- MPL_free(xmlfile);
|
|
||||||
- }
|
|
||||||
|
|
||||||
hwloc_topology_set_io_types_filter(hwloc_topology, HWLOC_TYPE_FILTER_KEEP_ALL);
|
|
||||||
if (!hwloc_topology_load(hwloc_topology))
|
|
||||||
|
|
||||||
--- a/src/mpi/init/local_proc_attrs.c
|
|
||||||
+++ b/src/mpi/init/local_proc_attrs.c
|
|
||||||
@@ -79,10 +79,6 @@ int MPII_init_local_proc_attrs(int *p_thread_required)
|
|
||||||
/* Set the number of tag bits. The device may override this value. */
|
|
||||||
MPIR_Process.tag_bits = MPIR_TAG_BITS_DEFAULT;
|
|
||||||
|
|
||||||
- char *requested_kinds = MPIR_pmi_get_jobattr("PMI_mpi_memory_alloc_kinds");
|
|
||||||
- MPIR_get_supported_memory_kinds(requested_kinds, &MPIR_Process.memory_alloc_kinds);
|
|
||||||
- MPL_free(requested_kinds);
|
|
||||||
-
|
|
||||||
return mpi_errno;
|
|
||||||
}
|
|
||||||
@@ -11,10 +11,6 @@ final: prev:
|
|||||||
paths = [ pmix.dev pmix.out ];
|
paths = [ pmix.dev pmix.out ];
|
||||||
};
|
};
|
||||||
in prev.mpich.overrideAttrs (old: {
|
in prev.mpich.overrideAttrs (old: {
|
||||||
patches = (old.patches or []) ++ [
|
|
||||||
# See https://github.com/pmodels/mpich/issues/6946
|
|
||||||
./mpich-fix-hwtopo.patch
|
|
||||||
];
|
|
||||||
buildInput = old.buildInputs ++ [
|
buildInput = old.buildInputs ++ [
|
||||||
libfabric
|
libfabric
|
||||||
pmixAll
|
pmixAll
|
||||||
@@ -54,4 +50,18 @@ final: prev:
|
|||||||
});
|
});
|
||||||
|
|
||||||
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
|
prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { };
|
||||||
|
meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { };
|
||||||
|
upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { };
|
||||||
|
cudainfo = prev.callPackage ./cudainfo/default.nix { };
|
||||||
|
|
||||||
|
amd-uprof = prev.callPackage ./amd-uprof/default.nix { };
|
||||||
|
|
||||||
|
# FIXME: Extend this to all linuxPackages variants. Open problem, see:
|
||||||
|
# https://discourse.nixos.org/t/whats-the-right-way-to-make-a-custom-kernel-module-available/4636
|
||||||
|
linuxPackages = prev.linuxPackages.extend (_final: _prev: {
|
||||||
|
amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
|
||||||
|
});
|
||||||
|
linuxPackages_latest = prev.linuxPackages_latest.extend(_final: _prev: {
|
||||||
|
amd-uprof-driver = _prev.callPackage ./amd-uprof/driver.nix { };
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
24
pkgs/upc-qaire-exporter/default.nix
Normal file
24
pkgs/upc-qaire-exporter/default.nix
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# Python application package for the UPC Qaire Prometheus exporter, built from
# the sources in this directory.
{ python3Packages, lib }:

python3Packages.buildPythonApplication {
  pname = "upc-qaire-exporter";
  version = "1.0";

  src = ./.;

  # The check phase is disabled.
  doCheck = false;

  build-system = [
    python3Packages.setuptools
  ];

  dependencies = [
    python3Packages.prometheus-client
    python3Packages.requests
  ];

  meta = {
    description = "UPC Qaire Prometheus Exporter";
    platforms = lib.platforms.linux;
  };
}
|
||||||
11
pkgs/upc-qaire-exporter/setup.py
Normal file
11
pkgs/upc-qaire-exporter/setup.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
# Package definition: ships any packages found next to this file plus the
# exporter script as an executable.
setup(
    name='upc-qaire-exporter',
    version='1.0',
    packages=find_packages(),  # modules importable from other scripts
    scripts=["upc-qaire-exporter"],  # executables
)
|
||||||
74
pkgs/upc-qaire-exporter/upc-qaire-exporter
Normal file
74
pkgs/upc-qaire-exporter/upc-qaire-exporter
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from prometheus_client import start_http_server, Gauge
|
||||||
|
import requests, json
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
# ---- Configuration ---------------------------------------
listening_port = 9928
update_period = 5 * 60  # seconds between measurements (5 min)
# ----------------------------------------------------------

# Gauge holding the most recent temperature reading.
metric_temp = Gauge(
    'upc_c6_s302_temp',
    'UPC C6 S302 temperature sensor',
)
|
||||||
|
|
||||||
|
def genparams():
    """Build the form fields sent with the data request.

    Returns:
        dict: The fixed query fields, with today's date (dd/mm/YYYY) as both
        start and end date, followed by one 'OPEN' entry for each of the
        7 x 48 `week.days[i].hours[j].value` keys.
    """
    today = datetime.now().strftime('%d/%m/%Y')

    params = {
        'topic': 'TEMPERATURE',
        'shift_dates_to': '',
        'datapoints': 301,
        'devicesAndColors': '1148418@@@#40ACB6',
        'fromDate': today,
        'toDate': today,
        'serviceFrequency': 'NONE',
    }

    # Every day/hour slot key is sent with the value 'OPEN'.
    params.update(
        ('week.days[{}].hours[{}].value'.format(day, slot), 'OPEN')
        for day in range(7)
        for slot in range(48)
    )

    return params
|
||||||
|
|
||||||
|
def measure(timeout=30):
    """Fetch the latest temperature reading from the data endpoint.

    A session is opened first, because the data request is only attempted
    once a JSESSIONID cookie has been obtained; the form from genparams()
    is then posted.

    Args:
        timeout: Seconds to wait on each HTTP request. Without a timeout a
            stalled connection would block the measurement loop forever.

    Returns:
        The '1148418-Temperatura' value of the last chart element, or None
        when a request returns a non-200 status or no JSESSIONID cookie is
        set (a message is printed in those cases).
    """
    # The context manager closes the session when the block exits, so a new
    # session is not left open on every polling cycle.
    with requests.Session() as s:
        # First we need to load session
        r = s.get("https://upc.edu/sirena", timeout=timeout)
        if r.status_code != 200:
            print("bad HTTP status code on new session: {}".format(r.status_code))
            return None

        if s.cookies.get("JSESSIONID") is None:
            print("cannot get JSESSIONID")
            return None

        # Now we can pull the data
        url = "https://upcsirena.app.dexma.com/l_12535/analysis/by_datapoints/data.json"
        r = s.post(url, data=genparams(), timeout=timeout)

        if r.status_code != 200:
            print("bad HTTP status code on data: {}".format(r.status_code))
            return None

        j = json.loads(r.content)

    # Just take the last one
    last = j['data']['chartElementList'][-1]
    temp = last['values']['1148418-Temperatura']

    return temp
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Serve the metrics on localhost, then refresh the gauge forever.
    start_http_server(port=listening_port, addr="localhost")
    while True:
        try:
            temp = measure()
            # measure() returns None when it could not get a reading. Publish
            # NaN explicitly rather than letting set(None) raise.
            if temp is None:
                temp = float("nan")
            metric_temp.set(temp)
        except Exception as exc:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit can still stop the exporter. Log the cause.
            print("measure failed: {!r}".format(exc))
            metric_temp.set(float("nan"))

        time.sleep(update_period)
|
||||||
Binary file not shown.
@@ -1,9 +1,13 @@
|
|||||||
age-encryption.org/v1
|
age-encryption.org/v1
|
||||||
-> ssh-ed25519 HY2yRg eRVX5yndWDLg9hw7sY1Iu8pJFy47luHvdL+zZGK2u1s
|
-> ssh-ed25519 HY2yRg gKGxsjHfpiRDQ6Tuvcx7pjKgrVUGweotuplLYwCGvik
|
||||||
e1nXXiMW0ywkZYh2s6c7/quGMfBOJOaRhNQDjCD2Iyc
|
DSz9j/stVyB1lXpVP+kg+H+RDgSftREGFFLQZClC3kI
|
||||||
-> ssh-ed25519 CAWG4Q gYG7GRxRpJ0/5Wz0Z0J2wfLfkMFNmcy81dQEewM7gUA
|
-> ssh-ed25519 cK5kHw 17DpKekfNVy4V742QSd61r2w6iawtOJR7Ct3UflDXio
|
||||||
lamdUdx+xOFWF1lmUM4x9TT0cJtKu9Sp7w9JHwm13u0
|
hsqTEPCYjHKvndMWPl4GpG23CzjGgVrS+cLIymISJHU
|
||||||
-> ssh-ed25519 MSF3dg HEzfpR8alG6WPzhaEjAmmjOFoFcMSQUldx46dBsXri4
|
-> ssh-ed25519 CAWG4Q oK01d4pbBqEZVsymSiKijPvJo714xsMSRMbzkssJKiw
|
||||||
OAD5H/zZGhfevYrFJzJrbNKPomKZDOS9Qx5tmTp78Jo
|
hs0tVFkqtIHXg9jtC2iDgCtefFcWvGJkXB+HJUcqXQs
|
||||||
--- A0sMSiNXWaEIgRXR0x6UAIaluuVH6Zlv4CJ9sI0NXOw
|
-> ssh-ed25519 xA739A KxO+AawfLMERHwzt3YnZRwPFlCfGETma7fo8M+ZtsAY
|
||||||
<EFBFBD><EFBFBD>6<EFBFBD>ph<EFBFBD><EFBFBD><EFBFBD>{<7B>><3E>F|<7C>i<EFBFBD>v<0B><>E}{<7B>ru<72><75>Ʒ<EFBFBD><C6B7><1A><EFBFBD><7F>}^<5E><>><3E>c6<06><14>j<> <09>g<EFBFBD>GW<47><57>:<3A>J3<19>|<7C>|<7C>Z<EFBFBD>
|
eSn0+/rhLQxNKt5xKubKck8Nxun2Sh3eJqBU/hwgzZM
|
||||||
|
-> ssh-ed25519 MSF3dg OyaZBLB2kO8fU139lXbbC404gT7IzIWk+BMhYzabBDg
|
||||||
|
/fiPFfBJcb+e40+fZbwCw7niF2hh+JxUPiKSiwUSOWg
|
||||||
|
--- ycZyGX+Li+LsOuweF9OVPl8aoMaRgp/RdFbDrPszkUs
|
||||||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>YM<EFBFBD><EFBFBD>:E O<><4F>2<EFBFBD>r=<15>&4<><04>CQΣ<51><CEA3>hC<68><43><EFBFBD>cb<63>^Sy<53><79>% <09><>x-vC`g<><15><><EFBFBD><EFBFBD>W^<5E><>wVG<0B><><EFBFBD>
|
||||||
Binary file not shown.
@@ -1,10 +1,13 @@
|
|||||||
age-encryption.org/v1
|
age-encryption.org/v1
|
||||||
-> ssh-ed25519 HY2yRg GdmdkW+BqqwBgu30b846jv3J7jtCM+a3rgOERuA050A
|
-> ssh-ed25519 HY2yRg U2KQWviZIVNemm9e8h7H+eOzoYNxXgLLS3hsZLMAuGk
|
||||||
FeGqM75jG9egesR+yyVKHm0/M+uBBp5Hclg4+qN0BR8
|
6n5dH1McNzk3rscP4v2pqZYDWtUFMd15rZsEd/mqIFM
|
||||||
-> ssh-ed25519 CAWG4Q a0wTWHgulQUYDAMZmXf3dOf6PdYgCqNtSylzWVVRNVM
|
-> ssh-ed25519 cK5kHw Ebrj/cpz1cFWAYAV9OxgyyH85OEMUnfUIV66p7jaoFY
|
||||||
Bx+WSYaiY4ZwlSZJo2a1XPMQmbKOU7F0tKAqVRLBOPo
|
6J7hWqODtS/fIF4BpxhxbrxZq5vbolvbLqRKqazT02M
|
||||||
-> ssh-ed25519 MSF3dg KccUvZZUbxbCrRWUWrX8KcHF6vQ5FV/BqUqI59G7dj4
|
-> ssh-ed25519 CAWG4Q mXqoQH9ycHF7u0y8mazCgynHxNLxTnrmQHke+2a5QCc
|
||||||
CFr7GXpZ9rPgy7HBfOyiYF9FnZUw6KcZwq9f7/0KaU8
|
mq6PdSF+KOqthuXwzTCsOQsi5KG0z1wHUck+bSTyOBY
|
||||||
--- E0Rp6RR/8+o0jvB1lRdhnlabxvI6uu/IgL2ZpPXzTc8
|
-> ssh-ed25519 xA739A TADeswueqDEroZWLjMw3RDNwVQ2xRD+JUMVZENovn0M
|
||||||
<EFBFBD><13>#<23><>H<EFBFBD>$<24>F;<3B><EFBFBD><7F>%<25><>6<><02>2<EFBFBD><32>rfX<66>\Dn <20>ш<EFBFBD>ȉ<EFBFBD>x<EFBFBD><78>><3E><>&;<3B>c<EFBFBD>U<EFBFBD>I=<3D><>M<EFBFBD><4D><EFBFBD>?T<><54>Ǹ<EFBFBD><16>"px<70>ӭ\s<><73><EFBFBD>bF<62><46><EFBFBD><EFBFBD>WD<>{<7B>
|
KFlnSjVFbjc+ZsbY8Ed7edC5B01TJGzd/dSryiLArPc
|
||||||
AW>?U<><55><EFBFBD><17><>HԳ
|
-> ssh-ed25519 MSF3dg Pq+ZD8AqJGDHDbd4PO1ngNFST8+6C2ghZkO/knKzzEc
|
||||||
|
wyiL/u38hdQMokmfTsBrY7CtYwc+31FG4EDaqVEn31U
|
||||||
|
--- 1z4cOipayh0zYkvasEVEvGreajegE/dqBV7b6E7aFh0
|
||||||
|
<EFBFBD><EFBFBD><EFBFBD><EFBFBD>R<EFBFBD>@<40>/i<>I'<27><><EFBFBD>Nx<4E>r"<1D>`<1E>O<EFBFBD><4F><EFBFBD>y<><79>8<EFBFBD><38> \/<2F><>I<19><17>D<EFBFBD>`<60>ߓ<EFBFBD><DF93><EFBFBD><1E><04>uy<75><79><EFBFBD>:9Lt<4C><1D><><EFBFBD>؋<EFBFBD><D88B><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>AU<41><55><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>`<60>;<3B>q8<71>GLU#<23>i<EFBFBD>y<EFBFBD><79>i<03>ڜ
|
||||||
@@ -1,9 +1,15 @@
|
|||||||
age-encryption.org/v1
|
age-encryption.org/v1
|
||||||
-> ssh-ed25519 HY2yRg xWRxJGWSzA5aplRYCYLB6aBwrUrQQJ2MtDYaD75V5nI
|
-> ssh-ed25519 HY2yRg NYGOSeZn8nGJUqpWoOAA9XO8P7eckUBKXCs8wPs+wlU
|
||||||
J07XF3NQiaYKKKNRcNWi9MloJD2wXHd+2K7bo6lF+QU
|
oLlgaZJVLV9Im1h0vHEKPVApsh46av8ovMgNoFDKle4
|
||||||
-> ssh-ed25519 CAWG4Q jNWymbyCczcm8RcaIEbFQBlOMALsuxTl4+pLUi0aR20
|
-> ssh-ed25519 cK5kHw bC9UlQXeP5LwIFFO9oHXocqojLtSPWE/kWbhCbiSIGg
|
||||||
z5NixlrRD+Y7Z/aFPs6hiDW4/lp8CBQCeJYpbuG9yYM
|
wPGpwKpCcV09jvxVmwj6BTmjm+CZv42sdgCqSfD624Y
|
||||||
-> ssh-ed25519 MSF3dg QsUQloEKN3k1G49FQnNR/Do6ILgGpjFcw3zu5kk1Ako
|
-> ssh-ed25519 CAWG4Q WkJgjedCBn4i4b/VFuU9Wq21VkHxiuwsla+0PuiSiD4
|
||||||
IHwyFWUEWqCStNcFprnpBa8L5J6zKIsn+7HcgGRv3sM
|
/nnfy2DTxQkkfCqzIa+lxgqn6MIgFlN5gZHYYApePvs
|
||||||
--- oUia0fsL6opeYWACyXtHAu/Ld+bUIt/7S1VszYTvwgU
|
-> ssh-ed25519 xA739A 9gcn6j7c7rCR50AetiuCkAnMsSEMtQto///qlTkAWhs
|
||||||
<EFBFBD><EFBFBD>V<EFBFBD><16>*<2A>t<1B>2-<2D>7<><37><EFBFBD><EFBFBD><EFBFBD><EFBFBD>h<EFBFBD>&<26><>͢_!տ+<2B><><EFBFBD><EFBFBD>(<28><0F><11>n<EFBFBD><6E> <09><>(<28><19><>/}<7D><><EFBFBD><EFBFBD>C<EFBFBD>Nͷ|<04>N<>u<EFBFBD>5<EFBFBD>ù勚K<E58B9A><4B>l<EFBFBD>"<22><>klOX<4F>y<EFBFBD><79><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A<EFBFBD><41>e<><65>$
|
lXrOn+cehZpRkIRzSJ1e64KsCqWf3tKa4ABbYBquvqM
|
||||||
|
-> ssh-ed25519 MSF3dg FzrEytuzBKr+HwpC1bxev3q+6cSZoMMCJdJfuANlHwo
|
||||||
|
qVyt4YpzGfvNX6IqwXs6oRA5aSgidFFxEA22D8XPJBU
|
||||||
|
--- FHVIG8tcNJBte+3VUsR3FsOs8xqrAeboFLxOV/xvSz0
|
||||||
|
_B<5F>M<EFBFBD>j<EFBFBD>H~<7E><>
|
||||||
|
^<5E>^x<>'
|
||||||
|
<EFBFBD>X<1D><0C><>|1d<31>:`r<><0C><><EFBFBD>X<EFBFBD>A<EFBFBD><41>B<EFBFBD>Z<EFBFBD><5A><1C><18><><EFBFBD>|<7C>^<16>E<EFBFBD>{<7B><><EFBFBD>]<5D><>U<EFBFBD><1E><16>Ͼ<EFBFBD><CFBE><EFBFBD><EFBFBD>EЗ<45><D097><EFBFBD>83<38>V<EFBFBD><56><EFBFBD><1E>D<EFBFBD><44><EFBFBD><EFBFBD><EFBFBD>^<5E><>;:
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -2,6 +2,11 @@ let
|
|||||||
keys = import ../keys.nix;
|
keys = import ../keys.nix;
|
||||||
adminsKeys = builtins.attrValues keys.admins;
|
adminsKeys = builtins.attrValues keys.admins;
|
||||||
hut = [ keys.hosts.hut ] ++ adminsKeys;
|
hut = [ keys.hosts.hut ] ++ adminsKeys;
|
||||||
|
fox = [ keys.hosts.fox ] ++ adminsKeys;
|
||||||
|
apex = [ keys.hosts.apex ] ++ adminsKeys;
|
||||||
|
raccoon = [ keys.hosts.raccoon ] ++ adminsKeys;
|
||||||
|
mon = [ keys.hosts.hut keys.hosts.tent ] ++ adminsKeys;
|
||||||
|
tent = [ keys.hosts.tent ] ++ adminsKeys;
|
||||||
# Only expose ceph keys to safe nodes and admins
|
# Only expose ceph keys to safe nodes and admins
|
||||||
safe = keys.hostGroup.safe ++ adminsKeys;
|
safe = keys.hostGroup.safe ++ adminsKeys;
|
||||||
in
|
in
|
||||||
@@ -10,10 +15,20 @@ in
|
|||||||
"gitlab-runner-docker-token.age".publicKeys = hut;
|
"gitlab-runner-docker-token.age".publicKeys = hut;
|
||||||
"gitlab-runner-shell-token.age".publicKeys = hut;
|
"gitlab-runner-shell-token.age".publicKeys = hut;
|
||||||
"gitlab-bsc-docker-token.age".publicKeys = hut;
|
"gitlab-bsc-docker-token.age".publicKeys = hut;
|
||||||
"nix-serve.age".publicKeys = hut;
|
"nix-serve.age".publicKeys = mon;
|
||||||
"jungle-robot-password.age".publicKeys = hut;
|
"jungle-robot-password.age".publicKeys = mon;
|
||||||
"ipmi.yml.age".publicKeys = hut;
|
"ipmi.yml.age".publicKeys = mon;
|
||||||
|
|
||||||
|
"tent-gitlab-runner-pm-docker-token.age".publicKeys = tent;
|
||||||
|
"tent-gitlab-runner-pm-shell-token.age".publicKeys = tent;
|
||||||
|
"tent-gitlab-runner-bsc-docker-token.age".publicKeys = tent;
|
||||||
|
"vpn-dac-login.age".publicKeys = tent;
|
||||||
|
"vpn-dac-client-key.age".publicKeys = tent;
|
||||||
|
|
||||||
"ceph-user.age".publicKeys = safe;
|
"ceph-user.age".publicKeys = safe;
|
||||||
"munge-key.age".publicKeys = safe;
|
"munge-key.age".publicKeys = safe;
|
||||||
|
|
||||||
|
"wg-fox.age".publicKeys = fox;
|
||||||
|
"wg-apex.age".publicKeys = apex;
|
||||||
|
"wg-raccoon.age".publicKeys = raccoon;
|
||||||
}
|
}
|
||||||
|
|||||||
13
secrets/tent-gitlab-runner-bsc-docker-token.age
Normal file
13
secrets/tent-gitlab-runner-bsc-docker-token.age
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
age-encryption.org/v1
|
||||||
|
-> ssh-ed25519 G5LX5w Zhbs+NM/SI49qQ0X8bBpWUWxYM0vUKCXNAnPpIE2NR0
|
||||||
|
CkBUmJ26EkwHztT8Pz0UGq2KZwN0Xz8iYQ9cEHL9OWQ
|
||||||
|
-> ssh-ed25519 cK5kHw 5KjUXJywRDp2A7l5ukTCS+WIAalxwP1f71ejGxwNrX4
|
||||||
|
JW8OLmfkULXo9AwYMGNyOgZ+nQ0MVc0PCM4kKPIo6V4
|
||||||
|
-> ssh-ed25519 CAWG4Q cVjY3R0ZHAfokA4kWlu5vOl2Gs7mdqRgRk4WSUOXAjg
|
||||||
|
IxEDvuximW99EqxmpW+Btpm0Zydmwg/u87bqnl26NYc
|
||||||
|
-> ssh-ed25519 xA739A hmuwZuxmJnuAjmU4X8yhPQ+hPWvN1G+ZS0pvD7fHamg
|
||||||
|
fnAPW6ZCrv5pSO4RQhhr8xz7ij7jAZJk0ApWluOXDng
|
||||||
|
-> ssh-ed25519 MSF3dg SSGLcWnum0Qo/0OnKDZVg9xAZMwGwVNYYmRJXxb4GU0
|
||||||
|
pdl6kATG7n2oMsoUboBfu+vDKurJcH1UvUa70rfMQkE
|
||||||
|
--- a2ZQAeAQlO9DWnegIAq6NpI1Po6f38l+hitZvq+zIW8
|
||||||
|
<EFBFBD>\ֺ"^<5E>DT<44>H<EFBFBD><48>3<EFBFBD><33><EFBFBD>_|.h<0E><><EFBFBD><EFBFBD><03>^<5E>n<14><0E><><EFBFBD><EFBFBD><1A>g<EFBFBD>S<EFBFBD>]_<><5F>?n<>z~2<>!<21>p7<70><37><<3C><14>ʨD?<3F>~<02>F<EFBFBD>$<24>`<60>q+<2B><><EFBFBD>SW<53>(+<2B><>P<EFBFBD>c<1E>u[<5B>m<EFBFBD>`O<>ܛ<EFBFBD>ϖT
|
||||||
13
secrets/tent-gitlab-runner-pm-docker-token.age
Normal file
13
secrets/tent-gitlab-runner-pm-docker-token.age
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
age-encryption.org/v1
|
||||||
|
-> ssh-ed25519 G5LX5w VKM/Y6Wy0gmb2gc4Q00VzHQ4IAxfSyshuDoaAzlEkFM
|
||||||
|
vf18uoEN5ZLJ4HcJg85epaseh1CRL9/ncXtU2HpH+QE
|
||||||
|
-> ssh-ed25519 cK5kHw sMuG07kjlI6VjPjELOUPzkn+KT9Yq7BPf0zSATM2aGI
|
||||||
|
/eODwL8KwyVgFjBK2MJlbqjN7mEvXCSsjq9D96szrng
|
||||||
|
-> ssh-ed25519 CAWG4Q t3/Ty7yCqC5x8KQY4VaHSQ9Q3epqMpXoBDKyKx9+VzE
|
||||||
|
JwgUsqMd+1jFZvFp9/SIoowbhSMVEkKp03T69+OHjho
|
||||||
|
-> ssh-ed25519 xA739A 0ohmKK427+4vupivrtjXp0dDK8wT4XUA9rWgcsCGKgA
|
||||||
|
msbeQyz3pL8RLtAeXX5tsfyHyOXxhfYpqaLEKnRxpPQ
|
||||||
|
-> ssh-ed25519 MSF3dg H+6jAoP7/Dxp8C/7Bk1C4CT1hpkUhtbnTWWIxkO24Ec
|
||||||
|
SrMuUG93T5lUw3xINEen5EEKLXJizIGFhBO1fVroFHE
|
||||||
|
--- tIPnH9cxTV3m3qzvZB97Egz+raWwZJ182BXXKDu8f+o
|
||||||
|
<EFBFBD><EFBFBD>f#<23>,|<7C>Ey.v<>DL<44>Ӻ<05>JPX<50><07><>`<60><><EFBFBD><EFBFBD>-#<23>F<EFBFBD>Ubs<62>(Q!?<3F><1A>#xJG?5<><35><EFBFBD><EFBFBD><EFBFBD>~<7E><>6MA<15>U<><55><EFBFBD>C<01><>M<>$+}W<>NϨG!<21><><EFBFBD><EFBFBD>a<EFBFBD><61><EFBFBD><EFBFBD>%<25>ǽ<EFBFBD>G
|
||||||
13
secrets/tent-gitlab-runner-pm-shell-token.age
Normal file
13
secrets/tent-gitlab-runner-pm-shell-token.age
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
age-encryption.org/v1
|
||||||
|
-> ssh-ed25519 G5LX5w 1KfTmTRP3iSdcclf/FuIpFWpy1tgKs5ED+qSYWo7inY
|
||||||
|
RX6Q1nLFF/yiVLpkWrl0BI0PpLoBi753+y8l/AXjNE4
|
||||||
|
-> ssh-ed25519 cK5kHw TP7+OQpQSNuyArnUo1C97J3P3oB0YtzCEPeVvlzsYHE
|
||||||
|
Bsy5KPNHTVNHnF1sxOvlfJq3CNMVFaXdYkRG2vSj7qM
|
||||||
|
-> ssh-ed25519 CAWG4Q eQyzwNaH6CfaYIjs8abEuQxt6vxRXsGz69UletMUVDE
|
||||||
|
FDcynPO7xg4PWez5Z8gTg5LyE0Wgb3zT9i3Kon67QsU
|
||||||
|
-> ssh-ed25519 xA739A 2JuLai2fUu3dZBydS8cMrLrEUIUkz4NNaiupoBOtTwU
|
||||||
|
sdM3X+XRzysop7yqa76Z7FAwTHOj91STCtZvfIgCdB0
|
||||||
|
-> ssh-ed25519 MSF3dg fSPkiWnpInX1V5p3afPCoPotcGFoWFiOMPThtY927lc
|
||||||
|
8v7E/3l0xA2VWZPXzkN4NmnaA0KJutLMurn/ZXZmhxA
|
||||||
|
--- MQkyBx9hT4ILYXKoZT18PWny1QbDFymcZr63zjMN/qQ
|
||||||
|
-b<>#<23><>M.<16>@<40>t<EFBFBD><74><EFBFBD>ŵ}+ό#@<40><><EFBFBD><EFBFBD><EFBFBD>k<EFBFBD>y<EFBFBD><79><EFBFBD>?v<><76>n<1F><>T<EFBFBD>+<2B><><EFBFBD>[<5B>Q<EFBFBD> gA<67><41><EFBFBD>
|
||||||
BIN
secrets/vpn-dac-client-key.age
Normal file
BIN
secrets/vpn-dac-client-key.age
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user