/*
 * Copyright 2025 devteam@scivicslab.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.scivicslab.actoriac.report.sections.basic;

import com.scivicslab.actoriac.report.SectionBuilder;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * POJO section builder that summarizes GPU information from logs.
 *
 * <p>Pure business logic - no {@code CallableByActionName}.
 * Use {@link GpuSummarySectionIIAR} to expose as an actor.</p>
 *
 * <p>Parses GPU information from workflow execution logs, supporting
 * both NVIDIA (nvidia-smi) and AMD (ROCm) GPU data formats, with an
 * lspci-style line as a fallback source for the GPU name.</p>
 *
 * <p>One set of GPU attributes is kept per log {@code actor_name}; when the
 * same attribute is logged more than once for an actor, the later log row
 * wins (except for the lspci fallback, which never overwrites a name).</p>
 *
 * <h2>Output example:</h2>
 * <pre>
 * [GPU Summary]
 * 192.168.5.13, gpu, NVIDIA GeForce RTX 4080
 * 192.168.5.13, vram, 16GB
 * 192.168.5.13, driver, 550.54.14
 * 192.168.5.13, toolkit, CUDA 12.4
 * 192.168.5.13, arch, 8.9
 * 192.168.5.14, gpu, NVIDIA GeForce RTX 4080
 * ...
 *
 * Summary: 2 NVIDIA, 1 AMD
 * </pre>
 *
 * @author devteam@scivicslab.com
 * @since 2.16.0
 */
public class GpuSummarySection implements SectionBuilder {

    private static final Logger logger = Logger.getLogger(GpuSummarySection.class.getName());

    private static final long MIB_PER_GIB = 1024L;
    private static final long BYTES_PER_MIB = 1024L * 1024L;

    /**
     * Selects, for every actor that logged a "GPU INFO" marker in the session,
     * all of its messages that look GPU-related, in chronological order per actor.
     */
    private static final String GPU_LOG_QUERY =
            "SELECT actor_name, message FROM logs " +
            "WHERE session_id = ? AND actor_name IN (" +
            " SELECT DISTINCT actor_name FROM logs " +
            " WHERE session_id = ? AND message LIKE '%GPU INFO%'" +
            ") AND (message LIKE '%GPU INFO%' OR message LIKE '%NVIDIA%' " +
            "OR message LIKE '%GeForce%' OR message LIKE '%Quadro%' " +
            "OR message LIKE '%CUDA_VERSION%' OR message LIKE '%Radeon%' " +
            "OR message LIKE '%AMD_GPU%' OR message LIKE '%ROCM_VERSION%' " +
            "OR message LIKE '%GFX_ARCH%' OR message LIKE '%GPU_NAME%' " +
            "OR message LIKE '%AMD%' OR message LIKE '%VGA%') " +
            "ORDER BY actor_name, timestamp";

    // Patterns are compiled once; they used to be recompiled for every log line.

    /** Leading {@code [node-...]} tag that prefixes log lines. */
    private static final Pattern NODE_PREFIX_PATTERN =
            Pattern.compile("^\\[node-[^\\]]+\\]\\s*");

    /** CSV line of the form {@code name, <n> MiB | [N/A], driver, compute-cap}. */
    private static final Pattern NVIDIA_CSV_PATTERN = Pattern.compile(
            "^(NVIDIA [^,]+|[^,]*GeForce[^,]*|[^,]*Quadro[^,]*|[^,]*Tesla[^,]*|[^,]*A100[^,]*|[^,]*H100[^,]*|[^,]*GB[0-9]+[^,]*),\\s*(?:(\\d+)\\s*MiB|\\[N/A\\]),\\s*([\\d.]+),\\s*([\\d.]+)$");

    /** lspci-style controller line; group 1 is the device description without the "(rev ..." suffix. */
    private static final Pattern LSPCI_PATTERN = Pattern.compile(
            "(?:VGA compatible controller|3D controller|Display controller):\\s*(.+?)(?:\\s*\\(rev|$)");

    private Connection connection;
    private long sessionId = -1;

    /**
     * Sets the database connection for log queries.
     *
     * @param connection the JDBC connection to the H2 log database
     */
    public void setConnection(Connection connection) {
        this.connection = connection;
    }

    /**
     * Sets the session ID to query logs from.
     *
     * @param sessionId the session ID
     */
    public void setSessionId(long sessionId) {
        this.sessionId = sessionId;
    }

    /**
     * Generates the GPU summary section.
     *
     * @return the formatted section, or an empty string when the connection or
     *         session ID has not been set, when the query fails, or when no GPU
     *         attribute could be parsed from the logs
     */
    @Override
    public String generate() {
        if (connection == null || sessionId < 0) {
            logger.warning("GpuSummarySection: connection or sessionId not set");
            return "";
        }

        try {
            return buildGpuSummary();
        } catch (SQLException e) {
            // Pass the exception itself so the stack trace is not lost.
            logger.log(Level.WARNING, "GpuSummarySection: SQL error: " + e.getMessage(), e);
            return "";
        }
    }

    /**
     * Queries the GPU-related log rows of the session and builds the section text.
     *
     * @return the formatted section, or an empty string if nothing was parsed
     * @throws SQLException if the log query fails
     */
    private String buildGpuSummary() throws SQLException {
        Map<String, GpuInfo> nodeGpus = new LinkedHashMap<>();

        try (PreparedStatement ps = connection.prepareStatement(GPU_LOG_QUERY)) {
            ps.setLong(1, sessionId);
            ps.setLong(2, sessionId);
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    String nodeId = rs.getString("actor_name");
                    String message = rs.getString("message");
                    GpuInfo gpuInfo = nodeGpus.computeIfAbsent(nodeId, k -> new GpuInfo());
                    parseGpuMessage(message, gpuInfo);
                }
            }
        }

        // An entry is created for every matching row, including bare "GPU INFO"
        // headers. Drop entries with nothing printable so a session without any
        // parsable GPU data yields no section instead of an empty "Summary: " line.
        nodeGpus.values().removeIf(info -> !info.hasData());

        if (nodeGpus.isEmpty()) {
            return ""; // No GPU info, skip this section
        }

        return formatOutput(nodeGpus);
    }

    /**
     * Parses GPU information from a (possibly multi-line) log message into {@code gpuInfo}.
     *
     * <p>Each line is tried, in order, as a {@code KEY: value} line, an
     * nvidia-smi CSV line, and finally an lspci controller line.</p>
     *
     * @param message the raw log message
     * @param gpuInfo the holder to update in place
     */
    private void parseGpuMessage(String message, GpuInfo gpuInfo) {
        for (String line : message.split("\n")) {
            String cleanLine = NODE_PREFIX_PATTERN.matcher(line).replaceFirst("").trim();
            if (cleanLine.isEmpty() || cleanLine.contains("GPU INFO")) {
                continue;
            }
            if (parseKeyValueLine(cleanLine, gpuInfo)) {
                continue;
            }
            if (parseNvidiaCsvLine(cleanLine, gpuInfo)) {
                continue;
            }
            parseLspciLine(cleanLine, gpuInfo);
        }
    }

    /**
     * Handles the {@code KEY: value} lines (CUDA_VERSION, AMD_GPU, GPU_NAME,
     * VRAM_BYTES, DRIVER_VERSION, ROCM_VERSION, GFX_ARCH).
     *
     * <p>A recognized key with an empty or unparsable value is consumed but
     * leaves the holder untouched, so it cannot produce rows such as
     * {@code toolkit, CUDA } or erase a value parsed earlier.</p>
     *
     * @return {@code true} if the line started with one of the recognized keys
     */
    private static boolean parseKeyValueLine(String line, GpuInfo gpuInfo) {
        if (line.startsWith("CUDA_VERSION:")) {
            String version = valueAfter(line, "CUDA_VERSION:");
            if (!version.isEmpty()) {
                gpuInfo.toolkit = "CUDA " + version;
            }
            return true;
        }
        if (line.startsWith("AMD_GPU:")) {
            // NOTE(review): the value after the key is not inspected; the mere
            // presence of the line marks the node as AMD - confirm against the
            // script that emits it.
            gpuInfo.isAmd = true;
            return true;
        }
        if (line.startsWith("GPU_NAME:")) {
            String name = valueAfter(line, "GPU_NAME:");
            if (!name.isEmpty()) {
                gpuInfo.name = name;
            }
            return true;
        }
        if (line.startsWith("VRAM_BYTES:")) {
            try {
                long vramBytes = Long.parseLong(valueAfter(line, "VRAM_BYTES:"));
                gpuInfo.vram = formatVram(vramBytes / BYTES_PER_MIB);
            } catch (NumberFormatException ignored) {
                // Best effort: a malformed size simply leaves the VRAM row out.
            }
            return true;
        }
        if (line.startsWith("DRIVER_VERSION:")) {
            String driver = valueAfter(line, "DRIVER_VERSION:");
            if (!driver.isEmpty()) {
                gpuInfo.driver = driver;
            }
            return true;
        }
        if (line.startsWith("ROCM_VERSION:")) {
            String version = valueAfter(line, "ROCM_VERSION:");
            if (!version.isEmpty()) {
                gpuInfo.toolkit = "ROCm " + version;
            }
            return true;
        }
        if (line.startsWith("GFX_ARCH:")) {
            String arch = valueAfter(line, "GFX_ARCH:");
            if (!arch.isEmpty()) {
                gpuInfo.arch = arch;
            }
            return true;
        }
        return false;
    }

    /**
     * Handles an nvidia-smi CSV line ({@code name, memory, driver, compute capability}).
     *
     * @return {@code true} if the line matched the CSV pattern
     */
    private static boolean parseNvidiaCsvLine(String line, GpuInfo gpuInfo) {
        Matcher matcher = NVIDIA_CSV_PATTERN.matcher(line);
        if (!matcher.find()) {
            return false;
        }
        gpuInfo.name = matcher.group(1).trim();
        // Names matched here may lack a vendor keyword (e.g. "A100-PCIE-40GB"),
        // so record the vendor explicitly for the summary count.
        gpuInfo.isNvidia = true;

        String vramMiB = matcher.group(2); // null when memory is reported as [N/A]
        if (vramMiB != null) {
            try {
                gpuInfo.vram = formatVram(Long.parseLong(vramMiB));
            } catch (NumberFormatException ignored) {
                // \d+ can exceed the long range; skip the VRAM row rather than
                // let an unchecked exception escape generate().
            }
        }
        gpuInfo.driver = matcher.group(3).trim();
        gpuInfo.arch = matcher.group(4).trim();
        return true;
    }

    /**
     * Handles an lspci controller line. Used only as a fallback: it never
     * overwrites a name that was already parsed from another source.
     */
    private static void parseLspciLine(String line, GpuInfo gpuInfo) {
        Matcher matcher = LSPCI_PATTERN.matcher(line);
        if (matcher.find() && gpuInfo.name == null) {
            gpuInfo.name = matcher.group(1).trim();
        }
    }

    /** Returns the trimmed text following {@code key}; {@code line} must start with {@code key}. */
    private static String valueAfter(String line, String key) {
        return line.substring(key.length()).trim();
    }

    /**
     * Formats a memory size given in MiB.
     *
     * <p>Sizes of 1024 MiB or more are rounded to the nearest GB rather than
     * truncated, so that e.g. 16376 MiB is reported as {@code 16GB} instead of
     * {@code 15GB}; smaller sizes are reported in MB.</p>
     */
    private static String formatVram(long mib) {
        if (mib >= MIB_PER_GIB) {
            return Math.round(mib / (double) MIB_PER_GIB) + "GB";
        }
        return mib + "MB";
    }

    /**
     * Formats the per-node rows followed by the vendor summary line.
     *
     * @param nodeGpus parsed GPU data keyed by actor name, in output order
     * @return the section text
     */
    private String formatOutput(Map<String, GpuInfo> nodeGpus) {
        // Count GPU types
        int nvidiaCount = 0;
        int amdCount = 0;
        int otherCount = 0;
        for (GpuInfo gpu : nodeGpus.values()) {
            if (isNvidia(gpu)) {
                nvidiaCount++;
            } else if (isAmd(gpu)) {
                amdCount++;
            } else if (gpu.name != null) {
                otherCount++;
            }
        }

        StringBuilder sb = new StringBuilder();
        sb.append("[GPU Summary]\n");
        for (Map.Entry<String, GpuInfo> entry : nodeGpus.entrySet()) {
            String nodeShort = entry.getKey().replaceFirst("^node-", "");
            GpuInfo gpu = entry.getValue();
            appendRow(sb, nodeShort, "gpu", gpu.name);
            appendRow(sb, nodeShort, "vram", gpu.vram);
            appendRow(sb, nodeShort, "driver", gpu.driver);
            appendRow(sb, nodeShort, "toolkit", gpu.toolkit);
            appendRow(sb, nodeShort, "arch", gpu.arch);
        }

        sb.append("\nSummary: ");
        boolean first = true;
        first = appendCount(sb, nvidiaCount, "NVIDIA", first);
        first = appendCount(sb, amdCount, "AMD", first);
        appendCount(sb, otherCount, "Other", first);
        sb.append("\n");

        return sb.toString();
    }

    /**
     * Appends one {@code node, label, value} row; does nothing when {@code value} is null.
     *
     * <p>NOTE(review): rows end with the platform line separator ({@code %n})
     * while the header and summary use {@code \n}; kept as-is to avoid
     * changing the emitted text - confirm which one consumers expect.</p>
     */
    private static void appendRow(StringBuilder sb, String node, String label, String value) {
        if (value != null) {
            sb.append(String.format("%s, %s, %s%n", node, label, value));
        }
    }

    /**
     * Appends {@code "<count> <label>"} when {@code count} is positive,
     * preceded by {@code ", "} unless it is the first item.
     *
     * @return the updated "first item" flag
     */
    private static boolean appendCount(StringBuilder sb, int count, String label, boolean first) {
        if (count <= 0) {
            return first;
        }
        if (!first) {
            sb.append(", ");
        }
        sb.append(count).append(' ').append(label);
        return false;
    }

    /** Tells whether the entry came from nvidia-smi or carries an NVIDIA product keyword in its name. */
    private static boolean isNvidia(GpuInfo gpu) {
        if (gpu.isNvidia) {
            return true;
        }
        String name = gpu.name;
        return name != null
                && (name.contains("NVIDIA") || name.contains("GeForce")
                    || name.contains("Quadro") || name.contains("Tesla"));
    }

    /** Tells whether the entry was flagged by an AMD_GPU line or carries an AMD keyword in its name. */
    private static boolean isAmd(GpuInfo gpu) {
        if (gpu.isAmd) {
            return true;
        }
        String name = gpu.name;
        return name != null && (name.contains("AMD") || name.contains("Radeon"));
    }

    @Override
    public String getTitle() {
        return null; // Title is embedded in content
    }

    /**
     * GPU information holder.
     */
    private static class GpuInfo {
        String name;
        String vram;
        String driver;
        String toolkit;   // CUDA x.x or ROCm x.x
        String arch;      // compute cap (NVIDIA) or gfx ID (AMD)
        boolean isAmd;    // set when an AMD_GPU line was seen
        boolean isNvidia; // set when an nvidia-smi CSV line was parsed

        /** Returns {@code true} if at least one printable attribute was parsed. */
        boolean hasData() {
            return name != null || vram != null || driver != null
                    || toolkit != null || arch != null;
        }
    }
}