Add SOR solver

2023-06-23 17:20:15 +02:00 · 2023-06-23 17:20:15 +02:00 · 3a201d35ca
commit 3a201d35ca
parent d8185c0d0c
2 changed files with 66 additions and 9 deletions
--- a/src/heat/common/heat.h
+++ b/src/heat/common/heat.h
@ -53,6 +53,6 @@ double getTime(void);
 const char *summary(void);
 double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData);
 void computeBlock(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);
-double computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]);
+void computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols], double relax, double *residual, double *max_elem);
 #endif // HEAT_H
--- a/src/heat/common/kernel.c
+++ b/src/heat/common/kernel.c
@ -1,4 +1,5 @@
 #include <stdint.h>
 #include <stdio.h>
 #include "common/heat.h"
 #ifndef SIMD
@ -19,6 +20,7 @@ void computeBlock(const int64_t rows, const int64_t cols,
 		const int cstart, const int cend,
 		double M[rows][cols])
 {
 	(void) cend;
 	// Assuming square blocks
 	const int bs = rend-rstart+1;
 	for (int k = 0; k < bs; ++k) {
@ -40,19 +42,74 @@ void computeBlock(const int64_t rows, const int64_t cols,
 }
 #endif
-double computeBlockResidual(const int64_t rows, const int64_t cols,
+
 #if 1
 void computeBlockResidual(const int64_t rows, const int64_t cols,
 		const int rstart, const int rend,
 		const int cstart, const int cend,
-		double M[rows][cols])
+		double M[rows][cols], double relax,
 		double *residual, double *max_elem)
 {
-	double sum = 0.0;
+	//double relax = 1.95;
 	for (int r = rstart; r <= rend; ++r) {
 		for (int c = cstart; c <= cend; ++c) {
-			const double value = 0.25*(M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]);
+			double old = M[r][c];
-			const double diff = value - M[r][c];
+			double fdiff = 0.25*(M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]);
-			sum += diff*diff;
+			double new = (1 - relax) * old + relax * fdiff;
-			M[r][c] = value;
+			double diff = new - old;
 			/* Use the largest absolute error as residual */
 			*residual = fmax(*residual, fabs(diff));
 			*max_elem = fmax(*max_elem, fabs(new));
 			//fprintf(stderr, "residual = %e in (%4d, %4d)\n", residual, r, c);
 			M[r][c] = new;
 		}
 	}
 	return sum;
 }
 #else
 /* Red-black parallelization */
 void computeBlockResidual(const int64_t rows, const int64_t cols,
 		const int rstart, const int rend,
 		const int cstart, const int cend,
 		double M[rows][cols], double relax,
 		double * restrict residual, double * restrict max_elem)
 {
 	(void)(residual);
 	(void)(max_elem);
 	const double A = 1 - relax;
 	const double B = 0.25 * relax;
 	//double relax = 1.95;
 	for (int r = rstart; r <= rend; r += 2) {
 		#pragma clang loop vectorize(enable)
 		for (int c = cstart; c <= cend; c += 2) {
 			double old = M[r][c];
 			double new = A * old + B * (M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]);
 			//double diff = new - old;
 			/* Use the largest absolute error as residual */
 			//*residual = fmax(*residual, fabs(diff));
 			//*max_elem = fmax(*max_elem, fabs(new));
 			//fprintf(stderr, "residual = %e in (%4d, %4d)\n", residual, r, c);
 			M[r][c] = new;
 		}
 	}
 	for (int r = rstart+1; r <= rend; r += 2) {
 		#pragma clang loop vectorize(enable)
 		for (int c = cstart+1; c <= cend; c += 2) {
 			double old = M[r][c];
 			double new = A * old + B * (M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]);
 			//double diff = new - old;
 			/* Use the largest absolute error as residual */
 			//*residual = fmax(*residual, fabs(diff));
 			//*max_elem = fmax(*max_elem, fabs(new));
 			//fprintf(stderr, "residual = %e in (%4d, %4d)\n", residual, r, c);
 			M[r][c] = new;
 		}
 	}
 }
 #endif