Clean up FFT gravity

This commit is contained in:
Tamás Bálint Misius 2023-01-27 20:04:10 +01:00
parent 85d492bad6
commit aa78a1ee6b
No known key found for this signature in database
GPG Key ID: 5B472A12F6ECA9F2
5 changed files with 102 additions and 133 deletions

View File

@ -3806,10 +3806,10 @@ void Simulation::BeforeSim()
grav->gravity_update_async();
//Get updated buffer pointers for gravity
gravx = grav->gravx;
gravy = grav->gravy;
gravp = grav->gravp;
gravmap = grav->gravmap;
gravx = &grav->gravx[0];
gravy = &grav->gravy[0];
gravp = &grav->gravp[0];
gravmap = &grav->gravmap[0];
}
if(emp_decor>0)
emp_decor -= emp_decor/25+2;
@ -4045,10 +4045,10 @@ Simulation::Simulation():
//Give air sim references to our data
grav->bmap = bmap;
//Gravity sim gives us maps to use
gravx = grav->gravx;
gravy = grav->gravy;
gravp = grav->gravp;
gravmap = grav->gravmap;
gravx = &grav->gravx[0];
gravy = &grav->gravy[0];
gravp = &grav->gravp[0];
gravmap = &grav->gravmap[0];
//Create and attach air simulation
air = new Air(*this);

View File

@ -9,44 +9,30 @@
Gravity::Gravity(CtorTag)
{
// Allocate full size Gravmaps
unsigned int size = NCELL;
th_ogravmap = new float[size];
th_gravmap = new float[size];
th_gravy = new float[size];
th_gravx = new float[size];
th_gravp = new float[size];
gravmap = new float[size];
gravy = new float[size];
gravx = new float[size];
gravp = new float[size];
gravmask = new unsigned[size];
th_ogravmap.resize(NCELL);
th_gravmap.resize(NCELL);
th_gravy.resize(NCELL);
th_gravx.resize(NCELL);
th_gravp.resize(NCELL);
gravmap.resize(NCELL);
gravy.resize(NCELL);
gravx.resize(NCELL);
gravp.resize(NCELL);
gravmask.resize(NCELL);
}
Gravity::~Gravity()
{
stop_grav_async();
delete[] th_ogravmap;
delete[] th_gravmap;
delete[] th_gravy;
delete[] th_gravx;
delete[] th_gravp;
delete[] gravmap;
delete[] gravy;
delete[] gravx;
delete[] gravp;
delete[] gravmask;
}
void Gravity::Clear()
{
int size = NCELL;
std::fill(gravy, gravy + size, 0.0f);
std::fill(gravx, gravx + size, 0.0f);
std::fill(gravp, gravp + size, 0.0f);
std::fill(gravmap, gravmap + size, 0.0f);
std::fill(gravmask, gravmask + size, 0xFFFFFFFF);
std::fill(&gravy[0], &gravy[0] + NCELL, 0.0f);
std::fill(&gravx[0], &gravx[0] + NCELL, 0.0f);
std::fill(&gravp[0], &gravp[0] + NCELL, 0.0f);
std::fill(&gravmap[0], &gravmap[0] + NCELL, 0.0f);
std::fill(&gravmask[0], &gravmask[0] + NCELL, UINT32_C(0xFFFFFFFF));
ignoreNextResult = true;
}
@ -86,8 +72,8 @@ void Gravity::gravity_update_async()
gravcv.notify_one();
}
unsigned int size = NCELL;
membwand(gravy, gravmask, size * sizeof(float), size * sizeof(unsigned));
membwand(gravx, gravmask, size * sizeof(float), size * sizeof(unsigned));
membwand(&gravy[0], &gravmask[0], size * sizeof(float), size * sizeof(uint32_t));
membwand(&gravx[0], &gravmask[0], size * sizeof(float), size * sizeof(uint32_t));
std::fill(&gravmap[0], &gravmap[0] + size, 0.0f);
}
@ -244,8 +230,6 @@ void Gravity::gravity_mask()
unsigned maskvalue;
mask_el *t_mask_el = nullptr;
mask_el *c_mask_el = nullptr;
if (!gravmask)
return;
memset(checkmap, 0, sizeof(checkmap));
for (int x = 0; x < XCELLS; x++)
{

View File

@ -2,19 +2,37 @@
#include "Misc.h"
#include <cstring>
#include <cmath>
#include <complex>
#include <fftw3.h>
constexpr auto xblock2 = XCELLS * 2;
constexpr auto yblock2 = YCELLS * 2;
constexpr auto fft_tsize = (xblock2 / 2 + 1) * yblock2;
//NCELL*4 is size of data array, scaling needed because FFTW calculates an unnormalized DFT
constexpr auto scaleFactor = -float(M_GRAV) / (NCELL * 4);
static_assert(sizeof(std::complex<float>) == sizeof(fftwf_complex));
struct FftwArrayDeleter { void operator ()(float ptr[]) const { fftwf_free(ptr); } };
struct FftwComplexArrayDeleter { void operator ()(std::complex<float> ptr[]) const { fftwf_free(ptr); } };
struct FftwPlanDeleter { void operator ()(fftwf_plan ptr ) const { fftwf_destroy_plan(ptr); } };
using FftwArrayPtr = std::unique_ptr<float [], FftwArrayDeleter >;
using FftwComplexArrayPtr = std::unique_ptr<std::complex<float> [], FftwComplexArrayDeleter>;
using FftwPlanPtr = std::unique_ptr<std::remove_pointer<fftwf_plan>::type, FftwPlanDeleter >;
FftwArrayPtr FftwArray(size_t size)
{
return FftwArrayPtr(reinterpret_cast<float *>(fftwf_malloc(size * sizeof(float))));
}
FftwComplexArrayPtr FftwComplexArray(size_t size)
{
return FftwComplexArrayPtr(reinterpret_cast<std::complex<float> *>(fftwf_malloc(size * sizeof(std::complex<float>))));
}
struct GravityImpl : public Gravity
{
bool grav_fft_status = false;
float *th_ptgravx = nullptr;
float *th_ptgravy = nullptr;
float *th_gravmapbig = nullptr;
float *th_gravxbig = nullptr;
float *th_gravybig = nullptr;
fftwf_complex *th_ptgravxt, *th_ptgravyt, *th_gravmapbigt, *th_gravxbigt, *th_gravybigt;
fftwf_plan plan_gravmap, plan_gravx_inverse, plan_gravy_inverse;
FftwArrayPtr th_gravmapbig , th_gravxbig , th_gravybig ;
FftwComplexArrayPtr th_ptgravxt, th_ptgravyt, th_gravmapbigt, th_gravxbigt, th_gravybigt;
FftwPlanPtr plan_gravmap, plan_gravx_inverse, plan_gravy_inverse;
void grav_fft_init();
void grav_fft_cleanup();
@ -34,34 +52,28 @@ GravityImpl::~GravityImpl()
void GravityImpl::grav_fft_init()
{
int xblock2 = XCELLS*2;
int yblock2 = YCELLS*2;
int fft_tsize = (xblock2/2+1)*yblock2;
float distance, scaleFactor;
fftwf_plan plan_ptgravx, plan_ptgravy;
if (grav_fft_status) return;
FftwPlanPtr plan_ptgravx, plan_ptgravy;
//use fftw malloc function to ensure arrays are aligned, to get better performance
th_ptgravx = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float)));
th_ptgravy = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float)));
th_ptgravxt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex)));
th_ptgravyt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex)));
th_gravmapbig = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float)));
th_gravmapbigt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex)));
th_gravxbig = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float)));
th_gravybig = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float)));
th_gravxbigt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex)));
th_gravybigt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex)));
FftwArrayPtr th_ptgravx = FftwArray(xblock2 * yblock2);
FftwArrayPtr th_ptgravy = FftwArray(xblock2 * yblock2);
th_ptgravxt = FftwComplexArray(fft_tsize);
th_ptgravyt = FftwComplexArray(fft_tsize);
th_gravmapbig = FftwArray(xblock2 * yblock2);
th_gravmapbigt = FftwComplexArray(fft_tsize);
th_gravxbig = FftwArray(xblock2 * yblock2);
th_gravybig = FftwArray(xblock2 * yblock2);
th_gravxbigt = FftwComplexArray(fft_tsize);
th_gravybigt = FftwComplexArray(fft_tsize);
//select best algorithm, could use FFTW_PATIENT or FFTW_EXHAUSTIVE but that increases the time taken to plan, and I don't see much increase in execution speed
plan_ptgravx = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravx, th_ptgravxt, FFTW_MEASURE);
plan_ptgravy = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravy, th_ptgravyt, FFTW_MEASURE);
plan_gravmap = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_gravmapbig, th_gravmapbigt, FFTW_MEASURE);
plan_gravx_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravxbigt, th_gravxbig, FFTW_MEASURE);
plan_gravy_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravybigt, th_gravybig, FFTW_MEASURE);
plan_ptgravx = FftwPlanPtr(fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravx.get(), reinterpret_cast<fftwf_complex *>(th_ptgravxt.get()), FFTW_MEASURE));
plan_ptgravy = FftwPlanPtr(fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravy.get(), reinterpret_cast<fftwf_complex *>(th_ptgravyt.get()), FFTW_MEASURE));
plan_gravmap = FftwPlanPtr(fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_gravmapbig.get(), reinterpret_cast<fftwf_complex *>(th_gravmapbigt.get()), FFTW_MEASURE));
plan_gravx_inverse = FftwPlanPtr(fftwf_plan_dft_c2r_2d(yblock2, xblock2, reinterpret_cast<fftwf_complex *>(th_gravxbigt.get()), th_gravxbig.get(), FFTW_MEASURE));
plan_gravy_inverse = FftwPlanPtr(fftwf_plan_dft_c2r_2d(yblock2, xblock2, reinterpret_cast<fftwf_complex *>(th_gravybigt.get()), th_gravybig.get(), FFTW_MEASURE));
//NCELL*4 is size of data array, scaling needed because FFTW calculates an unnormalized DFT
scaleFactor = -float(M_GRAV)/(NCELL*4);
//calculate velocity map caused by a point mass
for (int y = 0; y < yblock2; y++)
{
@ -69,7 +81,7 @@ void GravityImpl::grav_fft_init()
{
if (x == XCELLS && y == YCELLS)
continue;
distance = hypotf(float(x-XCELLS), float(y-YCELLS));
auto distance = hypotf(float(x-XCELLS), float(y-YCELLS));
th_ptgravx[y * xblock2 + x] = scaleFactor * (x - XCELLS) / powf(distance, 3);
th_ptgravy[y * xblock2 + x] = scaleFactor * (y - YCELLS) / powf(distance, 3);
}
@ -78,15 +90,11 @@ void GravityImpl::grav_fft_init()
th_ptgravy[yblock2 * xblock2 / 2 + xblock2 / 2] = 0.0f;
//transform point mass velocity maps
fftwf_execute(plan_ptgravx);
fftwf_execute(plan_ptgravy);
fftwf_destroy_plan(plan_ptgravx);
fftwf_destroy_plan(plan_ptgravy);
fftwf_free(th_ptgravx);
fftwf_free(th_ptgravy);
fftwf_execute(plan_ptgravx.get());
fftwf_execute(plan_ptgravy.get());
//clear padded gravmap
memset(th_gravmapbig, 0, xblock2 * yblock2 * sizeof(float));
memset(th_gravmapbig.get(), 0, xblock2 * yblock2 * sizeof(float));
grav_fft_status = true;
}
@ -94,17 +102,6 @@ void GravityImpl::grav_fft_init()
void GravityImpl::grav_fft_cleanup()
{
if (!grav_fft_status) return;
fftwf_free(th_ptgravxt);
fftwf_free(th_ptgravyt);
fftwf_free(th_gravmapbig);
fftwf_free(th_gravmapbigt);
fftwf_free(th_gravxbig);
fftwf_free(th_gravybig);
fftwf_free(th_gravxbigt);
fftwf_free(th_gravybigt);
fftwf_destroy_plan(plan_gravmap);
fftwf_destroy_plan(plan_gravx_inverse);
fftwf_destroy_plan(plan_gravy_inverse);
grav_fft_status = false;
}
@ -121,26 +118,23 @@ void Gravity::update_grav()
if (!fftGravity->grav_fft_status)
fftGravity->grav_fft_init();
auto *th_gravmapbig = fftGravity->th_gravmapbig;
auto *th_gravxbig = fftGravity->th_gravxbig;
auto *th_gravybig = fftGravity->th_gravybig;
auto *th_ptgravxt = fftGravity->th_ptgravxt;
auto *th_ptgravyt = fftGravity->th_ptgravyt;
auto *th_gravmapbigt = fftGravity->th_gravmapbigt;
auto *th_gravxbigt = fftGravity->th_gravxbigt;
auto *th_gravybigt = fftGravity->th_gravybigt;
auto *th_gravmapbig = fftGravity->th_gravmapbig.get();
auto *th_gravxbig = fftGravity->th_gravxbig.get();
auto *th_gravybig = fftGravity->th_gravybig.get();
auto *th_ptgravxt = fftGravity->th_ptgravxt.get();
auto *th_ptgravyt = fftGravity->th_ptgravyt.get();
auto *th_gravmapbigt = fftGravity->th_gravmapbigt.get();
auto *th_gravxbigt = fftGravity->th_gravxbigt.get();
auto *th_gravybigt = fftGravity->th_gravybigt.get();
auto &plan_gravmap = fftGravity->plan_gravmap;
auto &plan_gravx_inverse = fftGravity->plan_gravx_inverse;
auto &plan_gravy_inverse = fftGravity->plan_gravy_inverse;
int xblock2 = XCELLS*2, yblock2 = YCELLS*2;
int fft_tsize = (xblock2/2+1)*yblock2;
float mr, mc, pr, pc, gr, gc;
if (memcmp(th_ogravmap, th_gravmap, sizeof(float)*NCELL) != 0)
if (memcmp(&th_ogravmap[0], &th_gravmap[0], sizeof(float) * NCELL) != 0)
{
th_gravchanged = 1;
membwand(th_gravmap, gravmask, NCELL*sizeof(float), NCELL*sizeof(unsigned));
membwand(&th_gravmap[0], &gravmask[0], NCELL * sizeof(float), NCELL * sizeof(uint32_t));
//copy gravmap into padded gravmap array
for (int y = 0; y < YCELLS; y++)
{
@ -150,28 +144,16 @@ void Gravity::update_grav()
}
}
//transform gravmap
fftwf_execute(plan_gravmap);
fftwf_execute(plan_gravmap.get());
//do convolution (multiply the complex numbers)
for (int i = 0; i < fft_tsize; i++)
{
mr = th_gravmapbigt[i][0];
mc = th_gravmapbigt[i][1];
pr = th_ptgravxt[i][0];
pc = th_ptgravxt[i][1];
gr = mr*pr-mc*pc;
gc = mr*pc+mc*pr;
th_gravxbigt[i][0] = gr;
th_gravxbigt[i][1] = gc;
pr = th_ptgravyt[i][0];
pc = th_ptgravyt[i][1];
gr = mr*pr-mc*pc;
gc = mr*pc+mc*pr;
th_gravybigt[i][0] = gr;
th_gravybigt[i][1] = gc;
th_gravxbigt[i] = th_gravmapbigt[i] * th_ptgravxt[i];
th_gravybigt[i] = th_gravmapbigt[i] * th_ptgravyt[i];
}
//inverse transform, and copy from padded arrays into normal velocity maps
fftwf_execute(plan_gravx_inverse);
fftwf_execute(plan_gravy_inverse);
fftwf_execute(plan_gravx_inverse.get());
fftwf_execute(plan_gravy_inverse.get());
for (int y = 0; y < YCELLS; y++)
{
for (int x = 0; x < XCELLS; x++)

View File

@ -6,6 +6,8 @@
#include <mutex>
#include <condition_variable>
#include <memory>
#include <vector>
#include <cstdint>
class Simulation;
@ -15,11 +17,11 @@ protected:
bool enabled = false;
// Maps to be processed by the gravity thread
float *th_ogravmap = nullptr;
float *th_gravmap = nullptr;
float *th_gravx = nullptr;
float *th_gravy = nullptr;
float *th_gravp = nullptr;
std::vector<float> th_ogravmap;
std::vector<float> th_gravmap;
std::vector<float> th_gravx;
std::vector<float> th_gravy;
std::vector<float> th_gravp;
int th_gravchanged = 0;
@ -53,11 +55,12 @@ public:
~Gravity();
//Maps to be used by the main thread
float *gravmap = nullptr;
float *gravp = nullptr;
float *gravy = nullptr;
float *gravx = nullptr;
unsigned *gravmask = nullptr;
std::vector<float> gravmap;
std::vector<float> gravp;
std::vector<float> gravy;
std::vector<float> gravx;
std::vector<uint32_t> gravmask;
static_assert(sizeof(float) == sizeof(uint32_t));
unsigned char (*bmap)[XCELLS];

View File

@ -6,9 +6,9 @@
void Gravity::get_result()
{
memcpy(gravy, th_gravy, NCELL*sizeof(float));
memcpy(gravx, th_gravx, NCELL*sizeof(float));
memcpy(gravp, th_gravp, NCELL*sizeof(float));
memcpy(&gravy[0], &th_gravy[0], NCELL * sizeof(float));
memcpy(&gravx[0], &th_gravx[0], NCELL * sizeof(float));
memcpy(&gravp[0], &th_gravp[0], NCELL * sizeof(float));
}
void Gravity::update_grav(void)