Skip to content

Commit

Permalink
Merge B project as our subdirectory
Browse files Browse the repository at this point in the history
  • Loading branch information
mmarinero committed Dec 27, 2010
2 parents 8c058ae + 6402d8b commit 6572abc
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 0 deletions.
6 changes: 6 additions & 0 deletions fftSse/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Builds the DFT benchmark: the C driver (practica3.c) linked against the
# NASM implementation (dft.asm).
#
# dft.asm is 32-bit cdecl code assembled as a 32-bit ELF object (-f elf),
# so the C side is compiled and linked with -m32 as well.
# FLAGS is passed to both gcc and nasm, exactly as before (e.g. FLAGS=-g).

.PHONY: clean

# -lm goes after the files that use it: linkers resolve libraries left to
# right and may drop a library named before any object that references it.
practica3 : practica3.c dft.o
	gcc -m32 $(FLAGS) practica3.c dft.o -o practica3 -lm

dft.o: dft.asm
	nasm $(FLAGS) -f elf dft.asm

# The leading '-' keeps make going even if rm reports an error.
clean :
	-rm -f dft.o practica3
7 changes: 7 additions & 0 deletions fftSse/check.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Runs the benchmark 20 times, collects the trailing number printed on each
# output line (the elapsed seconds) into .tiempos.txt, and lets
# sumatiempo.py print the averages. The temporary file is removed afterwards.

# Start from an empty file so results left behind by an interrupted run
# are not mixed into this one (the loop below appends).
rm -f .tiempos.txt

# Exactly 20 runs: `seq 0 20` produced 21.
for ejecucion in $(seq 1 20)
do
	./practica3 | grep -E -o "([0-9]|\.)+$" >>.tiempos.txt
done
./sumatiempo.py
rm .tiempos.txt
64 changes: 64 additions & 0 deletions fftSse/dft.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
;-----------------------------------------------------------------------
; int dft(double *xr, double *xi, double *matrizr, double *matrizi,
;         double *rr, double *ri, int n)
;
; Multiplies the n x n complex matrix (matrizr, matrizi; row-major) by the
; complex vector (xr, xi) using packed doubles, writing the real parts of
; the product to rr and the imaginary parts to ri:
;     rr[row] = sum_j matrizr[row][j]*xr[j] - matrizi[row][j]*xi[j]
;     ri[row] = sum_j matrizi[row][j]*xr[j] + matrizr[row][j]*xi[j]
;
; Syntax: NASM.  Target: 32-bit x86, cdecl (all arguments on the stack).
; Needs:  SSE2, plus SSE3 for haddpd.
; Input:  xr, xi, matrizr and matrizi are read with movapd and therefore
;         must be 16-byte aligned. n must be a positive even number (two
;         doubles are processed per step; an even n also keeps every
;         matrix row 16-byte aligned).
; Output: eax = 0 on success; eax = -1 if n is not a positive even number,
;         in which case no array is read or written.
; Clobb:  ecx, edx, xmm0-xmm7, flags. ebx, esi, edi and ebp are preserved.
;         The caller's argument slots are not modified.
;-----------------------------------------------------------------------

; Stack arguments
%define xr      [ebp + 8]
%define xi      [ebp + 12]
%define matrizr [ebp + 16]
%define matrizi [ebp + 20]
%define rr      [ebp + 24]
%define ri      [ebp + 28]
%define n       [ebp + 32]
; Locals: rows still to be processed (outer loop counter, counts down)
%define n2      [ebp - 4]
; Locals: index of the output element being produced (counts up)
%define i       [ebp - 8]


segment .text
global dft

dft:
        push    ebp
        mov     ebp, esp
        sub     esp, 8                  ; room for the two locals
        push    ebx                     ; callee-saved registers used below
        push    esi
        push    edi

        ; Reject sizes the packed loop cannot handle. Without this check
        ; a pair count of zero would make the inner loop wrap around and
        ; read far beyond the arrays.
        mov     ecx, n
        mov     eax, -1                 ; return value for the error path
        test    ecx, ecx
        jle     .fin                    ; n <= 0
        test    ecx, 1
        jnz     .fin                    ; n is odd

        mov     dword i, 0
        mov     n2, ecx
        mov     eax, xr                 ; eax = xr
        mov     ebx, xi                 ; ebx = xi
        mov     edx, matrizr            ; edx = current row of matrizr
        mov     esi, matrizi            ; esi = current row of matrizi

.fila:
        xor     edi, edi                ; byte offset inside the row
        xorpd   xmm0, xmm0              ; packed partial sums, real part
        xorpd   xmm1, xmm1              ; packed partial sums, imaginary part
        mov     ecx, n
        shr     ecx, 1                  ; pairs of doubles per row (>= 1 here)

.par:
        movapd  xmm2, [eax + edi]       ; xr pair
        movapd  xmm3, [ebx + edi]       ; xi pair
        movapd  xmm4, [edx + edi]       ; matrizr pair
        movapd  xmm5, [esi + edi]       ; matrizi pair
        movapd  xmm6, xmm2              ; keep xr and xi for the imaginary
        movapd  xmm7, xmm3              ; part instead of reloading them
        mulpd   xmm2, xmm4              ; xr * matrizr
        mulpd   xmm3, xmm5              ; xi * matrizi
        subpd   xmm2, xmm3              ; (xr * matrizr) - (xi * matrizi)
        addpd   xmm0, xmm2              ; accumulate real part
        mulpd   xmm5, xmm6              ; matrizi * xr
        mulpd   xmm4, xmm7              ; matrizr * xi
        addpd   xmm4, xmm5              ; (matrizi * xr) + (matrizr * xi)
        addpd   xmm1, xmm4              ; accumulate imaginary part
        add     edi, 16
        dec     ecx
        jnz     .par

        ; Here edi = (pairs per row) * 16 = row size in bytes, which is
        ; exactly the distance to the next matrix row.
        add     edx, edi
        add     esi, edi

        mov     edi, i
        mov     ecx, rr
        haddpd  xmm0, xmm1              ; low = real total, high = imaginary total
        movlpd  [ecx + edi*8], xmm0     ; store real part
        mov     ecx, ri
        movhpd  [ecx + edi*8], xmm0     ; store imaginary part
        inc     dword i
        dec     dword n2
        jnz     .fila

        xor     eax, eax                ; success

.fin:
        pop     edi
        pop     esi
        pop     ebx
        leave
        ret
133 changes: 133 additions & 0 deletions fftSse/practica3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#include <math.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#define FALSE 0
#define PI 3.1415926535897932384626433832795029L
#define TRUE 1
#define TAMANO 1024
#define RANDOM 1

int dftc(double *, double *, double *, double *, double *, double *, int);
int dft(double *xr, double *xi, double *matrizr, double *matrizi, double *rr, double *ri, int tam) __attribute__((cdecl));
int matrizdft(double *,double *, int);
int imprime(double *, double *,int , int);

int main(int argc, char *argv[]){
    /*
     * Computes the discrete Fourier transform of a complex vector twice,
     * once with the C reference (dftc) and once with the SSE routine (dft),
     * and prints the time each one took.
     *
     * The vector size is TAMANO, or the first command-line argument when
     * given. It must be a positive even number because the SSE routine
     * processes two doubles per step.
     *
     * Returns 0 on success, 1 on an invalid size or allocation failure.
     */
    int tam = TAMANO;
    int estado = 1;
    int fallo = 0;
    int i;
    double errorAbsoluto = 0;
    double *xr = NULL, *xi = NULL;
    double *matrizr = NULL, *matrizi = NULL;
    double *rr = NULL, *ri = NULL;
    double *rrc = NULL, *ric = NULL;
    /* timestamps taken around each measured call */
    struct timeval inicio, fin;
    size_t bytesVector, bytesMatriz;

    if (argc > 1) tam = atoi(argv[1]);
    if (tam <= 0 || tam % 2 != 0) {
        fprintf(stderr, "El tamano debe ser un numero par positivo\n");
        return 1;
    }
    /* make sure tam * tam * sizeof(double) fits in a size_t */
    if ((size_t)tam > (size_t)-1 / sizeof(double) / (size_t)tam) {
        fprintf(stderr, "Tamano demasiado grande\n");
        return 1;
    }
    bytesVector = (size_t)tam * sizeof(double);
    bytesMatriz = (size_t)tam * bytesVector;

    /* 16-byte aligned buffers: the SSE routine uses aligned loads */
    fallo |= posix_memalign((void **)&matrizr, 16, bytesMatriz);
    fallo |= posix_memalign((void **)&matrizi, 16, bytesMatriz);
    fallo |= posix_memalign((void **)&xr, 16, bytesVector);
    fallo |= posix_memalign((void **)&xi, 16, bytesVector);
    fallo |= posix_memalign((void **)&rr, 16, bytesVector);
    fallo |= posix_memalign((void **)&ri, 16, bytesVector);
    fallo |= posix_memalign((void **)&rrc, 16, bytesVector);
    fallo |= posix_memalign((void **)&ric, 16, bytesVector);
    if (fallo) {
        fprintf(stderr, "No se pudo reservar memoria\n");
        goto liberar;
    }

    /* input vector: random values when RANDOM is 1, otherwise 1..tam */
    gettimeofday(&inicio, NULL);
    srand((unsigned int) inicio.tv_usec);
    for (i = 0; i < tam; i++) {
        xr[i] = RANDOM ? rand() : i+1;
        xi[i] = RANDOM ? rand() : i+1;
    }
    //imprime(xr, xi, tam, TRUE); // print the input vector

    matrizdft(matrizr, matrizi, tam);
    //imprime(matrizr, matrizi, tam, FALSE); // print the DFT matrix

    /* time the C implementation; seconds and microseconds are subtracted
       separately so nothing overflows where long is 32 bits wide */
    gettimeofday(&inicio, NULL);
    dftc(xr, xi, matrizr, matrizi, rrc, ric, tam);
    gettimeofday(&fin, NULL);
    printf("Segundos para el calculo en C: %f\n",
            (double) (fin.tv_sec - inicio.tv_sec)
            + (double) (fin.tv_usec - inicio.tv_usec) / 1000000.0);
    //imprime(rrc, ric, tam, TRUE); // print the C result vector

    /* time the SSE implementation */
    gettimeofday(&inicio, NULL);
    dft(xr, xi, matrizr, matrizi, rr, ri, tam);
    gettimeofday(&fin, NULL);
    printf("Segundos para el calculo con SSE: %f\n",
            (double) (fin.tv_sec - inicio.tv_sec)
            + (double) (fin.tv_usec - inicio.tv_usec) / 1000000.0);
    //imprime(rr, ri, tam, TRUE); // print the SSE result vector

    /* element-wise absolute difference between both results; rr and ri
       are overwritten with the differences */
    for (i = 0; i < tam; i++) {
        rr[i] = fabs(rrc[i]-rr[i]);
        ri[i] = fabs(ric[i]-ri[i]);
        errorAbsoluto += rr[i] + ri[i];
    }
    //imprime(rr, ri, tam, TRUE); // print the vector of differences
    //printf("\nDiferencia total resultados:\nabs(resultadoC-resultadoSSE) = %e\n", errorAbsoluto);

    estado = 0;

liberar:
    /* pointers start as NULL, so this is safe after a partial allocation */
    free(matrizr);
    free(matrizi);
    free(xr);
    free(xi);
    free(rr);
    free(ri);
    free(rrc);
    free(ric);
    return estado;
}

int dftc(double *xr, double *xi, double *matrizr, double *matrizi, double *rr, double *ri, int n) {
    /*
     * Reference C implementation: multiplies the n x n complex matrix
     * (matrizr, matrizi; row-major) by the complex vector (xr, xi).
     *
     *   rr[i] = sum_j matrizr[i][j]*xr[j] - matrizi[i][j]*xi[j]
     *   ri[i] = sum_j matrizi[i][j]*xr[j] + matrizr[i][j]*xi[j]
     *
     * Nothing is written when n <= 0. Always returns 0.
     */
    int i, j;
    /* matrizr and matrizi advance one row per outer iteration */
    for (i = 0; i < n; i++, matrizr += n, matrizi += n) {
        double tempr = 0.0;
        double tempi = 0.0;
        for (j = 0; j < n; j++) {
            tempr += matrizr[j] * xr[j] - matrizi[j] * xi[j];
            tempi += matrizi[j] * xr[j] + matrizr[j] * xi[j];
        }
        rr[i] = tempr;
        ri[i] = tempi;
    }
    /* the function is declared int, so it must return a value */
    return 0;
}

int matrizdft(double *matrizr,double *matrizi, int n){
    /*
     * Fills matrizr / matrizi (n x n, row-major) with the real and
     * imaginary parts of the DFT matrix:
     *
     *   matrizr[i][j] = cos(-2*PI*i*j/n)
     *   matrizi[i][j] = sin(-2*PI*i*j/n)
     *
     * Nothing is written when n <= 0. Always returns 0.
     */
    int i, j;
    double pi2divn = -PI * 2 / n;
    for (i = 0; i < n; i++, matrizr += n, matrizi += n) {
        for (j = 0; j < n; j++) {
            /* The twiddle factor is periodic in i*j with period n, so the
               product is reduced modulo n with exact integer arithmetic.
               This keeps the angle within one turn, where cos and sin are
               most accurate. long long avoids overflow of i*j. */
            double angulo = pi2divn * (double)(((long long)i * j) % n);
            matrizr[j] = cos(angulo);
            matrizi[j] = sin(angulo);
        }
    }
    /* the function is declared int, so it must return a value */
    return 0;
}

int imprime(double *matrizr, double *matrizi, int n, int v){
    /*
     * Prints complex values to stdout, one matrix row per line, each value
     * as "real imagi". With v true only the first row is printed, which is
     * how a vector of n elements is shown; otherwise all n rows of an
     * n x n row-major matrix are printed. A blank line follows the output.
     * Always returns 0.
     */
    int i, j;
    for (i = 0; i < n; i++, matrizr += n, matrizi += n) {
        for (j = 0; j < n; j++)
            printf("% 2.2e % 2.2ei ", matrizr[j], matrizi[j]);
        printf("\n");
        if (v) break; /* a vector has a single row */
    }
    printf("\n");
    /* the function is declared int, so it must return a value */
    return 0;
}
9 changes: 9 additions & 0 deletions fftSse/sumatiempo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env python
"""Summarizes the timings collected in .tiempos.txt.

The file holds one number per line, alternating between the time of the C
implementation and the time of the SSE implementation (in that order).
Prints the mean time of each one and the ratio between the totals.
"""


def resumen(lineas):
    """Return (mean C time, mean SSE time, total C / total SSE).

    lineas is an iterable of strings, each holding one number; blank lines
    are ignored. Values at even positions belong to C and values at odd
    positions to SSE. Each mean is taken over the number of values actually
    read rather than a fixed run count. A mean with no samples is 0.0.
    When the SSE total is zero the ratio is inf, or nan if the C total is
    zero as well.
    """
    valores = [float(linea) for linea in lineas if linea.strip()]
    tiempos_c = valores[0::2]
    tiempos_sse = valores[1::2]
    total_c = sum(tiempos_c)
    total_sse = sum(tiempos_sse)
    media_c = total_c / len(tiempos_c) if tiempos_c else 0.0
    media_sse = total_sse / len(tiempos_sse) if tiempos_sse else 0.0
    if total_sse:
        razon = total_c / total_sse
    elif total_c:
        razon = float("inf")
    else:
        razon = float("nan")
    return media_c, media_sse, razon


if __name__ == "__main__":
    # The with-block closes the file even if a line fails to parse.
    with open(".tiempos.txt", "r") as archivo:
        media_c, media_sse, razon = resumen(archivo)
    # A single parenthesized argument prints the same on Python 2 and 3.
    print("t.m C:\t%f s\nt.m SSE\t%f s\nC/SSE\t%f" % (media_c, media_sse, razon))

0 comments on commit 6572abc

Please sign in to comment.