Skip to content

Commit d1d5dcf

Browse files
authored
Merge pull request #231 from fjtrujy/gprof_improvements
Some `gprof` improvements
2 parents 62c71d2 + 8c477e9 commit d1d5dcf

File tree

13 files changed

+424
-45
lines changed

13 files changed

+424
-45
lines changed

configure.ac

+4
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ AC_CHECK_LIB([z], [compress])
3333
AC_C_CONST
3434
AC_C_BIGENDIAN
3535

36+
# Optional gprof instrumentation of the SDK libraries; disabled unless --with-gprofflags is given.
AC_ARG_WITH(gprofflags, [ --with-gprofflags build with gprof flag included], [], [with_gprofflags=no])
3637
AC_ARG_WITH(pthread, [ --with-pthread build with pthread support], [], [with_pthread=yes])
3738

3839
# Checks for library functions.
@@ -73,6 +74,9 @@ AC_SUBST(PSPSDK_LIBDIR)
7374

7475
# CFLAGS and CXXFLAGS used to build pspsdk libraries.
7576
PSPSDK_CFLAGS="$CFLAGS -mno-gpopt -Wall -Werror -D_PSP_FW_VERSION=600"
77+
if test "$with_gprofflags" = yes ; then
78+
PSPSDK_CFLAGS="$PSPSDK_CFLAGS -pg -g"
79+
fi
7680
PSPSDK_CXXFLAGS="$PSPSDK_CFLAGS -fno-exceptions -fno-rtti"
7781
if test "$with_pthread" = no ; then
7882
PSPSDK_CFLAGS="$PSPSDK_CFLAGS -DPSP_WITHOUT_PTHREAD"

src/prof/Makefile.am

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ CFLAGS = @PSPSDK_CFLAGS@ -std=gnu99 -Wall -Wmissing-prototypes
1111
CCASFLAGS = $(CFLAGS) -I$(top_srcdir)/src/base -I$(top_srcdir)/src/kernel
1212

1313
libpspprofincludedir = @PSPSDK_INCLUDEDIR@
14-
libpspprofinclude_HEADERS =
14+
libpspprofinclude_HEADERS = pspprof.h
1515

1616
lib_LIBRARIES = libpspprof.a
1717
libpspprof_a_SOURCES = prof.c mcount.s

src/prof/mcount.s

-21
Original file line numberDiff line numberDiff line change
@@ -17,30 +17,12 @@ _mcount:
1717
sd $6, 32($29)
1818
sd $7, 40($29)
1919

20-
# Make sure we're not recursively called when compiling __mcount()
21-
# With -pg
22-
la $4, _busy
23-
lw $5, 0($4)
24-
bnez $5, done
25-
nop
26-
27-
# Mark busy
28-
li $5, 1
29-
sw $5, 0($4)
30-
3120
# Call internal C handler
3221
move $4, $1
3322
move $5, $31
3423
jal __mcount
3524
nop
3625

37-
# Unmark busy
38-
la $4, _busy
39-
li $5, 0
40-
sw $5, 0($4)
41-
42-
done:
43-
4426
# Restore registers
4527
ld $31, 0($29)
4628
ld $1, 8($29)
@@ -52,9 +34,6 @@ _mcount:
5234
j $31
5335
move $31, $1 # restore caller's ra
5436

55-
_busy:
56-
.space 4
57-
5837
.end _mcount
5938

6039
.set reorder

src/prof/prof.c

+63-23
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include <stdio.h>
1414
#include <string.h>
1515

16+
#include <pspprof.h>
17+
1618
#define GMON_PROF_ON 0
1719
#define GMON_PROF_BUSY 1
1820
#define GMON_PROF_ERROR 2
@@ -78,8 +80,11 @@ extern int _ftext;
7880
extern int _etext;
7981

8082
/* forward declarations */
83+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
8184
void __gprof_cleanup(void);
85+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
8286
void __mcount(unsigned int, unsigned int);
87+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
8388
static SceUInt timer_handler(SceUID uid, SceKernelSysClock *c1, SceKernelSysClock *c2, void *common);
8489

8590
/** Initializes pg library
@@ -89,6 +94,7 @@ static SceUInt timer_handler(SceUID uid, SceKernelSysClock *c1, SceKernelSysCloc
8994
for sampling statistics. Note that this also installs a timer that
9095
runs at 1000 hertz.
9196
*/
97+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
9298
static void initialize()
9399
{
94100
initialized = 1;
@@ -122,6 +128,15 @@ static void initialize()
122128
memset((void *)gp.samples, '\0', gp.nsamples * (sizeof(unsigned int )));
123129

124130
gp.timer = sceKernelCreateVTimer("gprof timer", NULL);
131+
if (gp.timer < 0)
132+
{
133+
free(gp.arcs);
134+
free(gp.samples);
135+
gp.arcs = 0;
136+
gp.samples = 0;
137+
gp.state = GMON_PROF_ERROR;
138+
return;
139+
}
125140

126141
SceKernelSysClock sc;
127142
sc.hi = 0;
@@ -151,12 +166,17 @@ static void initialize()
151166
}
152167
}
153168

154-
/** Writes gmon.out dump file and stops profiling
169+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
170+
void gprof_start(void) {
171+
// There is already a profiling session running, let's stop it and ignore the result
172+
if (gp.state == GMON_PROF_ON) {
173+
gprof_stop(NULL, 0);
174+
}
175+
initialize();
176+
}
155177

156-
Called from atexit() handler; will dump out a host:gmon.out file
157-
with all collected information.
158-
*/
159-
void __gprof_cleanup()
178+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
179+
void gprof_stop(const char* filename, int should_dump)
160180
{
161181
FILE *fp;
162182
int i;
@@ -171,29 +191,47 @@ void __gprof_cleanup()
171191
/* disable profiling before we make plenty of libc calls */
172192
gp.state = GMON_PROF_OFF;
173193

194+
// Delete timer
174195
sceKernelStopVTimer(gp.timer);
175-
176-
fp = fopen("gmon.out", "wb");
177-
hdr.lpc = gp.lowpc;
178-
hdr.hpc = gp.highpc;
179-
hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples);
180-
hdr.version = GMONVERSION;
181-
hdr.profrate = SAMPLE_FREQ;
182-
hdr.resv[0] = 0;
183-
hdr.resv[1] = 0;
184-
hdr.resv[2] = 0;
185-
fwrite(&hdr, 1, sizeof(hdr), fp);
186-
fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp);
187-
188-
for (i=0; i<gp.narcs; i++)
189-
{
190-
if (gp.arcs[i].count > 0)
196+
sceKernelDeleteVTimer(gp.timer);
197+
198+
if (should_dump) {
199+
fp = fopen(filename, "wb");
200+
hdr.lpc = gp.lowpc;
201+
hdr.hpc = gp.highpc;
202+
hdr.ncnt = sizeof(hdr) + (sizeof(unsigned int) * gp.nsamples);
203+
hdr.version = GMONVERSION;
204+
hdr.profrate = SAMPLE_FREQ;
205+
hdr.resv[0] = 0;
206+
hdr.resv[1] = 0;
207+
hdr.resv[2] = 0;
208+
fwrite(&hdr, 1, sizeof(hdr), fp);
209+
fwrite(gp.samples, gp.nsamples, sizeof(unsigned int), fp);
210+
211+
for (i=0; i<gp.narcs; i++)
191212
{
192-
fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp);
213+
if (gp.arcs[i].count > 0)
214+
{
215+
fwrite(gp.arcs + i, sizeof(struct rawarc), 1, fp);
216+
}
193217
}
218+
219+
fclose(fp);
194220
}
195221

196-
fclose(fp);
222+
// Free memory
223+
free(gp.arcs);
224+
free(gp.samples);
225+
}
226+
227+
/** Writes gmon.out dump file and stops profiling
228+
Called from atexit() handler; will dump out a gmon.out file
229+
at cwd with all collected information.
230+
*/
231+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
232+
void __gprof_cleanup()
233+
{
234+
gprof_stop("gmon.out", 1);
197235
}
198236

199237
/** Internal C handler for _mcount()
@@ -205,6 +243,7 @@ void __gprof_cleanup()
205243
beginning of each compiled routine, which eventually brings the
206244
control to here.
207245
*/
246+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
208247
void __mcount(unsigned int frompc, unsigned int selfpc)
209248
{
210249
int e;
@@ -238,6 +277,7 @@ void __mcount(unsigned int frompc, unsigned int selfpc)
238277

239278
/** Internal timer handler
240279
*/
280+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
241281
static SceUInt timer_handler(SceUID uid, SceKernelSysClock *requested, SceKernelSysClock *actual, void *common)
242282
{
243283
unsigned int frompc = gp.pc;

src/prof/pspprof.h

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* PSP Software Development Kit - https://github.com/pspdev
3+
* -----------------------------------------------------------------------
4+
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
5+
*
6+
* pspprof.h - Prototypes for the profiler library
7+
*
8+
* Copyright (c) 2006 Urchin
9+
*
10+
*/
11+
#ifndef __PSPPROF_H__
12+
#define __PSPPROF_H__
13+
14+
#ifdef __cplusplus
15+
extern "C" {
16+
#endif
17+
18+
19+
/**
20+
* Start the profiler.
21+
* If the profiler is already running, this function stops the previous session
22+
* and ignores its result.
23+
* Finally, it initializes a new profiler session.
24+
*/
25+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
26+
void gprof_start(void);
27+
/**
28+
* Stop the profiler.
29+
* If the profiler is not running, this function does nothing.
30+
* @param filename The name of the file to write the profiling data to.
31+
* @param should_dump If 1, the profiling data will be written to the file.
32+
* If 0, the profiling data will be discarded.
33+
*/
34+
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
35+
void gprof_stop(const char* filename, int should_dump);
36+
37+
#ifdef __cplusplus
38+
}
39+
#endif
40+
41+
#endif /* __PSPPROF_H__ */

src/samples/Makefile.am

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ SAMPLES = \
1212
debug/debugkb \
1313
debug/sio \
1414
debug/gdb \
15+
gprof/basic \
16+
gprof/custom \
1517
gu/beginobject \
1618
gu/blend \
1719
gu/blit \

src/samples/Makefile.samples

+2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ SAMPLES = \
99
debug/debugkb \
1010
debug/sio \
1111
debug/gdb \
12+
gprof/basic \
13+
gprof/custom \
1214
gu/beginobject \
1315
gu/blend \
1416
gu/blit \
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
TARGET = gprofbasic
2+
OBJS = main.o
3+
4+
INCDIR =
5+
CFLAGS = -O2 -Wall -pg -g
6+
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
7+
ASFLAGS = $(CFLAGS)
8+
9+
LIBDIR =
10+
LDFLAGS = -pg -g
11+
12+
EXTRA_TARGETS = EBOOT.PBP
13+
PSP_EBOOT_TITLE = GProf Basic Example
14+
15+
PSPSDK=$(shell psp-config --pspsdk-path)
16+
include $(PSPSDK)/lib/build.mak

src/samples/gprof/basic/README.md

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
Sample program to show how to use the `gprof` feature.
2+
3+
The requirements are simple: adding the `-g -pg` flags to both `CFLAGS` and `LDFLAGS` is enough to make things work out of the box.
4+
5+
First, run your program; once it exits, it automatically generates a `gmon.out` file in the current working directory (CWD).
6+
7+
In order to inspect the content of the generated file you need to use the `psp-gprof` binary.
8+
9+
For instance, using the following syntax:
10+
```
11+
psp-gprof -b {binary.elf} gmon.out
12+
```
13+
14+
like:
15+
```
16+
psp-gprof -b gprofbasic.elf gmon.out
17+
```
18+
19+
20+
It will show something like:
21+
```
22+
Flat profile:
23+
24+
Each sample counts as 0.001 seconds.
25+
% cumulative self self total
26+
time seconds seconds calls ms/call ms/call name
27+
95.98 0.17 0.17 104728 0.00 0.00 is_prime
28+
4.02 0.17 0.01 1 7.00 7.00 dummy_function
29+
0.00 0.17 0.00 1 0.00 174.00 main
30+
0.00 0.17 0.00 1 0.00 167.00 sum_of_square_roots
31+
32+
33+
Call graph
34+
35+
36+
granularity: each sample hit covers 2 byte(s) for 0.57% of 0.17 seconds
37+
38+
index % time self children called name
39+
0.00 0.17 1/1 _main [2]
40+
[1] 100.0 0.00 0.17 1 main [1]
41+
0.00 0.17 1/1 sum_of_square_roots [4]
42+
0.01 0.00 1/1 dummy_function [5]
43+
-----------------------------------------------
44+
<spontaneous>
45+
[2] 100.0 0.00 0.17 _main [2]
46+
0.00 0.17 1/1 main [1]
47+
-----------------------------------------------
48+
0.17 0.00 104728/104728 sum_of_square_roots [4]
49+
[3] 96.0 0.17 0.00 104728 is_prime [3]
50+
-----------------------------------------------
51+
0.00 0.17 1/1 main [1]
52+
[4] 96.0 0.00 0.17 1 sum_of_square_roots [4]
53+
0.17 0.00 104728/104728 is_prime [3]
54+
-----------------------------------------------
55+
0.01 0.00 1/1 main [1]
56+
[5] 4.0 0.01 0.00 1 dummy_function [5]
57+
-----------------------------------------------
58+
59+
60+
Index by function name
61+
62+
[5] dummy_function [1] main
63+
[3] is_prime [4] sum_of_square_roots
64+
```
65+
66+
Cheers

0 commit comments

Comments
 (0)