Skip to content

Commit 41712f8

Browse files
committed
remove commented-out returned SV code, introduce split_head
1 parent 7ecd9ca commit 41712f8

File tree

2 files changed

+116
-37
lines changed

2 files changed

+116
-37
lines changed

API.xs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
*/
1010

1111
#define WIN32_LEAN_AND_MEAN
12-
#include <emmintrin.h>
1312
#include <windows.h>
1413
#include <memory.h>
1514
#define PERL_NO_GET_CONTEXT
@@ -18,10 +17,9 @@
1817
#include "perl.h"
1918
#include "XSUB.h"
2019
#define CROAK croak
21-
20+
#include <emmintrin.h>
2221
#include "API.h"
2322

24-
2523
/*
2624
* some Perl macros for backward compatibility
2725
*/
@@ -177,6 +175,9 @@ STATIC void w32sv_setwstr(pTHX_ SV * sv, WCHAR *wstr, INT_PTR wlenparam) {
177175
char flags, short stackunwind, char outType
178176
note the stackunwind is unaligned
179177
*/
178+
179+
#define CTRL_IS_MORE 0x10
180+
#define CTRL_HAS_PROTO 0x20
180181
typedef struct {
181182
union {
182183
struct {
@@ -808,7 +809,11 @@ void
808809
_ImportXS(...)
809810
PREINIT:
810811
char * subname;
812+
#ifdef W32A_SPLITHEAD
813+
XS_EUPXS(XS_Win32__API_ImportCall);
814+
#else
811815
XS_EUPXS(XS_Win32__API_Call);
816+
#endif
812817
#if (PERL_REVISION == 5 && PERL_VERSION < 9)
813818
char* file = __FILE__;
814819
#else
@@ -822,7 +827,11 @@ CODE:
822827
subname = SvPVX(sv); }
823828
{ SV * api = POPs;
824829
PUTBACK;
830+
#ifdef W32A_SPLITHEAD
831+
{ CV * cv = newXS(subname, XS_Win32__API_ImportCall, file);
832+
#else
825833
{ CV * cv = newXS(subname, XS_Win32__API_Call, file);
834+
#endif
826835
XSANY.any_ptr = (APICONTROL *) SvPVX(SvRV(api));
827836
setMgSV(aTHX_ (SV*)cv, api); }}
828837
return;

Call.c

Lines changed: 104 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ mov edi, [ecx+ebx]
2323
Special version of ST() macro whose x parameter is in units of "sizeof(SV *)".
2424
This saves a *4 or *8 on x */
2525
#define W32A_ST(x) *(SV**)((size_t)PL_stack_base+(size_t)(x))
26+
#define IS_CALL sizeof(SV *) // must be SV *, subbed from a SV **
27+
#define NEEDS_POST_CALL_LOOP 0x1
2628

2729
/*all callbacks in Call() that use Call()'s SP (not a dSP SP)
2830
must call SPAGAIN after the ENTER, in case of an earlier callback
@@ -40,6 +42,9 @@ mov edi, [ecx+ebx]
4042
update above /|\
4143
*/
4244

45+
/* W32A_SPLITHEAD is only for compilers that will optimize to a jmp, not a call,
46+
none are known on 32-bit right now */
47+
#ifndef W32A_SPLITHEAD
4348
XS(XS_Win32__API_Call)
4449
{
4550
WIN32_API_PROFF(QueryPerformanceFrequency(&my_freq));
@@ -59,13 +64,11 @@ XS(XS_Win32__API_Call)
5964
ax_p++;
6065
PERL_UNUSED_VAR(cv); /* -W */
6166
{
62-
{
6367
APIPARAM *params;
6468
const APICONTROL * control;
6569
APIPARAM * param;
66-
//SV * retsv;
70+
size_t param_len;
6771
SV* in_type;
68-
//AV* intypes;
6972

7073
AV* pparray;
7174
SV** ppref;
@@ -77,8 +80,6 @@ XS(XS_Win32__API_Call)
7780
SV ** ax_end;
7881
long_ptr tin;
7982
UCHAR rt_flags;
80-
#define IS_CALL sizeof(SV *) // must be SV *, subbed from a SV **
81-
#define NEEDS_POST_CALL_LOOP 0x1
8283
SV * sentinal;
8384
if(!XSANY.any_ptr){ /* ->Call( */
8485
SV* api;
@@ -94,6 +95,83 @@ XS(XS_Win32__API_Call)
9495
rt_flags = 0;
9596
control = (const APICONTROL *)XSANY.any_ptr;
9697
}
98+
99+
100+
#else //is W32A_SPLITHEAD
101+
STATIC void Call_body(pTHX_ const APICONTROL * const control, UCHAR rt_flags, SV ** ax_p,
102+
SV ** items_sv);
103+
104+
XS(XS_Win32__API_ImportCall)
105+
{
106+
WIN32_API_PROFF(QueryPerformanceFrequency(&my_freq));
107+
WIN32_API_PROFF(W32A_Prof_GT(&start));
108+
{
109+
dVAR;
110+
SV ** ax_p = (SV **)((size_t)(POPMARK)*sizeof(SV *)); /*ax_p = pointer, not the normal ax */
111+
if (PL_markstack_ptr+1 == PL_markstack_max)
112+
markstack_grow();
113+
{
114+
dSP;
115+
EXTEND(SP,CALL_PL_ST_EXTEND);//the one and only EXTEND, all users must
116+
//static assert against the constant
117+
{//compiler can toss some variables that EXTEND used
118+
SV **mark = &(W32A_ST(ax_p));
119+
SV ** items_sv = (size_t)sp - (size_t)mark;
120+
ax_p++;
121+
PERL_UNUSED_VAR(cv); /* -W */
122+
Call_body(aTHX_ (const APICONTROL *)XSANY.any_ptr, 0, ax_p, items_sv);
123+
}
124+
}
125+
}
126+
}
127+
128+
XS(XS_Win32__API_Call)
129+
{
130+
WIN32_API_PROFF(QueryPerformanceFrequency(&my_freq));
131+
WIN32_API_PROFF(W32A_Prof_GT(&start));
132+
{
133+
dVAR;
134+
SV ** ax_p = (SV **)((size_t)(POPMARK)*sizeof(SV *)); /*ax_p = pointer, not the normal ax */
135+
if (PL_markstack_ptr+1 == PL_markstack_max)
136+
markstack_grow();
137+
{
138+
dSP;
139+
EXTEND(SP,CALL_PL_ST_EXTEND);//the one and only EXTEND, all users must
140+
//static assert against the constant
141+
{//compiler can toss some variables that EXTEND used
142+
SV **mark = &(W32A_ST(ax_p));
143+
SV ** items_sv = (size_t)sp - (size_t)mark;
144+
SV* api;
145+
ax_p++;
146+
if (items_sv == 0)
147+
croak_xs_usage(cv, "api, ...");
148+
api = W32A_ST(ax_p);
149+
items_sv--; /* make ST(0)/api obj on Perl Stack disapper */
150+
ax_p++;
151+
Call_body(aTHX_ (APICONTROL *) SvPVX(SvRV(api)), IS_CALL, ax_p, items_sv);
152+
}
153+
}
154+
}
155+
156+
}
157+
158+
STATIC void Call_body(pTHX_ const APICONTROL * const control, UCHAR rt_flags, SV ** ax_p,
159+
SV ** items_sv) {
160+
dVAR;
161+
dSP;
162+
APIPARAM *params;
163+
APIPARAM * param;
164+
size_t param_len;
165+
SV* in_type;
166+
167+
AV* pparray;
168+
SV** ppref;
169+
170+
SV** code;
171+
172+
long_ptr tin;
173+
SV * sentinal;
174+
#endif //#ifndef W32A_SPLITHEAD
97175
{
98176
/* all but -1 are unsigned, so we have ~65K params, not 32K
99177
turn short -1 into int -1, but turn short -2 into unsigned int 65534 */
@@ -104,8 +182,6 @@ XS(XS_Win32__API_Call)
104182
croak("Wrong number of parameters: expected %d, got %d.\n", nin, items);
105183
}
106184
}
107-
//intypes = control->intypes;
108-
109185
if(nin) {
110186
{
111187
SV ** ax_i;
@@ -120,26 +196,28 @@ XS(XS_Win32__API_Call)
120196
Call_asm(), so the ST() slots AREN'T always what the caller passed in
121197
*/
122198
params = (APIPARAM *) _alloca(nin * sizeof(APIPARAM));
199+
// SSE copying, unknown if i386 or SSE copying is faster
200+
/* {
201+
__m128i * param_dst = (__m128i *)param;
202+
__m128i * param_src = (__m128i *)&(control->param);
203+
__m128i * params_end = (__m128i *)((size_t)&(control->param)+param_len);
204+
do {
205+
*param_dst = *param_src;
206+
param_src++;
207+
param_dst++;
208+
} while (param_src != params_end);
209+
}*/
123210
{
124211
__m128i * param_dst = params;
125212
__m128i * param_src = &(control->param);
126213
__m128i * params_end = (size_t)&(control->param)+(size_t)(nin * sizeof(APIPARAM));
127214
do {
215+
//todo, make it copy 16 bytes in 1 loop pass, not 8
128216
*param_dst = *param_src;
129217
param_src++;
130218
param_dst++;
131219
} while (param_src != params_end);
132220
}
133-
//{
134-
// __int64 * param_dst = params;
135-
// __int64 * param_src = &(control->param);
136-
// __int64 * params_end = (size_t)&(control->param)+(size_t)(nin * sizeof(APIPARAM));
137-
// do {
138-
// *param_dst = *param_src;
139-
// param_src++;
140-
// param_dst++;
141-
// } while (param_src != params_end);
142-
//}
143221
//memcpy(params, &(control->param), nin * sizeof(APIPARAM));
144222

145223
/* #### FIRST PASS: initialize params #### */
@@ -404,7 +482,7 @@ XS(XS_Win32__API_Call)
404482
WIN32_API_PROFF(W32A_Prof_GT(&Call_asm_after));
405483
/* #### THIRD PASS: postfix pointers/structures #### */
406484
if(rt_flags & NEEDS_POST_CALL_LOOP) {
407-
#ifndef WIN32_API_DEBUG
485+
#ifdef WIN32_API_DEBUG
408486
int i = 0;
409487
#endif
410488
SV ** ax_i;
@@ -420,15 +498,16 @@ XS(XS_Win32__API_Call)
420498
char * sen = SvPVX(sentinal);
421499
char * end = SvEND(sv);
422500
end -= (sizeof(SENTINAL_STRUCT));
501+
//todo replace with inline comparison
423502
if(memcmp(end, sen, sizeof(SENTINAL_STRUCT))){
424503
HV * env = get_hv("ENV", GV_ADD);
425504
SV ** buf_check = hv_fetchs(env, "WIN32_API_SORRY_I_WAS_AN_IDIOT", 0);
426505
if(buf_check && sv_true(*buf_check)) {0;}
427-
else{croak("Win32::API::Call: parameter %d had a buffer overflow", i+1);}
506+
else{croak("Win32::API::Call: parameter %d had a buffer overflow", param->idx1);}
428507
}else{ //remove the sentinal off the buffer
429508
SvCUR_set(sv, SvCUR(sv)-sizeof(SENTINAL_STRUCT));
430509
}
431-
if(control->has_proto && control->is_more){ /* bad VC optimizer && is always a branch */
510+
if(*(char *)&control->whole_bf & (CTRL_IS_MORE|CTRL_HAS_PROTO)){ /* bad VC optimizer && is always a branch */
432511
callPack(aTHX_ control, param, sv, PARAM3_UNPACK);
433512
//pointerCall3Param(aTHX_ control->api, AvARRAY(control->intypes)[i], sv, PARAM3_UNPACK );
434513
}
@@ -477,29 +556,25 @@ XS(XS_Win32__API_Call)
477556
#ifdef WIN32_API_DEBUG
478557
printf("(XS)Win32::API::Call: returning %Id.\n", retval.l);
479558
#endif
480-
//retsv = newSViv(retval.l);
481559
sv_setiv(TARG, retval.l);
482560
break;
483561
case (T_INTEGER|T_FLAG_UNSIGNED):
484562
case (T_NUMBER|T_FLAG_UNSIGNED):
485563
#ifdef WIN32_API_DEBUG
486564
printf("(XS)Win32::API::Call: returning %Iu.\n", retval.l);
487565
#endif
488-
//retsv = newSVuv(retval.l);
489566
sv_setuv(TARG, retval.l);
490567
break;
491568
case T_SHORT:
492569
#ifdef WIN32_API_DEBUG
493570
printf("(XS)Win32::API::Call: returning %hd.\n", retval.l);
494571
#endif
495-
//retsv = newSViv((IV)(short)retval.l);
496572
sv_setiv(TARG, (IV)(short)retval.l);
497573
break;
498574
case (T_SHORT|T_FLAG_UNSIGNED):
499575
#ifdef WIN32_API_DEBUG
500576
printf("(XS)Win32::API::Call: returning %hu.\n", retval.l);
501577
#endif
502-
//retsv = newSVuv((UV)(unsigned short)retval.l);
503578
sv_setuv(TARG, (UV)(unsigned short)retval.l);
504579
break;
505580
#ifdef T_QUAD
@@ -509,7 +584,6 @@ XS(XS_Win32__API_Call)
509584
#ifdef WIN32_API_DEBUG
510585
printf("(XS)Win32::API::Call: returning %I64d.\n", retval.q);
511586
#endif
512-
//retsv = newSVpvn((char *)&retval.q, sizeof(retval.q));
513587
sv_setpvn(TARG, (char *)&retval.q, sizeof(retval.q));
514588
if(control->UseMI64){
515589
SP--; /*remove TARG from PL stack */
@@ -529,14 +603,12 @@ XS(XS_Win32__API_Call)
529603
#ifdef WIN32_API_DEBUG
530604
printf("(XS)Win32::API::Call: returning %I64d.\n", retval.q);
531605
#endif
532-
//retsv = newSViv(retval.q);
533606
sv_setiv(TARG, retval.q);
534607
break;
535608
case (T_QUAD|T_FLAG_UNSIGNED):
536609
#ifdef WIN32_API_DEBUG
537610
printf("(XS)Win32::API::Call: returning %I64d.\n", retval.q);
538611
#endif
539-
//retsv = newSVuv(retval.q);
540612
sv_setiv(TARG, retval.q);
541613
break;
542614
#endif //USEMI64
@@ -545,14 +617,12 @@ XS(XS_Win32__API_Call)
545617
#ifdef WIN32_API_DEBUG
546618
printf("(XS)Win32::API::Call: returning %f.\n", retval.f);
547619
#endif
548-
//retsv = newSVnv((double) retval.f);
549620
sv_setnv(TARG, (double) retval.f);
550621
break;
551622
case T_DOUBLE:
552623
#ifdef WIN32_API_DEBUG
553624
printf("(XS)Win32::API::Call: returning %f.\n", retval.d);
554625
#endif
555-
//retsv = newSVnv(retval.d);
556626
sv_setnv(TARG, retval.d);
557627
break;
558628
case T_POINTER:
@@ -562,7 +632,7 @@ XS(XS_Win32__API_Call)
562632
#endif
563633
RET_PTR_NULL:
564634
if(!control->is_more) sv_setiv(TARG, 0);
565-
else goto return_undef; //undef much clearer
635+
else goto return_undef; //undef much clearer, IV 0 is for back compat reasons
566636
} else {
567637
#ifdef WIN32_API_DEBUG
568638
printf("(XS)Win32::API::Call: returning 0x%x '%s'\n", retval.p, retval.p);
@@ -601,8 +671,7 @@ XS(XS_Win32__API_Call)
601671
printf("(XS)Win32::API::Call: returning UNDEF.\n");
602672
#endif
603673
W32A_ST(ax_p) = &PL_sv_undef;
604-
return;
605-
//goto return_no_mortal;
674+
return; /*dont call SvSETMAGIC or use TARG */
606675
}
607676
//retsv = sv_2mortal(retsv);
608677
//return_no_mortal:
@@ -613,7 +682,7 @@ XS(XS_Win32__API_Call)
613682
WIN32_API_PROFF(W32A_Prof_GT(&return_time2));
614683
///*
615684
WIN32_API_PROFF(printf("freq %I64u start %I64u loopprep %I64u loopstart %I64u Call_asm_b4 %I64u Call_asm_after %I64u rtn_time %I64u rtn_time2\n",
616-
my_freq, /* 12 is bulk88's Core 2 TSC increment unit*/
685+
my_freq, /* 12 is bulk88's Core 2 TSC increment unit, eyes hurt less comparing the numbers*/
617686
(loopprep.QuadPart - start.QuadPart - (return_time2.QuadPart-return_time.QuadPart))/12,
618687
(loopstart.QuadPart - loopprep.QuadPart -(return_time2.QuadPart-return_time.QuadPart))/12,
619688
(Call_asm_b4.QuadPart - loopstart.QuadPart - (return_time2.QuadPart-return_time.QuadPart))/12,
@@ -625,10 +694,11 @@ XS(XS_Win32__API_Call)
625694
return; /* don't use CODE:'s boilerplate */
626695
}//tout scope
627696
}//call_asm scope
697+
#ifndef W32A_SPLITHEAD
628698
}
629699
}
630700
}
631701
}
632-
}
702+
#endif
633703
}
634704
#undef W32AST

0 commit comments

Comments
 (0)