While doing some reverse engineering of ptxas I discovered that its lexer was generated by lex in fast mode (lex -f). Knowing that NVIDIA tries to hide as much as possible from us, it would be good to extract which tokens their lexer is able to consume. Surprisingly, I was unable to find a solution on Google for this simple task of token recovery. Even worse, it seems that nobody understands how the 40-year-old lex DFA code works. So, as usual, I had to do it myself
Code
/* One entry of the yy_transition table used by the scanner loop. */
struct yy_trans_info
{
/* Compared against the current input character; a transition is taken only when they are equal. */
flex_int32_t yy_verify;
/* Added to the state pointer when the transition is taken; in the entry just before a state it is read as the action number. */
flex_int32_t yy_nxt;
};
/* Pointers into yy_transition; the scanner indexes this table with yy_start
 * to obtain its initial state pointer. */
static const struct yy_trans_info *yy_start_state_list[3] =
{
&yy_transition[1],
&yy_transition[3],
&yy_transition[24],
} ;
/* Excerpt of the scanner's matching loop (the enclosing function and the
 * closing braces are not part of this excerpt). */
/* Fall back to start state 1 when yy_start is still zero. */
if ( ! (yy_start) )
(yy_start) = 1; /* first start state */
while(1) {
/* Every scan begins at the table entry selected by the current yy_start. */
yy_current_state = yy_start_state_list[(yy_start)];
yy_match:
{
const struct yy_trans_info *yy_trans_info;
YY_CHAR yy_c;
/* Keep consuming input while the entry at offset yy_c from the current
 * state carries that same character in yy_verify; a mismatch ends the loop. */
for ( yy_c = YY_SC_TO_UI(*yy_cp);
(yy_trans_info = &yy_current_state[yy_c])->yy_verify == yy_c;
yy_c = YY_SC_TO_UI(*++yy_cp) )
{
/* yy_nxt is a relative offset applied to the current state pointer. */
yy_current_state += yy_trans_info->yy_nxt;
/* A non-zero yy_nxt in the entry just before the new state: remember this
 * state and input position so the scanner can back up to them later. */
if ( yy_current_state[-1].yy_nxt )
{
(yy_last_accepting_state) = yy_current_state;
(yy_last_accepting_cpos) = yy_cp;
}
}
yy_find_action:
/* The action number is stored in the entry immediately before the state. */
yy_act = yy_current_state[-1].yy_nxt;
do_action:
switch ( yy_act )
{ /* beginning of action switch */
case 0: /* must back up */
/* undo the effects of YY_DO_BEFORE_ACTION */
*yy_cp = (yy_hold_char);
/* Return to the saved position and state, then look up the action again. */
yy_cp = (yy_last_accepting_cpos) + 1;
yy_current_state = (yy_last_accepting_state);
goto yy_find_action;
- the final action is selected as yy_current_state[-1].yy_nxt
- the initial value of yy_current_state is yy_start_state_list[(yy_start)], where yy_start = 1 - this is the root. Frankly, I don't know why the table yy_start_state_list has 3 items - perhaps to support so-called start conditions
- yy_current_state gets a new value depending on the input character yy_c
- level - depth of recursion to limit size of token
- old - previous value of yy_current_state
- string with collected symbols
- map of visited states - to avoid infinite loops
Complexity
Data to extract
/* Transition-table entry with 16-bit fields (the earlier listing in this file
 * uses flex_int32_t for the same two members).
 * NOTE(review): presumably this is the layout of the table to be extracted
 * from the analysed binary - confirm against the actual entry size. */
struct yy_trans_info
{
flex_int16_t yy_verify;
flex_int16_t yy_nxt;
};
yy_state_list dq offset yy_transition ; DATA XREF: yy_get_prev_state+4↑o
; yylex:loc_55B15B92DB30↑o
dq offset dword_55B15CE894B8
dq offset unk_55B15CE898C8
db 0
Results
| token index | rule number | token |
|---|---|---|
| 104 | 528 | call |
| 108 | 53 | .ftz/WARP_SZ |
| 109 | 54 | << |
| 10a | 55 | >> |
| 10f | 38 | .extern |
| 110 | 39 | .visible |
| 111 | 40 | .weak |
| 112 | 41 | .common |
| 113 | 490 | .surfref |
| 114 | 22 | .entry |
| 115 | 43 | .FORCE_INLINE |
| 116 | 44 | .proto |
| 117 | 23 | .maxnreg |
| 118 | 24 | .maxntid |
| 119 | 25 | .maxnctapersm |
| 11a | 26 | .minnctapersm |
| 11b | 27 | .reqntid |
| 11c | 34 | .reqnctapercluster |
| 11d | 35 | .explicitcluster |
| 11e | 37 | .maxclusterrank |
| 11f | 36 | .blocksareclusters |
| 12a | 246 | .rand |
| 12d | 63 | .reg |
| 12e | 65 | .const[10] |
| 12f | 66 | .global |
| 130 | 67 | .local |
| 131 | 68 | .param |
| 132 | 73 | .shared |
| 133 | 526 | .tex |
| 137 | 71 | .shared::cta |
| 139 | 69 | .param::entry |
| 13a | 70 | .param::func |
| 13f | 45 | .ptr |
| 14d | 21 | .func |
| 14e | 17 | .align |
| 14f | 18 | .allocno |
| 150 | 19 | .retaddr_allocno |
| 153 | 4 | .version |
| 154 | 5 | .target |
| 155 | 6 | .address_size |
| 156 | 20 | .scratch |
| 157 | 542 | @@DWARF |
| 158 | 7 | .section |
| 159 | 8 | .file |
| 15a | 9 | .loc |
| 15b | 15 | .pragma |
| 15c | 543 | @progbits |
| 15e | 10 | inlined_at |
| 15f | 11 | function_name |
| 16c | 2 | .MACRO |
| 171 | 50 | .branchtargets |
| 172 | 51 | .calltargets |
| 173 | 52 | .callprototype |
| 174 | 46 | .attribute |
| 175 | 47 | .managed |
| 176 | 28 | .noreturn |
| 177 | 29 | .unique |
| 178 | 30 | .local_maxnreg |
| 179 | 42 | .hidden |
| 17a | 31 | .abi_preserve |
| 17d | 48 | .unified |
| 17e | 49 | .reserved |
| 17f | 12 | .metadata_section |
| 180 | 14 | .metadata |
| 181 | 13 | .metadata_index |
| 184 | 487 | .L2::256B |
| 18f | 124 | .mbarrier_init |
| 198 | 207 | .seq |
| 199 | 220 | .4g |
| 19d | 222 | .L2::cache_hint |
| 1a5 | 523 | ::st |
Комментариев нет:
Отправить комментарий