Entry by tsh73

Entry for Rod's Variables (2105) challenge

It's got too big to put it on a forum. Actually, I tried to do a tokenizer so it figures out all things including numbers and strings. But after figuring things out it outputs only things that relevant to this task (variables, subs/user functions, labels).
Uses keyword list etc from Rod's entry.
Tested on biggest program I've ever seen in LB - freeform404.bas ;)

'variable challenge
'tsh73, Jan 2015
 
global t.maxTokens, t.name, t.type, tp.line, tp.stmnt, tp.num
global curTokenNum, curTokNum, nLines, curStmntNum, contLine
 
 
[nonUsedLabel] dummy=1:dummy2=dummy 'non-used line with a (:)
unDimmed(3)=1   'example of un-dimmed array
 
gosub [setLists]
 
'fname$="vars01.bas"
'fname$="vars02.bas"
'fname$="test.bas"
filedialog "Select file to process";chr$(0);"open", "*.bas", fname$
if fname$="" then print "No file selected - bye": end
print "Processing "; fname$
 
t.maxTokens=100000
t.name = 0
t.type = 1
dim token$(t.maxTokens, 1)
tp.line=0
tp.stmnt=1
tp.num=2
dim tokenPos(t.maxTokens, 2)
curTokenNum=0   'global
curTokNum=0     'in a statement (that is between (:))
 
verbose = 0 '1
 
open fname$ for input as #1
nLines=1
isContinuation=0
print "Reading parsing line by line..."
t0=time$("ms")
while not(eof(#1))
    nLines=nLines+1
    line input #1, aLine$
    if verbose then print ">"; aLine$
    curStmntNum=1
    curTokNum=0
 
    if isContinuation then
        if contLine=0 then contLine = nLines-1
        isContinuation=0
    else
        contLine=0
    end if
 
    while 1
        scan
        'reading a line splitting it to tokens
        '1) skip all starting spaces
        aLine$=remStartSpaces$(aLine$)
        'exit if nothing left
        if aLine$="" then exit while
        'print ":"; aLine$
        '2) check special sequences
        ''comment
        if left$(aLine$,1)="'" then 'skip line as comment
            if verbose then print "   comment skipped"
            exit while
        end if
        '3) recognize / read / cut token
        select case
        '3.1) check special sequences
            '[label]
        case left$(aLine$,1)="["
            label$=upto$(aLine$, "]")+"]"
            aLine$=after$(aLine$, "]")
            if verbose then print "   label: ";label$
            call storeToken label$, "lbl"
        '3.2) check special sequences
            '"string"
        case left$(aLine$,1)=qq$  'string
            aLine$=mid$(aLine$,2)   'cut left (")
            aString$=qq$+upto$(aLine$,qq$)+qq$
            aLine$=after$(aLine$, qq$)
            if verbose then print "   string: "; aString$
            call storeToken aString$, "str"
 
        case left$(aLine$,1)="_" and instr(varChars$, mid$(aLine$,2,1))<>0 'winConst
            token$=left$(aLine$,1)
            aLine$=mid$(aLine$,2)
            while instr(varChars$;"_", left$(aLine$,1))<>0
                token$=token$+left$(aLine$,1)
                aLine$=mid$(aLine$,2)
                if aLine$="" then exit while
            wend
            if verbose then print "   Windows constant: ";token$
            call storeToken token$, "winConst"
 
        case left$(aLine$,1)="_"
            aLine$=mid$(aLine$,2)   'cut token
            if verbose then print "   continuation char"
            'call storeToken "_", "_" 'do not store?
             isContinuation=1
 
        case left$(aLine$,1)=":"
            aLine$=mid$(aLine$,2)   'cut token
            if verbose then print "   operator separator"
            call storeToken ":", ":"
            curStmntNum=curStmntNum+1
            curTokNum=0
 
        case left$(aLine$,1)="|"
            aLine$=mid$(aLine$,2)   'cut token
            if verbose then print "   menu separator"
            call storeToken "|", "|"
 
        case left$(aLine$,1)=","
            aLine$=mid$(aLine$,2)   'cut token
            if verbose then print "   parameter separator"
            call storeToken ",", ","
 
        case left$(aLine$,1)=";"
            aLine$=mid$(aLine$,2)   'cut token
            if verbose then print "   concatenation"
            call storeToken ";", ";"
 
        case left$(aLine$,1)="="
            aLine$=mid$(aLine$,2)   'cut token
            if verbose then print "   assignment or equal"
            call storeToken "=", "="
 
        case left$(aLine$,1)="("
            aLine$=mid$(aLine$,2)   'cut token
            if verbose then print "   opening ("
            call storeToken "(", "("
        case left$(aLine$,1)=")"
            aLine$=mid$(aLine$,2)   'cut token
            if verbose then print "   closing )"
            call storeToken ")", ")"
 
        case left$(aLine$,1)="#"  'handle
            token$=left$(aLine$,1)
            aLine$=mid$(aLine$,2)
            while instr(varChars$, left$(aLine$,1))<>0
                token$=token$+left$(aLine$,1)
                aLine$=mid$(aLine$,2)
                if aLine$="" then exit while
            wend
            if verbose then print "   handle: ";token$
            call storeToken token$, "hndl"
 
        case instr(firstVarChars$, left$(aLine$,1))<>0  'name (var, arr, sub, func) or keyword
            token$=left$(aLine$,1)
            aLine$=mid$(aLine$,2)
            while instr(varChars$, left$(aLine$,1))<>0 _
                or (left$(aLine$,1)="_" and instr(varChars$, mid$(aLine$,2,1))<>0 )
                token$=token$+left$(aLine$,1)
                aLine$=mid$(aLine$,2)
                if aLine$="" then exit while
            wend
            select case
            case instr(comlist$;typlist$;opelist$, " ";lower$(token$);" ")<>0
                if verbose then print "   keyword: ";token$
                call storeToken token$, "kwrd"
                'if REM
                if lower$(token$) = "rem" then 'comment
                    if verbose then print "   comment skipped"
                    exit while
                end if
 
            case else
                if verbose then print "name (var, array, sub, or func): ";token$
                call storeToken token$, "name"
            end select
 
        case instr(firstNumChars$, left$(aLine$,1))<>0 'number?
        notANumber=0
        select case
            case instr(digits$, left$(aLine$,1))<>0
                token$=left$(aLine$,1)
                aLine$=mid$(aLine$,2)
            case instr("-.", left$(aLine$,1))<>0 and IsNumber(left$(aLine$,2))<>0
                token$=left$(aLine$,2)
                aLine$=mid$(aLine$,3)
            case left$(aLine$,2)="-." and IsNumber(left$(aLine$,3))<>0
                token$=left$(aLine$,3)
                aLine$=mid$(aLine$,4)
            case else
                notANumber=1
            end select
            if notANumber=0 then
            'read rest of a number
                while IsNumber(token$)<>0
                    token$=token$+left$(aLine$,1)
                    aLine$=mid$(aLine$,2)
                    if aLine$="" then exit while
                wend
                if IsNumber(token$)=0 then
                    'one char extra
                    aLine$=right$(token$, 1)+aLine$
                    token$=left$(token$, len(token$)-1)
                end if
                if verbose then print "number: ";token$
                call storeToken token$, "num"
            else    'should be single "-" (or not compiles)
                token$=left$(aLine$,1)
                aLine$=mid$(aLine$,2)
                if verbose then print "   operator ";token$
                call storeToken token$, "op"
            end if
 
        'should be moved after "numbers" so "-1" does not process as "-" "1"
        case instr("<>+-*/^", left$(aLine$,1))<>0
            token$=left$(aLine$,1)
            aLine$=mid$(aLine$,2)   'cut token
            if verbose then print "   operator ";token$
            call storeToken token$, "op"
 
        case else
                token$=left$(aLine$,1)
                aLine$=mid$(aLine$,2)
                if verbose then print "??char: ";token$
                call storeToken token$, "???"
        end select
    wend
wend
close #1
print "----------------"
print "nLines=",nLines
print "numTokens=";curTokenNum
 
print time$("ms")-t0;" ms"
t0=time$("ms")
 
goto [SkipPrint]
for i = 1 to curTokenNum
    print tokenPos(i, tp.line);" ";_
      tokenPos(i, tp.stmnt);" ";_
      tokenPos(i, tp.num),_
      token$(i, t.type),_
      token$(i, t.name)
next
[SkipPrint]
 
'second pass along token$ array, check for next "(" - to tell functions/arrays
Print "second pass along token$ array..."
i=0
while i <= curTokenNum
    i=i+1
    if token$(i, t.type)="name" then
        if i+1<=curTokenNum then
            if token$(i+1, t.type)="(" and _
                tokenPos(i, tp.line) = tokenPos(i+1, tp.line) and _
                tokenPos(i, tp.stmnt) = tokenPos(i+1, tp.stmnt) then
                    if instr(funlist$, " ";lower$(token$(i, t.name));" ")<>0 then
                        token$(i, t.type)="buildInFunc"
                    else
                        token$(i, t.type)="UDForArray"
                    end if
            end if
        end if
    end if
wend
print time$("ms")-t0;" ms"
t0=time$("ms")
 
'third pass, check for functions/subs/arrays
Print "third pass along token$ array..."
i=0
while i <= curTokenNum
    i=i+1
    if tokenPos(i, tp.num) = 1 then     'first token on a stmnt
        select case
        case lower$(token$(i, t.name)) ="dim" or  lower$(token$(i, t.name)) ="redim"
            curLine = tokenPos(i, tp.line)
            curStmnt = tokenPos(i, tp.stmnt)
            while i+1<=curTokenNum
                if curLine <> tokenPos(i+1, tp.line) _
                    or curStmnt <> tokenPos(i+1, tp.stmnt) then exit while
                i=i+1
                if token$(i, t.type)="UDForArray" then token$(i, t.type)="dimmedArray"
                'mark all other instances
                for j = 1 to curTokenNum
                    if token$(j, t.name)=token$(i, t.name) and _
                        token$(j, t.type)="UDForArray" then token$(j, t.type)="dimmedArray"
                next
            wend
 
        case  lower$(token$(i, t.name)) ="sub" or  lower$(token$(i, t.name)) ="call"
            i=i+1
            token$(i, t.type)="sub"
 
        case  lower$(token$(i, t.name)) ="function"
            i=i+1
            token$(i, t.type)="UDF"
            'mark all other instances
            for j = 1 to curTokenNum
                if token$(j, t.name)=token$(i, t.name) and _
                    token$(j, t.type)="UDForArray" then token$(j, t.type)="UDF"
            next
        end select
    end if
wend
print time$("ms")-t0;" ms"
t0=time$("ms")
 
'all other "UDForArray" are undimmed arrays
Print "last pass along token$ array..."
for j = 1 to curTokenNum
    if token$(j, t.type)="UDForArray" then token$(j, t.type)="unDimmedArray"
next
 
goto [SkipPrint2]
print "----------------"
for i = 1 to curTokenNum
    print tokenPos(i, tp.line);" ";_
      tokenPos(i, tp.stmnt);" ";_
      tokenPos(i, tp.num),_
      token$(i, t.type),_
      token$(i, t.name)
next
 
print "----------------"
 
[SkipPrint2]
 
print time$("ms")-t0;" ms"
t0=time$("ms")
 
'count variables
Print "Counting ..."
 
un.typeName=0
un.count=1
dim uniqueName$(curTokenNum,1)
 
aIndex$ = ""    'aIndex would be in a form ######|word|, where ###### index in a()
    'You can guess ###### restricts max len to 999999
aLen=0
'POSSIBLE TYPES
'(    )    ,    :    ;    ???    =    buildInFunc    dimmedArray    hndl    kwrd    lbl    name    num    op    str    sub    UDF    unDimmedArray
types2Skip$="(    )    ,    :    ;    =   |   num    op    str    buildInFunc   kwrd "
for i = 1 to curTokenNum
    w$=token$(i, t.type);":";token$(i, t.name)
    if instr(types2Skip$, token$(i, t.type))<>0 then [skipToken]
    toFind$="|"+w$+"|"
    pos=instr(aIndex$, toFind$)
    if pos =0 then
        'add it in array
        aLen = aLen+1
        uniqueName$(aLen, un.typeName)=w$
        uniqueName$(aLen, un.count)="1" 'first time
        aIndex$=aIndex$+using("######",aLen)+toFind$
    else
        'get index for a word. FAST.
        j = val(mid$(aIndex$, pos-6,6))
        cnt = val(uniqueName$(j, un.count))
        uniqueName$(j, un.count)=str$(cnt+1)
    end if
[skipToken]
next
 
print time$("ms")-t0;" ms"
t0=time$("ms")
 
Print "sorting ..."
sort uniqueName$(),1,aLen,0
 
print time$("ms")-t0;" ms"
 
print "nDiffWords", aLen
print "----------------"
print "Counter", "Type", "Name"
print "-------------------------------------------"
for i = 1 to aLen
    print uniqueName$(i, un.count), word$(uniqueName$(i, un.typeName),1,":"), _
        word$(uniqueName$(i, un.typeName),2,":")
next
 
print "-over-----------"
 
end
'----------------------
[setLists]
    qq$ = chr$(34)    '(")
 
    digits$="1234567890"
    letters$=""
    for i=asc("A") to asc("Z")
        letters$=letters$+chr$(i)
    next
    letters$=letters$+lower$(letters$)
    firstVarChars$=letters$
    varChars$=letters$+digits$+".$"
    firstNumChars$=digits$+".-"
 
    'from Rod's solution
    'command list
    comlist$=" xor while wend wait until unloadbmp trace to titlebar timer then texteditor "
    comlist$=comlist$+"textbox sub stylebits struct stopmidi stop step statictext sort select seek "
    comlist$=comlist$+"scan run return resume restore rem redim readjoystick read randomize "
    comlist$=comlist$+"radiobutton put prompt printerdialog print popupmenu playwave playmidi "
    comlist$=comlist$+"password out or open oncomerror notice nomainwin next name mod menu "
    comlist$=comlist$+"maphandle mainwin lprint loop loadbmp listbox line let kill input if "
    comlist$=comlist$+"groupbox graphicbox goto gosub global gettrim get function for fontdialog "
    comlist$=comlist$+"files filedialog field exit error end else dump do dim data cursor confirm "
    comlist$=comlist$+"combobox colordialog cls close checkbox case callfn calldll callback call "
    comlist$=comlist$+"button bmpsave bmpbutton beep as and "
 
    'type operators
    typlist$=" word void ushort ulong short ptr none long dword double boolean "
    'command operators
    opelist$=" window text random output graphics dll dialog byref binary append "
    opelist$=opelist$+"horizscrollbar vertscrollbar on off min max window_nf window_popup graphics_fs graphics_nsb graphics_fs_nsb graphics_nf_nsb "
    opelist$=opelist$+"text_fs text_nsb text_nsb_ins dialog_modal dialog_nf dialog_nf_modal dialog_fs dialog_nf_fs dialog_popup "
    opelist$=opelist$+"yellow brown red darkred pink darkpink blue darkblue green darkgreen "
    opelist$=opelist$+"cyan darkcyan white black lightgray darkgray buttonface "
 
    'function list
    funlist$=" word$ winstring val using  upper$ txcount trim$ time$ tan tab str$ sqr "
    funlist$=funlist$+"space$ sin rnd rmdir right$ not mkdir min midipos mid$ max lower$ log "
    funlist$=funlist$+"lof loc len left$ int instr inputto$ input$ inp hwnd hexdec hbmp exp "
    funlist$=funlist$+"eval eval$ eof dechex$ date$ cos chr$ atn asn asc acs abs "
    funlist$=funlist$+"upto$ after$ afterlast$ endswith remchar$ "
return
'---------------------
 
sub storeToken tkn$, type$
    curTokenNum=curTokenNum+1
    curTokNum=curTokNum+1
    token$(curTokenNum, t.name)=tkn$
    token$(curTokenNum, t.type)=type$
    tokenPos(curTokenNum, tp.line)=iif(contLine<>0, contLine, nLines)
    tokenPos(curTokenNum, tp.stmnt)=curStmntNum
    tokenPos(curTokenNum, tp.num) = curTokNum
end sub
 
'---------------------
function remStartSpaces$(aLine$)
    whiteSpaces$=chr$(9)+" "
    for i = 1 to len(aLine$)
        c$=mid$(aLine$,i,1)
        if instr(whiteSpaces$, c$)=0 then
            remStartSpaces$=mid$(aLine$,i)
            exit function
        end if
    next
end function
 
'-----------------------------------------------
function IsNumber(input$)
'checks input$ for being valid number. Returns 1 if yes, 0 otherwise.
    IsNumber = 0
    'check sign
    ns = eatUp(input$, "+-")
    if ns>1 then exit function 'with False
    'now, digits
    n1 = eatUp(input$, "0123456789")
    'could be decimal point
    nd = eatUp(input$, ".")
    if nd>1 then  exit function
    'then again, digits
    n2 = eatUp(input$, "0123456789")
    if n1+n2<1 then  exit function
    'now, exponent
    ne = eatUp(input$, "e")
    if ne<>0 then   'we have exponent
        if ne>1 then  exit function
        'check sign
        ns = eatUp(input$, "+-")
        if ns>1 then  exit function
        'now, digits
        n1 = eatUp(input$, "0123456789")
        if n1<1 then  exit function
    end if
 
    if input$="" then IsNumber = 1: exit function
    'else we have leftovers - over with False
end function
 
function eatUp(byRef input$, chars2eat$)
'trims all leading chars from  input$ that match chars in chars2eat$
'return count of trimmed characters
    count = 0
    while len(input$)>0
        if instr( chars2eat$, left$( input$,1))<>0 then
            input$ = mid$(input$,2)
            count = count +1
        else
            exit while
        end if
    wend
    eatUp = count
end function
 
function iif(test, valYes, valNo)
    iif =  valNo
    if test then iif =  valYes
end function