Older Version
Newer Version
tsh73
Jan 7, 2015
**Entry forRod's[[http://libertybasic.conforums.com/index.cgi?action=display&board=contests&num=1418803980&start=0#1419329462|Rod's Variables (2105)challenge**challenge]] **Text of page.It's got too big to put it on a forum. Actually, I tried to do a tokenizer so it figures out all things including numbers and strings. But after figuring things out it outputs only things that relevant to this task (variables, subs/user functions, labels). Uses keyword list etc from Rod's entry. Tested on biggest program I've ever seen in LB - freeform404.bas ;) [[code format="lb"]] 'variable challenge 'tsh73, Jan 2015 global t.maxTokens, t.name, t.type, tp.line, tp.stmnt, tp.num global curTokenNum, curTokNum, nLines, curStmntNum, contLine [nonUsedLabel] dummy=1:dummy2=dummy 'non-used line with a (:) unDimmed(3)=1 'example of un-dimmed array gosub [setLists] 'fname$="vars01.bas" 'fname$="vars02.bas" 'fname$="test.bas" filedialog "Select file to process";chr$(0);"open", "*.bas", fname$ if fname$="" then print "No file selected - bye": end print "Processing "; fname$ t.maxTokens=100000 t.name = 0 t.type = 1 dim token$(t.maxTokens, 1) tp.line=0 tp.stmnt=1 tp.num=2 dim tokenPos(t.maxTokens, 2) curTokenNum=0 'global curTokNum=0 'in a statement (that is between (:)) verbose = 0 '1 open fname$ for input as #1 nLines=1 isContinuation=0 print "Reading parsing line by line..." t0=time$("ms") while not(eof(#1)) nLines=nLines+1 line input #1, aLine$ if verbose then print ">"; aLine$ curStmntNum=1 curTokNum=0 if isContinuation then if contLine=0 then contLine = nLines-1 isContinuation=0 else contLine=0 end if while 1 scan 'reading a line splitting it to tokens '1) skip all starting spaces aLine$=remStartSpaces$(aLine$) 'exit if nothing left if aLine$="" then exit while 'print ":"; aLine$ '2) check special sequences ''comment if left$(aLine$,1)="'" then 'skip line as comment if verbose then print " comment skipped" exit while end if '3) recognize / read / cut token select case '3.1) check special sequences '[label] case left$(aLine$,1)="[" label$=upto$(aLine$, "]")+"]" aLine$=after$(aLine$, "]") if verbose then print " label: ";label$ call storeToken label$, "lbl" '3.2) check special sequences '"string" case left$(aLine$,1)=qq$ 'string aLine$=mid$(aLine$,2) 'cut left (") aString$=qq$+upto$(aLine$,qq$)+qq$ aLine$=after$(aLine$, qq$) if verbose then print " string: "; aString$ call storeToken aString$, "str" case left$(aLine$,1)="_" and instr(varChars$, mid$(aLine$,2,1))<>0 'winConst token$=left$(aLine$,1) aLine$=mid$(aLine$,2) while instr(varChars$;"_", left$(aLine$,1))<>0 token$=token$+left$(aLine$,1) aLine$=mid$(aLine$,2) if aLine$="" then exit while wend if verbose then print " Windows constant: ";token$ call storeToken token$, "winConst" case left$(aLine$,1)="_" aLine$=mid$(aLine$,2) 'cut token if verbose then print " continuation char" 'call storeToken "_", "_" 'do not store? isContinuation=1 case left$(aLine$,1)=":" aLine$=mid$(aLine$,2) 'cut token if verbose then print " operator separator" call storeToken ":", ":" curStmntNum=curStmntNum+1 curTokNum=0 case left$(aLine$,1)="|" aLine$=mid$(aLine$,2) 'cut token if verbose then print " menu separator" call storeToken "|", "|" case left$(aLine$,1)="," aLine$=mid$(aLine$,2) 'cut token if verbose then print " parameter separator" call storeToken ",", "," case left$(aLine$,1)=";" aLine$=mid$(aLine$,2) 'cut token if verbose then print " concatenation" call storeToken ";", ";" case left$(aLine$,1)="=" aLine$=mid$(aLine$,2) 'cut token if verbose then print " assignment or equal" call storeToken "=", "=" case left$(aLine$,1)="(" aLine$=mid$(aLine$,2) 'cut token if verbose then print " opening (" call storeToken "(", "(" case left$(aLine$,1)=")" aLine$=mid$(aLine$,2) 'cut token if verbose then print " closing )" call storeToken ")", ")" case left$(aLine$,1)="#" 'handle token$=left$(aLine$,1) aLine$=mid$(aLine$,2) while instr(varChars$, left$(aLine$,1))<>0 token$=token$+left$(aLine$,1) aLine$=mid$(aLine$,2) if aLine$="" then exit while wend if verbose then print " handle: ";token$ call storeToken token$, "hndl" case instr(firstVarChars$, left$(aLine$,1))<>0 'name (var, arr, sub, func) or keyword token$=left$(aLine$,1) aLine$=mid$(aLine$,2) while instr(varChars$, left$(aLine$,1))<>0 _ or (left$(aLine$,1)="_" and instr(varChars$, mid$(aLine$,2,1))<>0 ) token$=token$+left$(aLine$,1) aLine$=mid$(aLine$,2) if aLine$="" then exit while wend select case case instr(comlist$;typlist$;opelist$, " ";lower$(token$);" ")<>0 if verbose then print " keyword: ";token$ call storeToken token$, "kwrd" 'if REM if lower$(token$) = "rem" then 'comment if verbose then print " comment skipped" exit while end if case else if verbose then print "name (var, array, sub, or func): ";token$ call storeToken token$, "name" end select case instr(firstNumChars$, left$(aLine$,1))<>0 'number? notANumber=0 select case case instr(digits$, left$(aLine$,1))<>0 token$=left$(aLine$,1) aLine$=mid$(aLine$,2) case instr("-.", left$(aLine$,1))<>0 and IsNumber(left$(aLine$,2))<>0 token$=left$(aLine$,2) aLine$=mid$(aLine$,3) case left$(aLine$,2)="-." and IsNumber(left$(aLine$,3))<>0 token$=left$(aLine$,3) aLine$=mid$(aLine$,4) case else notANumber=1 end select if notANumber=0 then 'read rest of a number while IsNumber(token$)<>0 token$=token$+left$(aLine$,1) aLine$=mid$(aLine$,2) if aLine$="" then exit while wend if IsNumber(token$)=0 then 'one char extra aLine$=right$(token$, 1)+aLine$ token$=left$(token$, len(token$)-1) end if if verbose then print "number: ";token$ call storeToken token$, "num" else 'should be single "-" (or not compiles) token$=left$(aLine$,1) aLine$=mid$(aLine$,2) if verbose then print " operator ";token$ call storeToken token$, "op" end if 'should be moved after "numbers" so "-1" does not process as "-" "1" case instr("<>+-*/^", left$(aLine$,1))<>0 token$=left$(aLine$,1) aLine$=mid$(aLine$,2) 'cut token if verbose then print " operator ";token$ call storeToken token$, "op" case else token$=left$(aLine$,1) aLine$=mid$(aLine$,2) if verbose then print "??char: ";token$ call storeToken token$, "???" end select wend wend close #1 print "----------------" print "nLines=",nLines print "numTokens=";curTokenNum print time$("ms")-t0;" ms" t0=time$("ms") goto [SkipPrint] for i = 1 to curTokenNum print tokenPos(i, tp.line);" ";_ tokenPos(i, tp.stmnt);" ";_ tokenPos(i, tp.num),_ token$(i, t.type),_ token$(i, t.name) next [SkipPrint] 'second pass along token$ array, check for next "(" - to tell functions/arrays Print "second pass along token$ array..." i=0 while i <= curTokenNum i=i+1 if token$(i, t.type)="name" then if i+1<=curTokenNum then if token$(i+1, t.type)="(" and _ tokenPos(i, tp.line) = tokenPos(i+1, tp.line) and _ tokenPos(i, tp.stmnt) = tokenPos(i+1, tp.stmnt) then if instr(funlist$, " ";lower$(token$(i, t.name));" ")<>0 then token$(i, t.type)="buildInFunc" else token$(i, t.type)="UDForArray" end if end if end if end if wend print time$("ms")-t0;" ms" t0=time$("ms") 'third pass, check for functions/subs/arrays Print "third pass along token$ array..." i=0 while i <= curTokenNum i=i+1 if tokenPos(i, tp.num) = 1 then 'first token on a stmnt select case case lower$(token$(i, t.name)) ="dim" or lower$(token$(i, t.name)) ="redim" curLine = tokenPos(i, tp.line) curStmnt = tokenPos(i, tp.stmnt) while i+1<=curTokenNum if curLine <> tokenPos(i+1, tp.line) _ or curStmnt <> tokenPos(i+1, tp.stmnt) then exit while i=i+1 if token$(i, t.type)="UDForArray" then token$(i, t.type)="dimmedArray" 'mark all other instances for j = 1 to curTokenNum if token$(j, t.name)=token$(i, t.name) and _ token$(j, t.type)="UDForArray" then token$(j, t.type)="dimmedArray" next wend case lower$(token$(i, t.name)) ="sub" or lower$(token$(i, t.name)) ="call" i=i+1 token$(i, t.type)="sub" case lower$(token$(i, t.name)) ="function" i=i+1 token$(i, t.type)="UDF" 'mark all other instances for j = 1 to curTokenNum if token$(j, t.name)=token$(i, t.name) and _ token$(j, t.type)="UDForArray" then token$(j, t.type)="UDF" next end select end if wend print time$("ms")-t0;" ms" t0=time$("ms") 'all other "UDForArray" are undimmed arrays Print "last pass along token$ array..." for j = 1 to curTokenNum if token$(j, t.type)="UDForArray" then token$(j, t.type)="unDimmedArray" next goto [SkipPrint2] print "----------------" for i = 1 to curTokenNum print tokenPos(i, tp.line);" ";_ tokenPos(i, tp.stmnt);" ";_ tokenPos(i, tp.num),_ token$(i, t.type),_ token$(i, t.name) next print "----------------" [SkipPrint2] print time$("ms")-t0;" ms" t0=time$("ms") 'count variables Print "Counting ..." un.typeName=0 un.count=1 dim uniqueName$(curTokenNum,1) aIndex$ = "" 'aIndex would be in a form ######|word|, where ###### index in a() 'You can guess ###### restricts max len to 999999 aLen=0 'POSSIBLE TYPES '( ) , : ; ??? = buildInFunc dimmedArray hndl kwrd lbl name num op str sub UDF unDimmedArray types2Skip$="( ) , : ; = | num op str buildInFunc kwrd " for i = 1 to curTokenNum w$=token$(i, t.type);":";token$(i, t.name) if instr(types2Skip$, token$(i, t.type))<>0 then [skipToken] toFind$="|"+w$+"|" pos=instr(aIndex$, toFind$) if pos =0 then 'add it in array aLen = aLen+1 uniqueName$(aLen, un.typeName)=w$ uniqueName$(aLen, un.count)="1" 'first time aIndex$=aIndex$+using("######",aLen)+toFind$ else 'get index for a word. FAST. j = val(mid$(aIndex$, pos-6,6)) cnt = val(uniqueName$(j, un.count)) uniqueName$(j, un.count)=str$(cnt+1) end if [skipToken] next print time$("ms")-t0;" ms" t0=time$("ms") Print "sorting ..." sort uniqueName$(),1,aLen,0 print time$("ms")-t0;" ms" print "nDiffWords", aLen print "----------------" print "Counter", "Type", "Name" print "-------------------------------------------" for i = 1 to aLen print uniqueName$(i, un.count), word$(uniqueName$(i, un.typeName),1,":"), _ word$(uniqueName$(i, un.typeName),2,":") next print "-over-----------" end '---------------------- [setLists] qq$ = chr$(34) '(") digits$="1234567890" letters$="" for i=asc("A") to asc("Z") letters$=letters$+chr$(i) next letters$=letters$+lower$(letters$) firstVarChars$=letters$ varChars$=letters$+digits$+".$" firstNumChars$=digits$+".-" 'from Rod's solution 'command list comlist$=" xor while wend wait until unloadbmp trace to titlebar timer then texteditor " comlist$=comlist$+"textbox sub stylebits struct stopmidi stop step statictext sort select seek " comlist$=comlist$+"scan run return resume restore rem redim readjoystick read randomize " comlist$=comlist$+"radiobutton put prompt printerdialog print popupmenu playwave playmidi " comlist$=comlist$+"password out or open oncomerror notice nomainwin next name mod menu " comlist$=comlist$+"maphandle mainwin lprint loop loadbmp listbox line let kill input if " comlist$=comlist$+"groupbox graphicbox goto gosub global gettrim get function for fontdialog " comlist$=comlist$+"files filedialog field exit error end else dump do dim data cursor confirm " comlist$=comlist$+"combobox colordialog cls close checkbox case callfn calldll callback call " comlist$=comlist$+"button bmpsave bmpbutton beep as and " 'type operators typlist$=" word void ushort ulong short ptr none long dword double boolean " 'command operators opelist$=" window text random output graphics dll dialog byref binary append " opelist$=opelist$+"horizscrollbar vertscrollbar on off min max window_nf window_popup graphics_fs graphics_nsb graphics_fs_nsb graphics_nf_nsb " opelist$=opelist$+"text_fs text_nsb text_nsb_ins dialog_modal dialog_nf dialog_nf_modal dialog_fs dialog_nf_fs dialog_popup " opelist$=opelist$+"yellow brown red darkred pink darkpink blue darkblue green darkgreen " opelist$=opelist$+"cyan darkcyan white black lightgray darkgray buttonface " 'function list funlist$=" word$ winstring val using upper$ txcount trim$ time$ tan tab str$ sqr " funlist$=funlist$+"space$ sin rnd rmdir right$ not mkdir min midipos mid$ max lower$ log " funlist$=funlist$+"lof loc len left$ int instr inputto$ input$ inp hwnd hexdec hbmp exp " funlist$=funlist$+"eval eval$ eof dechex$ date$ cos chr$ atn asn asc acs abs " funlist$=funlist$+"upto$ after$ afterlast$ endswith remchar$ " return '--------------------- sub storeToken tkn$, type$ curTokenNum=curTokenNum+1 curTokNum=curTokNum+1 token$(curTokenNum, t.name)=tkn$ token$(curTokenNum, t.type)=type$ tokenPos(curTokenNum, tp.line)=iif(contLine<>0, contLine, nLines) tokenPos(curTokenNum, tp.stmnt)=curStmntNum tokenPos(curTokenNum, tp.num) = curTokNum end sub '--------------------- function remStartSpaces$(aLine$) whiteSpaces$=chr$(9)+" " for i = 1 to len(aLine$) c$=mid$(aLine$,i,1) if instr(whiteSpaces$, c$)=0 then remStartSpaces$=mid$(aLine$,i) exit function end if next end function '----------------------------------------------- function IsNumber(input$) 'checks input$ for being valid number. Returns 1 if yes, 0 otherwise. IsNumber = 0 'check sign ns = eatUp(input$, "+-") if ns>1 then exit function 'with False 'now, digits n1 = eatUp(input$, "0123456789") 'could be decimal point nd = eatUp(input$, ".") if nd>1 then exit function 'then again, digits n2 = eatUp(input$, "0123456789") if n1+n2<1 then exit function 'now, exponent ne = eatUp(input$, "e") if ne<>0 then 'we have exponent if ne>1 then exit function 'check sign ns = eatUp(input$, "+-") if ns>1 then exit function 'now, digits n1 = eatUp(input$, "0123456789") if n1<1 then exit function end if if input$="" then IsNumber = 1: exit function 'else we have leftovers - over with False end function function eatUp(byRef input$, chars2eat$) 'trims all leading chars from input$ that match chars in chars2eat$ 'return count of trimmed characters count = 0 while len(input$)>0 if instr( chars2eat$, left$( input$,1))<>0 then input$ = mid$(input$,2) count = count +1 else exit while end if wend eatUp = count end function function iif(test, valYes, valNo) iif = valNo if test then iif = valYes end function [[code]]