Building a Brainfuck translator in TurboAssembler.

To begin with, let's build the interpreter in a high-level language, for example, in Pascal.

First, write a program that prints a character whose ASCII code equals the number of + commands that precede the . command.

So, we only need the bf-commands + and .

    // Minimal bf interpreter: understands only '+' (increment) and '.' (print).
    var
     data_mem: array[1..10] of integer;   // bf data cells
     command_mem: string;                 // bf program text read from input
     i: integer;                          // position inside command_mem
     j: integer;                          // position inside data_mem
    begin
     j:=1;
     readln(command_mem);

     // visit every character of the program exactly once
     for i:=1 to length(command_mem) do
      case command_mem[i] of
       '+': data_mem[j]:=data_mem[j]+1;   // add one to the current cell
       '.': write(chr(data_mem[j]));      // print the current cell as a character
      end;
    end.

The bf-code +++++++++++++++++++++++++++++++++. prints !
(the ASCII code of the character ! is 33).

You may ensure that the program is correct via the link.

Next, replace the for loop with goto statements and add the bf-commands - < > .

    // bf interpreter with + - > < . ; the loop is built from goto so that it
    // maps directly onto labels and jumps.
    LABEL prev,next;
    var
     data_mem: array[1..10] of integer;   // bf data cells
     command_mem: string;                 // bf program text
     i,j,k: integer;                      // i: command index, j: cell index, k: dump index
    begin
     j:=1;
     i:=1;
     readln(command_mem);
     prev:                                // top of the interpreter loop
     if i>length(command_mem) then goto next;
     case command_mem[i] of
      '+': data_mem[j]:=data_mem[j]+1;
      '-': data_mem[j]:=data_mem[j]-1;
      '>': j:=j+1;
      '<': j:=j-1;
      '.': write(chr(data_mem[j]));
     end;
     i:=i+1;
     goto prev;
     next:                                // program finished: dump all ten cells
     for k:=1 to 10 do
      write(data_mem[k],' ');
    end.

ideone.com

Next, let's add [ and ]

Add another variable i_stor for bracket [ ] loop.

When the current command is [ and the current data cell is greater than zero, save i in i_stor.

When the current command is the closing bracket ] and the current data cell is still greater than zero, load the position of the opening bracket [ from i_stor back into i.

    // bf interpreter with + - > < . [ ] using a single saved bracket position.
    // Only one '[' position is remembered (i_stor), and a '[' seen while the
    // current cell is not greater than zero does nothing: execution simply
    // continues with the next command.
    LABEL prev,next;
    var
     data_mem: array[1..10] of integer;   // bf data cells
     command_mem: string;                 // bf program text
     i,j,k: integer;                      // i: command index, j: cell index, k: dump index
     i_stor: integer;                     // position of the most recent '['
    begin
     j:=1;
     i:=1;
     readln(command_mem);
     prev:                                // top of the interpreter loop
     if i>length(command_mem) then goto next;
     case command_mem[i] of
      '+': data_mem[j]:=data_mem[j]+1;
      '-': data_mem[j]:=data_mem[j]-1;
      '>': j:=j+1;
      '<': j:=j-1;
      '.': write(chr(data_mem[j]));
      '[': if data_mem[j]>0 then i_stor:=i;   // remember where the loop starts
      ']': if data_mem[j]>0 then i:=i_stor;   // go back; i:=i+1 below lands after '['
     end;
     i:=i+1;
     goto prev;
     next:                                // program finished: dump all ten cells
     for k:=1 to 10 do
      write(data_mem[k],' ');
    end.

The bf-code +++++[>+<-] moves the number 5 to the neighboring cell; the program prints 0 5 0 0 0 0 0 0 0 0

ideone.com

A «Hello, World!» program looks like ideone.com

Moving on to TASM

To organize a loop, put the number of iterations in the CX register and place a label (prev: here) at the start of the loop body. At the end of the body, the loop instruction decrements CX and jumps back to the label while CX is not zero.

mov CX, 28h     ; CX = number of loop iterations (28h = 40)
prev:           ; label marking the start of the loop body
; do stuff
loop prev       ; decrement CX and jump back to prev while CX is not zero


Let's create the data array data_mem.
Let's create the command array command_mem.
Fill the data array with 1s so that its elements are easy to recognize in the debugger.

In the loop compare the current symbol with the symbol + and, if the characters are equal, increment the value of the current cell

text segment                      ; bf1.asm 
assume cs:text, ds:data, ss:stk
; Step 1: walk over command_mem and execute only the bf command '+'.
; DL holds the current command, i is the command index, CX is the loop counter.
begin: 
  mov AX,data          ; set the data segment                                      
  mov DS,AX             
  xor BX,BX            ; clear BX: only BL is loaded below, but the whole BX is used as an index
  mov DL, command_mem   ;  load the 1st command in the DL
  mov CX, 0Ah          ; 10 stages (more than the 4 bytes of command_mem, so the scan runs on into data_mem)
prev:                    
 cmp DL, '+'           ; the cell contains +
 jne next              ; no, go to the label next:  
 mov BL, 00h           ; load into BL the index of data_mem (always cell 0 in this step)
 inc data_mem[BX]      ; yes, we increase the value in the cell by 1 (inc means increment)
 next:
 inc i                 ; go to the next character in the array of commands
 mov BL, i
 mov DL, command_mem [BX]   ; fetch the next command into DL
 loop prev             ; repeat while CX is not zero
         
  mov AX, 4c00h        ; terminate the program
  int 21h 
text ends

data segment           
command_mem DB  '+', '+', '+', '$'   ; bf program
data_mem DB 1,1,1,1,1,1,1,1,1,1,'$' ; bf data cells, pre-filled with 1
i DB 0                  ; command_mem index
data ends

stk segment stack      
 db 100h dup (0)       ;  reserve 256 cells
stk ends
end begin      

Assembling (translation) is performed by the command tasm.exe bf1.asm.

Linking is done with the command tlink.exe bf1.obj.

After the program has run, Turbo Debugger shows the +++ commands starting at address 0130, followed by the data array data_mem, followed by the variable i, which equals 0Ah.



Next, add the bf-commands - < > .
Use the function 02h of the interrupt int 21h to output a single character.
Put the character code in register DL before calling the interrupt.

 mov AH,2                 ; DOS function 02h: output a single character
 mov DL, character_code   ; DL = code of the character to output
 int 21h                  ; call the DOS interrupt

Let's write the program entirely

text segment                      ; bf2.asm 
assume cs:text, ds:data, ss:stk
; Step 2: execute the bf commands + - > < . from command_mem.
; DL holds the current command, i is the command index, j is the data index,
; CX is the loop counter.
begin: 
mov AX,data        ; set the data segment                                       
  mov DS,AX             
  xor BX,BX          ; clear BX: only BL is loaded below, but the whole BX is used as an index
  mov DL, command_mem  ; load the 1st command into DL
  mov CX, 0Ah        ; 10 loop iterations
prev:                    
 cmp DL, '+'         ; current command is + ?
 jne next              
 mov BL, j           
 inc data_mem[BX]    ; yes: increment the current cell
next: 
 cmp DL, '-'         ; current command is - ?
 jne next1             
 mov BL, j 
 dec data_mem[BX]    ; yes: decrement the current cell
next1: 
 cmp DL, '>'         ; current command is > ?
 jne next2            
 inc j               ; yes: move to the next cell
next2: 
 cmp DL, '<'         ; current command is < ?
 jne next3            
 dec j               ; yes: move to the previous cell
next3: 
 cmp DL, '.'         ; current command is . ?
 jne next4            
 mov AH,2            ; yes: print the current cell with DOS function 02h
 mov BL, j
 mov DL, data_mem[BX]
 int 21h
 next4:
 inc i               ; advance to the next command
 mov BL, i
 mov DL, command_mem [BX]   
 loop prev           ; repeat while CX is not zero
         
  mov AX, 4c00h        ; terminate the program
  int 21h 
text ends

data segment           
command_mem DB  '+', '>', '+', '+', '$' ; bf program
data_mem DB 0,0,0,0,0,0,0,0,0,0,'$'     ; bf data cells
i DB 0                  ; command_mem index
j DB 0                  ; data_mem index
data ends

stk segment stack      
 db 100h dup (0)       ;  reserve 256 cells
stk ends
end begin      



The cycle works like this:

if the current element of command_mem is not equal to + then jump to the label next: (otherwise, perform +)
if the current element of command_mem is not equal to - then jump to the label next1:
if the current element of command_mem is not equal to > then jump to the label next2:
if the current element of command_mem is not equal to < then jump to the label next3:
if the current element of command_mem is not equal to . then jump to the label next4:
After the label next4: increment the index of command_mem and jump to the label prev: (the beginning of the cycle)

Next, add [ and ]
Add the variable i_stor
When the current command is [, check the current data_mem element for zero: if it is zero, jump further (to the next label); otherwise store i in i_stor

next4:
 cmp DL, '['         ; the cell contains [
 jne next5           ; no, go to the label next5
 mov BL, j
 mov AL, data_mem[BX] ; AL is used as scratch so that DL keeps the current command
 cmp AL, 00h          ; yes, check current data_mem element for zero
 jz next5            ; if zero, jump further
 mov AL, i           ; otherwise load i to i_stor
 mov i_stor, AL     
next5:

When the current command is the closing bracket ] and the current data_mem element is not zero, load the position of the opening bracket [ from i_stor back into i

next5:
 cmp DL, ']'         ; the cell contains ]
 jne next6           ; no, go to the label next6
 mov BL, j
 mov DL, data_mem[BX] ; DL may be reused here: next6 reloads it with the next command
 cmp DL, 00h          ; yes, check current data_mem element for zero
 jz next6            ; if zero, jump further
 mov DL, i_stor      ; otherwise load i_stor to i
 mov i, DL           
next6:

Check the bf-code ++++[>+<-]

text segment                      ; bf3.asm 
assume cs:text, ds:data, ss:stk
; Step 3: execute the bf commands + - > < . [ ] from command_mem.
; DL holds the current command, i is the command index, j is the data index,
; i_stor remembers the position of the most recent [, CX limits the number of steps.
begin: 
mov AX,data        ; set the data segment                                       
  mov DS,AX             
  xor BX,BX          ; clear BX: only BL is loaded below, but the whole BX is used as an index
  mov DL, command_mem  ; load the 1st command into DL
  mov CX, 50h        ; at most 80 interpreter steps
  
prev:                    
 cmp DL, '+'         ; current command is + ?
 jne next             
 mov BL, j           
 inc data_mem[BX]    ; yes: increment the current cell
next: 
 cmp DL, '-'         ; current command is - ?
 jne next1             
 mov BL, j 
 dec data_mem[BX]    ; yes: decrement the current cell
next1: 
 cmp DL, '>'         ; current command is > ?
 jne next2           
 inc j               ; yes: move to the next cell
next2: 
 cmp DL, '<'         ; current command is < ?
 jne next3           
 dec j               ; yes: move to the previous cell
next3: 
 cmp DL, '.'         ; current command is . ?
 jne next4           
 mov AH,2            ; yes: print the current cell with DOS function 02h
 mov BL, j
 mov DL, data_mem[BX]
 int 21h
 jmp next6           ; DL now holds the printed value, not the command: skip the bracket checks
next4:
 cmp DL, '['         ; the cell contains [
 jne next5           ; no, go to the label next5
 mov BL, j
 mov AL, data_mem[BX] ; AL is used as scratch so that DL keeps the current command
 cmp AL, 00          ; yes, check the current data_mem element for zero  
 jz next5            ;  if zero, jump further
 mov AL, i           ; otherwise load i to i_stor
 mov i_stor, AL       
next5:
 cmp DL, ']'         ; the cell contains ]
 jne next6           ; no, go to the label next6
 mov BL, j
 mov DL, data_mem[BX] ; DL may be reused here: next6 reloads it with the next command
 cmp DL, 00          ; yes, check current data_mem element for zero  
 jz next6            ; if zero, jump further
 mov DL, i_stor      ; otherwise load i_stor to i 
 mov i, DL           
next6:
 inc i               ; advance to the next command
 mov BL, i
 mov DL, command_mem[BX]   
 loop prev           ; repeat while CX is not zero
         
  mov AX, 4c00h        ; terminate the program
  int 21h 
text ends

data segment           
command_mem DB  '+','+','+','+','[','>','+','<','-',']', '$'   ; bf program
data_mem DB 0,0,0,0,0,0,0,0,0,0,'$' ; bf data cells
i DB 0                  ; command_mem index
j DB 0                  ; data_mem index
i_stor DB 0             ; saved position of the most recent [
data ends

stk segment stack      
 db 100h dup (0)       ;  reserve 256 cells
stk ends
end begin      



Add input using function 3Fh of interrupt 21h

mov ah, 3fh         ; input function
xor bx, bx          ; BX = file handle to read from; 0 is standard input
mov cx, 100h        ; the number of bytes you want to read from the input
mov dx,OFFSET command_mem   ; DS:DX = buffer that receives the bytes
int 21h

the loop is finished when the current character/command is '$'

cmp DL, '$'         ; is the current command the end marker '$'?
je  exit_loop       ; yes: leave the interpreter loop

Change loop to jmp

mov ah, 3fh           ; input function
xor bx, bx            ; BX = file handle to read from; 0 is standard input
mov cx, 100h          ; the number of bytes you want to read from input
mov dx,OFFSET command_mem   ; DS:DX = buffer that receives the bytes
int 21h
mov DL, command_mem   ; load the 1st command in the DL 
;mov CX, 100h        
prev:
cmp DL, '$'          ; check  the current command for '$'
je  exit_loop       ; jump if the check has successfully passed 

Add the JUMPS directive.
The JUMPS directive enables automatic conditional jump extending in TASM. If the target of a conditional jump is out of range, TASM converts the jump into a local jump/JMP pair. For example:

                JE      EQUAL_PLACE     ; conditional jump whose target is out of range
     becomes:
                JNE     @@A             ; inverted condition skips over the JMP
                JMP     EQUAL_PLACE     ; unconditional jump to the original target
                @@A:


Putting it all together:

JUMPS                                ; bf4.asm
; Final step: read a bf program from standard input into command_mem and
; execute the commands + - > < . [ ] until the end marker '$' is reached.
; DL holds the current command, i is the command index, j is the data index,
; i_stor remembers the position of the most recent [.
text segment                    
assume cs:text,ds:data, ss: stk
begin:  
  mov AX,data                                             
  mov DS,AX
  ;;; read the bf program
  mov ah, 3fh          ; DOS function 3Fh: read from a file handle
  xor bx, bx           ; BX = handle 0 (standard input); also clears BH for the BX indexing below
  mov cx, 100h	       ; read at most 256 bytes
  mov dx,OFFSET command_mem   ; DS:DX = buffer that receives the bytes
  int 21h
  ;;;             
  mov DL, command_mem  ; load the 1st command into DL
  ;mov CX, 100h        ; no loop counter: the loop ends on '$'
prev:
 cmp DL, '$'         ; end marker reached?
 je  exit_loop       ; yes: leave the interpreter loop
 cmp DL, '+'         ; current command is + ?
 jne next             
 mov BL, j                        
 inc data_mem[BX]    ; yes: increment the current cell
next: 
 cmp DL, '-'         ; current command is - ?
 jne next1             
 mov BL, j 
 dec data_mem[BX]    ; yes: decrement the current cell
next1: 
 cmp DL, '>'         ; current command is > ?
 jne next2             
 inc j               ; yes: move to the next cell
next2: 
 cmp DL, '<'         ; current command is < ?
 jne next3             
 dec j               ; yes: move to the previous cell
next3: 
 cmp DL, '.'         ; current command is . ?
 jne next4             
 mov AH,2            ; yes: print the current cell with DOS function 02h
 mov BL, j
 mov DL, data_mem[BX]
 int 21h
 jmp next6           ; DL now holds the printed value, not the command: skip the bracket checks
next4:
 cmp DL, '['         ; current command is [ ?
 jne next5           
 mov BL, j
 mov AL, data_mem[BX] ; AL is used as scratch so that DL keeps the current command
 cmp AL, 00          ; is the current cell zero?
 jz next5            ; yes: nothing to remember
 mov AL, i           ; no: store the position of [ in i_stor
 mov i_stor, AL      
next5:
 cmp DL, ']'         ; current command is ] ?
 jne next6           
 mov BL, j
 mov DL, data_mem[BX] ; DL may be reused here: next6 reloads it with the next command
 cmp DL, 00          ; is the current cell zero?
 jz next6            ; yes: the loop is finished
 mov DL, i_stor      ; no: go back to the saved [
 mov i, DL            
next6:
 inc i               ; advance to the next command
 mov BL, i
 mov DL, command_mem[BX]   
 jmp prev
 exit_loop: 
 
 MOV    AH,2         ; print a line feed before exiting
 MOV    DL,0Ah     
 INT    21h        
 mov AX, 4c00h       ; terminate the program
 int 21h 
text ends

data segment           
 command_mem DB 256h DUP('$')	        ; program buffer, pre-filled with the end marker
 data_mem DB 0,0,0,0,0,0,0,0,0,0,'$'  ; bf data cells
 i DB 0,'$'                            ; command_mem index
 j DB 0,'$'                            ; data_mem index
 i_stor DB 0,'$'                       ; saved position of the most recent [
data ends

stk segment para stack 
 db 100h dup (0)       ; reserve 256 bytes for the stack
stk ends
end begin 

github.com

Комментарии (6)


  1. gecube
    03.11.2018 23:28
    +1

    с каких пор хабр стал англоязычным ресурсом?


    1. Koneru
      03.11.2018 23:31
      +2

      Как стал из Хабрахабра Хабром. Теперь вроде есть место статьям на английском.


    1. MaxVetrov
      04.11.2018 05:17

      Можете прочитать тоже самое на русском habr.com/post/423121


  1. Sdima1357
    04.11.2018 01:08
    +2

    TASM конечно был неплохим ассемблером для своего времени, но статье явно не хватает метки «стимпанк». Впрочем поностальгировал, спасибо.
    PS turbodebAgger даже у Борланда пишется через u.
    PPS. Директива ".intel_syntax noprefix" позволяет писать в GNU AS используя синтаксис МАSMa/ TASMa


  1. impwx
    04.11.2018 11:58

    Перевод очень слабый. С тем же успехом потенциальный англоговорящий читатель мог бы открыть исходную статью на русском и скормить ее автопереводчику в браузере.


  1. evgenWebm
    04.11.2018 20:47

    Осталось понять, зачем?