Yuki CPU Assembler

Dieses Projekt besteht aus einem Assembler für meine Yuki-CPU nach dem Zwei-Durchlauf-Prinzip. Es wandelt Pseudo-Assembler-Dateien in die durch den Mikrocode festgelegten Befehlsbytes um.

Funktionen:
  • Unterstützung von 1-, 2- und 3-Byte-Ganzzahlen
  • Unterstützung von nullterminierten sowie Längenpräfix-Zeichenketten
  • Unterstützung von Labels
  • Unterstützung von Export sowie Import von anderen Assembler- und Label-Dateien
  • Unterstützung von zeilenrelativen Sprüngen
  • Unterstützung von Datenverschiebung

Der Assembler übersetzt die Befehle in einem ersten Schritt mit Platzhaltern für die Label-Adressen, die er in einem zweiten Schritt einfüllt. Der Assembler ist so ausgelegt, dass die hinterlegten Befehle einfach geändert werden können, sollte der Mikrocode des Prozessors geändert werden.

Hier ist ein Beispiel für eine typische Eingangsdatei:

# Title:       Yuki Monitor for Yuki Soc
# Author:      R.Lux
# Last edited: 06.01.2018

.org 0x000000
.glbl DEBUG_ADDRESS 0xFFCD55
.glbl SIO0_OUT 0xFFCFEF
.glbl SIO0_BAUD 0xFFCFE1
.glbl SIO0_INT_MODE 0xFFCFE2
.glbl SIO0_INT_THRESHOLD 0xFFCFE0
.glbl SIO0_RX_DATA? 0xFFCFE3
.glbl SIO0_TX_READY? 0xFFCFE4
.glbl FUNCTIONSPACE0 0x000FF0
.glbl FUNCTIONSPACE1 0x000FF1
.glbl FUNCTIONSPACE2 0x000FF2
.glbl FUNCTIONSPACE3 0x000FF3
.glbl FUNCTIONSPACE4 0x000FF4
.glbl FUNCTIONSPACE5 0x000FF5
.glbl FUNCTIONSPACE6 0x000FF6
.glbl FUNCTIONSPACE7 0x000FF7
.glbl FUNCTIONSPACE8 0x000FF8


# Jump to the monitor start
JMP MONITOR_START
NOP

# Functions...
# ---------------------------------------------------------------------------
# F_OUTPUT_STRINGZ
# In:    X  = start address of a zero-terminated string
# Out:   every byte before the 0x00 terminator is passed to F_OUTPUT_R0
# Saves: R0 (PUSH/POP). X is NOT saved; because INC X runs before the
#        terminator test, X ends up one past the 0x00 byte.
# ---------------------------------------------------------------------------
.glbl F_OUTPUT_STRINGZ
NOP
# ===== Output a zero-terminated string (start address in X) on SIO0 =====
# Save working registers..
PUSH R0
NOP
# Start address of zero-terminated string is in the X register
.lbl F_OUTPUT_STRINGZLOOP
# Fetch the next character and advance the pointer
LD R0 (X)
INC X
# Leave the loop on the 0x00 terminator (it is not output)
TST R0 0x00
JZ F_OUTPUT_STRINGZEND
NOP
# Output char
CALL F_OUTPUT_R0
# Repeat until 0x00 terminator
JMP F_OUTPUT_STRINGZLOOP
.lbl F_OUTPUT_STRINGZEND
# Restore working registers
POP R0
# Return to function caller
RET
NOP


# ---------------------------------------------------------------------------
# F_OUTPUT_R0
# In:    R0 = byte to transmit
# Saves: R1 (PUSH/POP); R0 is not modified by this routine
# ---------------------------------------------------------------------------
.glbl F_OUTPUT_R0
# ==== Output R0 on SIO0 ======
PUSH R1
.lbl F_OUTPUT_R0_WAIT
# Busy-wait: keep polling while SIO0_TX_READY? reads as 0x00
LD R1 (SIO0_TX_READY?)
TST R1 0x00
JZ F_OUTPUT_R0_WAIT
# Write the byte to the SIO0 data register
ST R0 (SIO0_OUT)
POP R1
# Return to caller
RET
NOP


# ---------------------------------------------------------------------------
# F_INPUT_R0
# Out:   R0 = byte read after SIO0_RX_DATA? became non-zero
# Note:  no registers are saved; R0 is used for polling and for the result
# ---------------------------------------------------------------------------
.glbl F_INPUT_R0
# ==== Get 1 byte from SIO0 and store it in R0
.lbl F_INPUT_R0_WAIT
# Busy-wait: keep polling while SIO0_RX_DATA? reads as 0x00
LD R0 (SIO0_RX_DATA?)
TST R0 0x00
JZ F_INPUT_R0_WAIT
# NOTE(review): the received byte is read from SIO0_OUT, the same address
# F_OUTPUT_R0 writes to - presumably a shared RX/TX data register; confirm.
LD R0 (SIO0_OUT)
# Return to caller
RET
NOP


# ---------------------------------------------------------------------------
# F_INPUT_UNTIL_\n
# In:    X = start address of the destination buffer
# Out:   zero-terminated string in the buffer; the final 0x0A is not stored
# Saves: X, R0, R1 (PUSH/POP), so the caller's X still points at the buffer
# Echo:  every stored character and every backspace is echoed via
#        F_OUTPUT_R0; the terminating 0x0A is not echoed here
# ---------------------------------------------------------------------------
.glbl F_INPUT_UNTIL_\n
# ==== Start address in X, generate a zero-terminated string using the data of SIO0 until \n is input ====
# Last \n is not saved
# Save working registers
PUSH X
PUSH R0
PUSH R1
.lbl F_INPUT_UNTIL_\n_WAIT
# Busy-wait until SIO0_RX_DATA? is non-zero
LD R1 (SIO0_RX_DATA?)
TST R1 0x00
JZ F_INPUT_UNTIL_\n_WAIT
# We have a new char
LD R0 (SIO0_OUT)
# Delete last char
# 0x08 = ASCII backspace
TST R0 0x08
JE F_INPUT_UNTIL_\n_DEL
JMP END_F_INPUT_UNTIL_\n_DEL
.lbl F_INPUT_UNTIL_\n_DEL
# Step back one byte, overwrite it with 0x00 and echo the backspace
# NOTE(review): X is decremented without checking against the buffer start,
# so a backspace on an empty line writes 0x00 in front of the buffer.
DEC X
LD R0 0x00
ST R0 (X)
LD R0 0x08
CALL F_OUTPUT_R0
JMP F_INPUT_UNTIL_\n_WAIT
.lbl END_F_INPUT_UNTIL_\n_DEL
# 0x0A = ASCII line feed: end of input
TST R0 0x0A
JE F_INPUT_UNTIL_\n_FINISHED
# Ordinary character: echo it, store it and advance the pointer
CALL F_OUTPUT_R0
ST R0 (X)
INC X
JMP F_INPUT_UNTIL_\n_WAIT
.lbl F_INPUT_UNTIL_\n_FINISHED
# Save trailing 0
LD R0 0x00
ST R0 (X)
# Restore working registers
POP R1
POP R0
POP X
# Return to caller
RET
NOP


# Leave space for ISRs
.space 0xA0

# ---------------------------------------------------------------------------
# MONITOR_START - entry point reached through the JMP at the start of the
# program. Configures SIO0, sets both stack pointers, prints hello_msg and
# writes 0x55 to DEBUG_ADDRESS before execution continues at MAINLOOP.
# ---------------------------------------------------------------------------
.glbl MONITOR_START
# Initialise SIO0
LD R0 0d7# 115200 Baud
ST R0 (SIO0_BAUD)
LD R0 0d0# Interrupts disabled
ST R0 (SIO0_INT_MODE)
LD R0 0d1# Recognise all data even if it is only 1 byte
ST R0 (SIO0_INT_THRESHOLD)

# Allocate 256 bytes total for the stacks
# NOTE(review): the two initial values are 0x7F apart; how they split the
# 256 bytes depends on the stack growth direction, which is not visible here.
LD USP 0x000F00
LD SSP 0x000F7F

# Output startup message
LD X hello_msg
CALL F_OUTPUT_STRINGZ
# Write the marker value 0x55 to the debug address
LD R0 0x55
ST R0 (DEBUG_ADDRESS)

# ---------------------------------------------------------------------------
# MAINLOOP - print the prompt, read one line into the buffer at 0x000FD0 and
# dispatch on its first character (kept in R1). Each command is a
# "TST R1 <char> / JE <cmd> / JMP END_<cmd>" block; a handled command jumps
# back to MAINLOOP, an unmatched one falls through to the next test.
# ---------------------------------------------------------------------------
.lbl MAINLOOP
# 0x3E = '>' prompt character
LD R0 0x3E
CALL F_OUTPUT_R0
# Read the command line into the buffer at 0x000FD0
LD X 0x000FD0
CALL F_INPUT_UNTIL_\n
# R1 = first character of the line = command letter
LD R1 (0x000FD0)

TST R1 0x72# Command: r (Read data from address)
JE r_CMD
JMP END_r_CMD


.glbl r_CMD
# syntax:r AAAAAA
# returns: byte at AAAAAA
# Output a line feed first
LD R0 0x0A
CALL F_OUTPUT_R0
# The address text starts at buffer offset 2 (after "r ")
# F_INPUT_PTR_ASCII is not defined in this file (presumably in
# ascii_functions.s); the code below expects its result at FUNCTIONSPACE3.
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
# Fetch the addressed byte and keep it on the stack
LD X (FUNCTIONSPACE3)
LD R0 (X)
PUSH R0
# Terminate the address text after its 6 characters (0xFD2..0xFD7)
LD R0 0x00
ST R0 (0x000FD8)
# Echo the address text followed by ':' (0x3A)
LD X 0x000FD2
CALL F_OUTPUT_STRINGZ
LD R0 0x3A
CALL F_OUTPUT_R0
# Print the byte via F_OUTPUT_R0_ASCII (not defined in this file), then a line feed
POP R0
CALL F_OUTPUT_R0_ASCII
LD R0 0x0A
CALL F_OUTPUT_R0
JMP MAINLOOP
.lbl END_r_CMD


TST R1 0x77# Command: w (Write data to address)
JE w_CMD
JMP END_w_CMD


.glbl w_CMD
# syntax:w AAAAAA BB
# returns: -
# Output a line feed first
LD R0 0x0A
CALL F_OUTPUT_R0
# Address text starts at buffer offset 2; the parsed pointer is expected at
# FUNCTIONSPACE3 (F_INPUT_PTR_ASCII is not defined in this file)
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
LD X (FUNCTIONSPACE3)
# Keep the target address while X is reused for the value text
PUSH X
# Value text "BB" starts at 0x000FD9 ("w" + " " + 6 address chars + " ")
# F_CONVERT_ASCII_R0 is not defined in this file; its result is used in R0.
LD X 0x000FD9
CALL F_CONVERT_ASCII_R0
POP X
# Store the value, then echo it followed by a line feed
ST R0 (X)
CALL F_OUTPUT_R0_ASCII
LD R0 0x0A
CALL F_OUTPUT_R0
JMP MAINLOOP
.lbl END_w_CMD


TST R1 0x52# Command: R (Read data between addresses)
JE R_CMD
JMP END_R_CMD

.glbl R_CMD
# syntax:R AAAAAA BBBBBB
# display all bytes between AAAAAA and BBBBBB
# Output format: "<address>:" followed by bytes separated by spaces; a new
# row with a fresh address prefix is started whenever the row counter R2
# wraps to 0 (every 16 bytes).
LD R0 0x0A
CALL F_OUTPUT_R0
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
# Mask off lower nibble of start address
# NOTE(review): no masking instruction follows; Y receives the start address
# unchanged.
LD Y (FUNCTIONSPACE3)
# Parse the second address (text at 0x000FD9); presumably this overwrites
# FUNCTIONSPACE3 with the end address used by the comparisons below.
LD X 0x000FD9
CALL F_INPUT_PTR_ASCII
# Use R2 as rowcounter
LD R2 0x00
# Print the address prefix of the first row followed by ':' (0x3A)
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
.lbl R_CMD_LOOP
LD R0 (Y)
# Output data
CALL F_OUTPUT_R0_ASCII
# Separate data by spaces
LD R0 0x20
CALL F_OUTPUT_R0
INC Y
# Loop until all bytes are output
JE Y (FUNCTIONSPACE3) R_CMD_LOOP_END
# Advance the row counter modulo 16
ADD R2 0x01
AND R2 0b00001111
TST R2 0x00
JE R_CMD_NEWLINE
JMP END_R_CMD_NEWLINE
.lbl R_CMD_NEWLINE
# Row is full: line feed, then the address prefix of the next row
LD R0 0x0A
CALL F_OUTPUT_R0
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
NOP
.lbl END_R_CMD_NEWLINE
# Loop until all bytes are output
JE Y (FUNCTIONSPACE3) R_CMD_LOOP_END
NOP
JMP R_CMD_LOOP
.lbl R_CMD_LOOP_END
# Y now equals the end address; its byte is still to be printed. If the row
# counter is 0x0F the current row is full, so start a new row first.
TST R2 0x0F
JE R_CMD_NEWLINE_SEC
JMP END_R_CMD_NEWLINE_SEC
.lbl R_CMD_NEWLINE_SEC
LD R0 0x0A
CALL F_OUTPUT_R0
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
NOP
.lbl END_R_CMD_NEWLINE_SEC
# Print the byte at the end address and finish with a line feed
LD R0 (Y)
CALL F_OUTPUT_R0_ASCII
LD R0 0x0A
CALL F_OUTPUT_R0
JMP MAINLOOP
.lbl END_R_CMD


TST R1 0x57# Command: W(write multiple bytes)
JE W_CMD
JMP END_W_CMD_1

.lbl W_CMD
# syntax:
#W SSSSSS - start writing bytes at SSSSSS
# only send data after .
# send q to stop
# Use R3 as rowcounter
LD R3 0x00
# Parse the start address (text at buffer offset 2) into Y
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
LD Y (FUNCTIONSPACE3)
# Line feed, start address, ':' (0x3A), then '.' (0x2E) as the "send data" prompt
LD R0 0x0A
CALL F_OUTPUT_R0
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
LD R0 0x2E
CALL F_OUTPUT_R0


.lbl W_CMD_LOOP
# F_INPUT_ASCII_R0 is not defined in this file; the code below uses its
# result in R0 and inspects 0x000FD0/0x000FD1 afterwards - presumably the
# routine leaves the typed characters there. TODO confirm.
CALL F_INPUT_ASCII_R0
# Stop when either inspected character is 'q' (0x71)
LD R2 (0x000FD0)
TST R2 0x71
JE END_W_CMD_0
LD R2 (0x000FD1)
TST R2 0x71
JE END_W_CMD_0
NOP
# Store the byte and advance the write pointer
ST R0 (Y)
INC Y

# Advance the row counter modulo 16
ADD R3 0x01
AND R3 0b00001111
TST R3 0x00
JE W_CMD_NEWLINE
JMP END_W_CMD_NEWLINE

.lbl W_CMD_NEWLINE
# 16 bytes written: line feed, then the next address followed by ':'
LD R0 0x0A
CALL F_OUTPUT_R0
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
NOP
.lbl END_W_CMD_NEWLINE
NOP

# Output 0x7F (ASCII DEL) followed by a new '.' prompt
LD R0 0x7F
CALL F_OUTPUT_R0
LD R0 0x2E
CALL F_OUTPUT_R0
JMP W_CMD_LOOP

.lbl END_W_CMD_0
NOP
# Finish with a line feed and return to the prompt
LD R0 0x0A
CALL F_OUTPUT_R0
JMP MAINLOOP
.lbl END_W_CMD_1


TST R1 0x67# Command: g (Jump to address)
JE g_CMD
JMP END_g_CMD

.glbl g_CMD
# syntax (from the parsing below): g AAAAAA
# Get address
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
# Jump to address
# Control leaves the monitor here; nothing after the JMP runs for this command
JMP (FUNCTIONSPACE3)
.lbl END_g_CMD

TST R1 0x63# Command: c (Call function)
JE c_CMD
JMP END_c_CMD

.glbl c_CMD
# syntax (from the parsing below): c AAAAAA
# Get address
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
# Call the function at the address
CALL (FUNCTIONSPACE3)
# After the callee returns, output a line feed
LD R0 0x0A
CALL F_OUTPUT_R0
# NOTE(review): unlike the other commands there is no JMP MAINLOOP here;
# execution falls through END_c_CMD into the 'b' command test, which relies
# on R1 still holding a value other than 0x62 after the called function.
.lbl END_c_CMD

TST R1 0x62# Command: b (enter binary mode)
JE b_CMD
JMP END_b_CMD

.glbl b_CMD
# This mode is useful for uploading and downloading larger data chunks
# R1 holds the command
# 0x00 response means success
# 0x55 response means ready
# 0xFF response means failure
# Sub-commands dispatched below (one command byte, then raw argument bytes):
#   0x01 ping, 0x02 single read, 0x03 single write,
#   0x04 multi read, 0x05 multi write, 0x06 exit
# Any other byte is ignored and the next command byte is awaited.
# NOTE(review): no code path in this block sends 0xFF.
CALL F_INPUT_R0
# Copy the command byte from R0 to R1 via the stack
PUSH R0
POP R1

TST R1 0x01# test if binary mode is ready
JE b_CMD_ping
JMP END_b_CMD_ping

.lbl b_CMD_ping
# Reply 0x00 and wait for the next command
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_ping

TST R1 0x02# read a single value
JE b_CMD_sread
JMP END_b_CMD_sread

.lbl b_CMD_sread
# Receive 3 address bytes into FUNCTIONSPACE0..2 and load them as a pointer
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
LD X (FUNCTIONSPACE0)
# Send the addressed byte, then the 0x00 status byte
LD R0 (X)
CALL F_OUTPUT_R0
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_sread


TST R1 0x03# write a single value
JE b_CMD_swrite
JMP END_b_CMD_swrite

.lbl b_CMD_swrite
# Receive 3 address bytes into FUNCTIONSPACE0..2
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
# Receive the data byte and store it through the pointer held at
# FUNCTIONSPACE0 (double-indirect store)
CALL F_INPUT_R0
ST R0 ((FUNCTIONSPACE0))
# Reply with the 0x00 status byte
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_swrite

TST R1 0x04# read multiple values
JE b_CMD_mread
JMP END_b_CMD_mread

.lbl b_CMD_mread
# Get start address of Y
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
LD Y (FUNCTIONSPACE0)
# Get end address
# (stored over the start address in FUNCTIONSPACE0..2, which is already in Y)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
NOP
.lbl b_CMD_mread_loop
# Send the byte at Y; the compare happens after the send, so the byte at
# the end address is sent as well
LD R0 (Y)
CALL F_OUTPUT_R0
JE Y (FUNCTIONSPACE0) b_CMD_mread_loop_end
INC Y
JMP b_CMD_mread_loop
.lbl b_CMD_mread_loop_end
# Reply with the 0x00 status byte
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_mread

TST R1 0x05# write multiple values
JE b_CMD_mwrite
JMP END_b_CMD_mwrite

.lbl b_CMD_mwrite
# Get start address of Y
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
LD Y (FUNCTIONSPACE0)
# Get end address
# (kept in FUNCTIONSPACE3..5 here, unlike mread which reuses FUNCTIONSPACE0..2)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE3)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE4)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE5)
NOP
.lbl b_CMD_mwrite_loop
# Receive one byte and store it at Y; the compare happens after the store,
# so the end address is written as well
CALL F_INPUT_R0
ST R0 (Y)
JE Y (FUNCTIONSPACE3) b_CMD_mwrite_loop_end
# Indicate being ready
LD R0 0x55
CALL F_OUTPUT_R0
INC Y
JMP b_CMD_mwrite_loop
.lbl b_CMD_mwrite_loop_end
# Reply with the 0x00 status byte
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_mwrite

TST R1 0x06# exit binary mode
JE b_CMD_exit
JMP END_b_CMD_exit

.lbl b_CMD_exit
# Reply 0x00 and leave binary mode
LD R0 0x00
CALL F_OUTPUT_R0
JMP END_b_CMD
.lbl END_b_CMD_exit

# Unknown command byte: wait for the next one
JMP b_CMD

.lbl END_b_CMD
# End of the command chain: back to the prompt
JMP MAINLOOP
# NOTE(review): no label precedes the following instructions and they come
# directly after an unconditional JMP, so they look unreachable. They show
# the line-relative jump syntax ("JMP- 2L").
HALT

LD R0 0x00
ADD R0 0x01
JMP- 2L
# Data section
.glbl hello_msg
.strz "Yuki Monitor V1.0\n"

# Include file section
.inc ascii_functions.s

# Save all global labels
.exp_lbl yuki_labels.lbl

Der Quellcode des Assemblers:

# Title:       Yuki CPU assembler main file
# Author:      R.Lux
# Last edited: 14.01.2018

import codecs
import math
import time

# NOTE(review): a `global` statement at module level has no effect in Python;
# these lines only list the names the assembler keeps as module globals.
global label_to_address
# NOTE(review): spelled "referanced" here, while the functions below declare
# and use `address_to_referenced_label`.
global address_to_referanced_label
global constant_to_value
   
global PROGRAM_MEMORY_SIZE
global PROGRAM_MEMORY
global PROGRAM_MEMORY_START
global PROGRAM_MEMORY_POINTER
global PROGRAM_MEMORY_END
# NOTE(review): the functions below read `currentline` (no underscore).
global current_line






# =================== Look up tables ====================

# This dictionary is used to convert an operand (a register name or an
# operand placeholder such as "imm8") to its corresponding operand type number.
# The numbers match the "argtypes" list documented in load_instructions();
# 14 ("immL8") marks a line-relative jump offset.
arg_to_id = { "R0" : 0,
              "R1" : 1,
              "R2" : 2,
              "R3" : 3,
              "imm8" : 4,
              "imm24" : 5,
              "(imm24)" : 6,
              "((imm24))" : 7,
              "(X)" : 8,
              "(Y)" : 9,
              "X" : 10,
              "Y" : 11,
              "USP" : 12,
              "SSP" : 13,
              "immL8" : 14}


# =================== Convertion functions ====================
def isint(string):
    """Return True if ``string`` can be parsed by ``int()``, otherwise False.

    Only the conversion errors ``int()`` raises for unsuitable input
    (ValueError, TypeError) are treated as "not an integer"; unrelated
    exceptions such as KeyboardInterrupt are no longer swallowed.
    """
    try:
        int(string)
    except (TypeError, ValueError):
        return False

    return True
       


def byte(number, i):
    """Return byte number ``i`` of ``number``; byte 0 is the least significant one."""
    shift = 8 * i
    return (number >> shift) & 0xFF



def getitembits(item):
    """Return the bit width implied by an immediate literal.

    ``item`` is a literal such as ``0x0F``, ``0b00001111`` or ``0d7``,
    optionally wrapped in parentheses. The second character selects the base:

    * ``b`` - one bit per written digit
    * ``x`` - four bits per written digit
    * ``d`` - the number of bits needed to represent the value

    Any other base character yields 0, as does a decimal literal whose digits
    cannot be parsed (converttoint() deals with the malformed value later).

    The decimal case used to be computed as
    ``math.log(10 ^ (len(item) - 2) - 1, 2)``. ``^`` is XOR in Python and binds
    weaker than ``-``, so the result was below 4 for every decimal literal
    (values above 255 were still classed as 8 bit) and ``math.log(0)`` raised
    for 11-digit literals. Decimal values up to 255 still report fewer than
    9 bits, so literals that assembled before keep their 8 bit encoding.
    """
    if "(" in item:
        # Remove parentheses
        item = item.replace("(", "").replace(")", "")

    # Slicing instead of indexing: a one-character item gives "" here
    base_char = item[1:2]
    digits = item[2:]

    if base_char == "b":
        # Base: binary
        return len(digits)

    if base_char == "x":
        # Base: hexadecimal
        return len(digits) * 4

    if base_char == "d":
        # Base: decimal - width follows the value, not the digit count
        try:
            return int(digits, base=10).bit_length()
        except ValueError:
            return 0

    return 0


def converttoint(string):
    """Convert a literal with a base prefix into an integer.

    Supported prefixes are ``0b`` (binary), ``0x`` (hexadecimal) and ``0d``
    (decimal). Parentheses around the literal are ignored.

    If the prefix is unknown, or the digits are not valid for the selected
    base, an error is printed for the current line, the global ``error`` flag
    is set and 0 is returned. Invalid digits used to raise an uncaught
    ValueError instead of being reported.
    """
    global error

    if "(" in string:
        # We are only interested in the value
        string = string.replace("(", "").replace(")", "")

    base_by_prefix = {"0b": 2, "0x": 16, "0d": 10}
    base = base_by_prefix.get(string[0:2])

    if base is not None:
        try:
            return int(string[2:], base=base)
        except ValueError:
            # Malformed digits: reported below like an unknown prefix
            pass

    print("Error: " + displayrealline(currentline) + ": Couldnt recognise value '" + string + "'!")
    error = True

    return 0
       


# =================== Start of assembler  ====================

def load_instructions():
    """Load the instruction table from ``instructions.dat`` into ``instructions``.

    Every non-blank line of the file has the comma-separated format

        mnemonic,number of args,argtype0,argtype1,argtype2,instructionnumber

    and becomes one entry ``[mnemonic, int, int, int, int, int]`` in the global
    list ``instructions``. Fields after the sixth are ignored.

    argtypes:
      0 = register R0
      1 = register R1
      2 = register R2
      3 = register R3
      4 = immediate 8 bit
      5 = immediate 24 bit
      6 = indirect 24 bit
      7 = double indirect 24 bit
      8 = X index
      9 = Y index
      10 = X
      11 = Y
      12 = USP
      13 = SSP

    Raises:
        OSError: if ``instructions.dat`` cannot be opened.
        IndexError / ValueError: if a non-blank line has fewer than six fields
            or a non-numeric value in fields 1-5.
    """
    global instructions

    # The context manager closes the file, which was previously left open
    with open("instructions.dat") as instrfile:
        instrlines = instrfile.readlines()

    instructions = []
    for line in instrlines:
        # Blank lines (e.g. a trailing newline at the end of the file) carry no entry
        if not line.strip():
            continue

        fields = line.split(",")
        instruction = [fields[0]]
        for i in range(1, 6):
            instruction.append(int(fields[i]))

        instructions.append(instruction)


def replace_immidiates_labels(ininstruction):
    """Replace immediates, relative offsets and labels with placeholders.

    ``ininstruction`` is the split instruction: the mnemonic followed by its
    operand strings. The input list is not modified.

    Returns a tuple ``(instruction, operandlist, operandtypelist)``:

    * ``instruction`` - copy of the input in which every immediate, relative
      offset or label operand is replaced by its placeholder ("imm8", "imm24",
      "(imm24)", "((imm24))" or "immL8"); register operands stay as written.
    * ``operandlist`` - per operand the immediate value, or -1 for a label or
      relative offset that is resolved later (0 for registers).
    * ``operandtypelist`` - per operand the ``arg_to_id`` number for register
      operands; the remaining entries stay 0 here.

    An immediate with more than two levels of parentheses is reported as an
    error and sets the global ``error`` flag.
    """
    global error

    # Placeholder for a 24 bit operand, selected by its number of "("
    placeholder_by_depth = {0: "imm24", 1: "(imm24)", 2: "((imm24))"}

    instruction = ininstruction[:]
    operandlist = [0, 0, 0]
    operandtypelist = [0, 0, 0]

    # The first element is the mnemonic itself, so operands start at slot 1
    for index, item in enumerate(instruction[1:]):
        slot = index + 1

        # Known register / operand name: only its type has to be remembered
        if item in arg_to_id:
            operandtypelist[index] = arg_to_id[item]
            continue

        depth = item.count("(")

        # Immediate value
        if ("0x" in item) or ("0d" in item) or ("0b" in item):
            if getitembits(item) < 9:
                # 8 bit immediate: save the operand, then insert the placeholder
                operandlist[index] = converttoint(item)
                instruction[slot] = "imm8"

            elif depth in placeholder_by_depth:
                # 24 bit immediate: direct, single indirect or double indirect
                if depth == 0:
                    valuetext = item
                else:
                    valuetext = item.replace("(", "").replace(")", "")

                operandlist[index] = converttoint(valuetext)
                instruction[slot] = placeholder_by_depth[depth]

            else:
                print("Error: " + displayrealline(currentline) + ": Couldnt recognise value '" + item + "'!")
                error = True

            continue

        # Relative jump offset such as "2L"
        if (item[-1] == "L") and isint(item[:-1]):
            operandlist[index] = -1
            instruction[slot] = "immL8"
            continue

        # Neither a register nor an immediate: it has to be a label.
        # -1 tells the writer to remember the invocation address for later.
        if depth in placeholder_by_depth:
            instruction[slot] = placeholder_by_depth[depth]
            operandlist[index] = -1

    return instruction, operandlist, operandtypelist







def write_instruction_to_memory(instructionid, operandlist, operandtypelist, operandnum, thisinstruction):
    """Write one instruction and its operands at PROGRAM_MEMORY_POINTER.

    Parameters:
        instructionid: value stored as the instruction byte.
        operandlist: per operand the immediate value, or -1 if the value is a
            label / relative offset that is filled in later.
        operandtypelist: per operand its ``arg_to_id`` type number.
        operandnum: number of operands to process.
        thisinstruction: the split source instruction (mnemonic + operand
            strings); used to recover label names and relative offsets.

    Bytes written after the instruction byte, per operand type:
        4          one byte holding the 8 bit immediate
        5, 6, 7    three bytes, most significant byte first. For a label
                   (-1 in ``operandlist``) zeros are written and the address
                   of the first byte is recorded in
                   ``address_to_referenced_label`` together with the label name.
        14         one zero byte; its address is recorded in
                   ``address_to_referenced_line`` with ``(line offset,
                   currentline)``. The sign of the offset follows the "+" or
                   "-" in the mnemonic; without either nothing is recorded.
        other      register operands, nothing is written.

    Type 7 (double indirect, "((imm24))") used to be skipped, so such
    instructions were emitted without their address bytes and label
    references inside them were never recorded.
    """
    global PROGRAM_MEMORY_POINTER
    global address_to_referenced_label

    PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = instructionid
    PROGRAM_MEMORY_POINTER += 1

    for operandindex in range(0, operandnum):
        operandtype = operandtypelist[operandindex]

        if operandtype == 4:
            # 8 bit immediate
            PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = operandlist[operandindex]
            PROGRAM_MEMORY_POINTER += 1

        elif operandtype in (5, 6, 7):
            # 24 bit value: direct, indirect or double indirect
            if operandlist[operandindex] != -1:
                # The operand was an immediate, its value is already known
                operand = operandlist[operandindex]

            else:
                # The value is unknown for now: remember where it belongs and
                # the label name (without parentheses) so it can be filled in later
                labelname = thisinstruction[(operandindex + 1)].replace("(", "").replace(")", "")
                address_to_referenced_label[PROGRAM_MEMORY_POINTER] = labelname

                # Use 0 as placeholder for now...
                operand = 0

            # Write the operand into memory, most significant byte first
            for byteindex in (2, 1, 0):
                PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = byte(operand, byteindex)
                PROGRAM_MEMORY_POINTER += 1

        elif operandtype == 14:
            # Relative line jump offset: remember its address and the line offset
            mnemonic = thisinstruction[0]

            # Operand without the trailing L
            arg = thisinstruction[(operandindex + 1)].replace("L", "")

            if "+" in mnemonic:
                # Jump forward
                address_to_referenced_line[PROGRAM_MEMORY_POINTER] = (int(arg), currentline)

            elif "-" in mnemonic:
                # Jump back
                address_to_referenced_line[PROGRAM_MEMORY_POINTER] = (((-1) * int(arg)), currentline)

            # Use 0 as placeholder for now...
            PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = 0
            PROGRAM_MEMORY_POINTER += 1
   






def compileinstruction(thisinstruction):
    """Translate one source instruction and write it into program memory.

    ``thisinstruction`` is the instruction text with the mnemonic and operands
    separated by single spaces. The operands are abstracted into placeholders,
    the matching entry is looked up in the global ``instructions`` table and
    the instruction is written with write_instruction_to_memory().

    If no entry matches, an error is printed, the global ``error`` flag is set
    and the sentinel id 300 is written in place of an instruction id.
    """
    global PROGRAM_MEMORY_POINTER
    global error

    unknown_id = 300  # No valid instruction id

    # Split into mnemonic and operands; one part is the mnemonic itself
    parts = thisinstruction.split(" ")
    operandnum = len(parts) - 1

    # Convert immediates and labels into their abstract forms
    newinstruction, operandlist, operandtypelist = replace_immidiates_labels(parts)

    # Convert the operand strings into their type numbers
    for position, placeholder in enumerate(newinstruction[1:]):
        operandtypelist[position] = arg_to_id[placeholder]

    # Build the search mask: mnemonic, operand count and three operand types
    searched_instruction = [newinstruction[0], operandnum]
    for slot in range(3):
        if slot < operandnum:
            operandtype = operandtypelist[slot]
            # immL8 is the same value type as imm8
            searched_instruction.append(4 if operandtype == 14 else operandtype)
        else:
            # Pad unused operand slots with zeros
            searched_instruction.append(0)

    # The first table entry matching the mask provides the instruction id
    instructionid = next((entry[5] for entry in instructions if entry[0:5] == searched_instruction), unknown_id)

    # Instruction wasnt found!
    if instructionid == unknown_id:
        print("Error: " + displayrealline(currentline) + ": Couldnt find instruction " + " ".join(newinstruction))
        error = True

    # Write instruction and operands into memory
    write_instruction_to_memory(instructionid, operandlist, operandtypelist, operandnum, parts)







def clear_create_tables():
    """Reset all assembler state: lookup tables, counters and program memory.

    Creates empty label and line tables and a zero-filled program memory of
    16 MiB entries, and sets every PROGRAM_MEMORY_* counter back to 0.
    """
    global label_to_address, address_to_referenced_label
    global line_to_address, address_to_referenced_line
    global end_to_original_line_numbers

    global PROGRAM_MEMORY_SIZE, PROGRAM_MEMORY
    global PROGRAM_MEMORY_START, PROGRAM_MEMORY_POINTER, PROGRAM_MEMORY_END
    global PROGRAM_MEMORY_MAX, PROGRAM_MEMORY_MIN, PROGRAM_MEMORY_DATA
    global current_line

    # Absolute jumps
    label_to_address, address_to_referenced_label = {}, {}

    # Relative jumps
    line_to_address, address_to_referenced_line = {}, {}

    # Original line numbers to end line numbers
    end_to_original_line_numbers = {}

    # One list entry per address of the memory image
    PROGRAM_MEMORY_SIZE = 16 * 1024 * 1024
    PROGRAM_MEMORY = [0] * PROGRAM_MEMORY_SIZE

    PROGRAM_MEMORY_START = PROGRAM_MEMORY_POINTER = PROGRAM_MEMORY_END = 0
    PROGRAM_MEMORY_MAX = PROGRAM_MEMORY_MIN = 0
    PROGRAM_MEMORY_DATA = 0




   
# Internal assembler functions
def internal_org(line):
    """Handle the origin directive: move the write pointer to a new address.

    ``line`` is the split directive; ``line[1]`` holds the address literal.
    Sets PROGRAM_MEMORY_START and PROGRAM_MEMORY_POINTER to that address and
    prints an info message. A warning is printed if the new origin lies below
    the current write pointer.

    PROGRAM_MEMORY_START is now declared global; the assignment used to create
    a local variable, so the module-level value was never updated.
    """
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_START

    # Parse the address once; a malformed literal is then reported only once
    origin = converttoint(line[1])

    # Warn user that he is possibly overwriting his own code
    if PROGRAM_MEMORY_POINTER > origin:
        print("Warning: " + displayrealline(currentline) + ": origin was set to probably already used memory!")

    PROGRAM_MEMORY_START = origin
    PROGRAM_MEMORY_POINTER = origin
    print("Info: Origin at {0:06X}".format(origin))

   
def _extract_string_bytes(text):
    """Return the byte values of the first double-quoted literal in ``text``.

    The two-character sequence backslash + "n" is translated into a single
    line feed (10); every other character contributes ``ord(character)``.
    Returns None if the closing quote is missing.

    Raises:
        ValueError: if ``text`` contains no double quote at all.
    """
    start = text.index('"')
    end = text.find('"', start + 1)
    if end == -1:
        return None

    content = text[(start + 1):end]
    values = []
    position = 0
    while position < len(content):
        if content[position] == "\\" and content[(position + 1):(position + 2)] == "n":
            # We have an \n (linefeed)
            values.append(10)
            position += 2
        else:
            values.append(ord(content[position]))
            position += 1

    return values


def _store_string(tokens, length_prefix=False, zero_terminated=False):
    """Write the quoted string found in ``tokens`` at PROGRAM_MEMORY_POINTER.

    ``tokens`` are the space-separated pieces of the source line that contain
    the string; they are joined with single spaces again before parsing.
    With ``length_prefix`` one byte holding the number of string bytes is
    written first; with ``zero_terminated`` a 0 byte is written last. Only the
    string bytes themselves are counted in PROGRAM_MEMORY_DATA.

    An unclosed string is reported as an error, sets the global ``error``
    flag and writes nothing.
    """
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_DATA
    global error

    text = " ".join(tokens)
    values = _extract_string_bytes(text)

    if values is None:
        # The message shows the text with the opening quote blanked out
        print("Error: " + displayrealline(currentline) + ": Unclosed string '" + text.replace('"', " ", 1) + "'")
        error = True
        return

    if length_prefix:
        # Store length of string
        PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = len(values)
        PROGRAM_MEMORY_POINTER += 1

    # Copy the string to memory
    for value in values:
        PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = value
        PROGRAM_MEMORY_POINTER += 1
        PROGRAM_MEMORY_DATA += 1

    if zero_terminated:
        # Store trailing 0
        PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = 0
        PROGRAM_MEMORY_POINTER += 1


def internal_dat_str(line):
    """Store a plain string (no length byte, no terminator).

    The string is taken from ``line[2:]`` as before. If ``line[1]`` already
    contains a double quote, the string is read from ``line[1:]`` instead, the
    way internal_dat_strz() and internal_dat_lstr() read it.
    NOTE(review): the caller is not visible here - confirm which token layout
    it actually passes.

    Previously the backslash of an ``\\n`` escape was written in addition to
    the line feed, and everything after the first escape was dropped.
    """
    if len(line) > 1 and '"' in line[1]:
        tokens = line[1:]
    else:
        tokens = line[2:]

    _store_string(tokens)


def internal_dat_strz(line):
    """Store a zero-terminated string taken from ``line[1:]``.

    Previously everything after the first ``\\n`` escape was dropped, and an
    unclosed string raised UnboundLocalError right after being reported.
    """
    _store_string(line[1:], zero_terminated=True)


def internal_dat_lstr(line):
    """Store a length-preceded string taken from ``line[1:]``.

    The length byte holds the number of bytes that follow it, so an ``\\n``
    escape counts as one byte. Previously any non-empty string raised
    UnboundLocalError because ``found_n`` was read before it was assigned.
    """
    _store_string(line[1:], length_prefix=True)
           
def _store_integers(line, width):
    """Store the integer value(s) of a data directive, ``width`` bytes each.

    Multi-byte values are written most significant byte first; a one byte
    value is stored exactly as converttoint() returns it.
    PROGRAM_MEMORY_DATA grows by ``width`` per value.

    The value list is the comma-separated token ``line[1]`` if that token
    contains a comma or if there is no third token; otherwise it is
    ``line[2]``, which may itself be a comma-separated list.
    The single-value case used to read ``line[2]`` unconditionally and raised
    IndexError for two-token lines. The caller's ``line`` list is no longer
    modified.
    NOTE(review): the caller is not visible here - confirm which token layout
    it actually passes.
    """
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_DATA

    if "," in line[1] or len(line) < 3:
        valuetoken = line[1]
    else:
        valuetoken = line[2]

    # Go through all values
    for value in valuetoken.split(","):
        data_value = converttoint(value)

        if width == 1:
            cells = [data_value]
        else:
            cells = [byte(data_value, index) for index in range(width - 1, -1, -1)]

        for cell in cells:
            PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = cell
            PROGRAM_MEMORY_POINTER += 1

        PROGRAM_MEMORY_DATA += width


def internal_dat_int(line):
    """Store one or more 1 byte integers at PROGRAM_MEMORY_POINTER."""
    _store_integers(line, 1)


def internal_dat_int16(line):
    """Store one or more 2 byte integers, high byte first."""
    _store_integers(line, 2)


def internal_dat_int24(line):
    """Store one or more 3 byte integers, high byte first."""
    _store_integers(line, 3)

                       
def internal_dat_space(line):
    """Reserve ``line[1]`` bytes of zero-filled program memory.

    The write pointer and PROGRAM_MEMORY_DATA both advance by that amount.
    """
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_DATA

    # Number of bytes to reserve
    zeros = converttoint(line[1])
    first = PROGRAM_MEMORY_POINTER

    # Fill the reserved range with zero
    for offset in range(zeros):
        PROGRAM_MEMORY[first + offset] = 0

    PROGRAM_MEMORY_POINTER = first + zeros
    PROGRAM_MEMORY_DATA = PROGRAM_MEMORY_DATA + zeros

                   
def internal_dat_spacealign(line):
    """Pad program memory with zeros until the write pointer is a multiple of n.

    n is the operand ``line[1]``. Every padding byte is counted in
    PROGRAM_MEMORY_DATA. Nothing is written if the pointer is already aligned.
    """
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_DATA

    # n is the operand
    align_n = converttoint(line[1])

    # A non-zero remainder means the pointer is not aligned yet
    while PROGRAM_MEMORY_POINTER % align_n:
        PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = 0
        PROGRAM_MEMORY_POINTER = PROGRAM_MEMORY_POINTER + 1
        PROGRAM_MEMORY_DATA = PROGRAM_MEMORY_DATA + 1

    # Done

   
def internal_inc(line):
    """Handle the .inc directive: include an assembly or a label file.

    line[1] is the name of the file to include.  The file type is taken from
    the file name extension:

    * "s"   - assembly file.  Its non-empty lines (comments stripped) are
              inserted into filelines directly after the current line, the
              line number table end_to_original_line_numbers is shifted
              accordingly and total_lines is increased.
    * "lbl" - label file with one "NAME:ADDRESS" entry per line (the format
              written by internal_exp_lbl).  The entries are added to
              label_to_address.

    A missing file, an unknown file type or a malformed label entry is
    reported and sets the global error flag.
    """
    global total_lines
    global end_to_original_line_numbers
    global error

    import os

    incfilename = line[1]

    try:
        # The with block closes the file even if reading fails
        with open(incfilename) as incfile:
            incfilelines = incfile.readlines()
    except FileNotFoundError:
        print("Error: " + displayrealline(currentline) + ": Include file " + incfilename + " not found!")
        error = True
        return

    # Remove the line break and everything after # (start of a comment)
    incfilelines = [incfileline.rstrip("\n").split("#")[0] for incfileline in incfilelines]

    # Keep the non-empty lines together with their line number in the
    # included file (1 based)
    keptlines = [(linenum + 1, incfileline)
                 for linenum, incfileline in enumerate(incfilelines)
                 if incfileline != ""]

    # Use the last extension so that names with several dots or with a
    # directory part (for example "../lib/io.s") are recognised as well
    filetype = os.path.splitext(incfilename)[1][1:]

    if filetype == "s":
        # Assembly file
        actuallines = len(keptlines)

        # Shift all line numbers behind the current line forward
        shifted_line_numbers = {}
        for endline, origin in end_to_original_line_numbers.items():
            if endline > currentline:
                shifted_line_numbers[(endline + actuallines)] = origin
            else:
                shifted_line_numbers[endline] = origin

        # Insert the included lines directly behind the current line and
        # remember where each of them came from
        for position, (linenum, incfileline) in enumerate(keptlines, start=1):
            shifted_line_numbers[(currentline + position)] = (str(linenum) + ":" + incfilename)
            filelines.insert((currentline + position), incfileline)

        end_to_original_line_numbers = shifted_line_numbers

        # Update number of lines
        total_lines += actuallines

        print("Info: " + displayrealline(currentline) + ": Included file " + incfilename)

    elif filetype == "lbl":
        # Label file
        for linenum, incfileline in keptlines:
            label, _, address = incfileline.partition(":")

            try:
                # Labels are stored as [address, flag] everywhere else in this
                # file (flag 0 = local, 1 = global), so a bare integer would
                # break the label lookups.  Imported labels are stored with
                # flag 0 so that .exp_lbl does not export them again.
                label_to_address[label] = [int(address), 0]
            except ValueError:
                print("Error: " + incfilename + ": L." + str(linenum) + ": Malformed label entry " + incfileline)
                error = True

        print("Info: " + displayrealline(currentline) + ": Included file " + incfilename)

    else:
        print("Error: " + displayrealline(currentline) + ": Unknown file type ." + filetype)
        error = True


       


def internal_lbl(line):
    """Define a local label: ".lbl NAME" or ".lbl NAME ADDRESS".

    Without an address the label points to the current
    PROGRAM_MEMORY_POINTER, otherwise to the given address (converted with
    converttoint()).  The label is stored in label_to_address as
    [address, 0]; the 0 marks it as local.  Any other number of arguments is
    reported and sets the global error flag.
    """
    global error

    argument_count = len(line)

    # Reject everything that is neither "NAME" nor "NAME ADDRESS"
    if argument_count not in (2, 3):
        print("Error: " + displayrealline(currentline) + ": Unknown arguments for " + " ".join(line))
        error = True
        return

    if argument_count == 3:
        # An explicit address replaces the current one
        address = converttoint(line[2])
    else:
        # No address is given so it has to be the current one
        address = PROGRAM_MEMORY_POINTER

    label_to_address[line[1]] = [address, 0]


def internal_glbl(line):
    """Define a global label: ".glbl NAME" or ".glbl NAME ADDRESS".

    Global labels are used like local ones but are stored in
    label_to_address as [address, 1]; the 1 marks them for export by
    internal_exp_lbl.  Without an address the label points to the current
    PROGRAM_MEMORY_POINTER, otherwise to the given address (converted with
    converttoint()).  Any other number of arguments is reported and sets the
    global error flag.
    """
    global error

    if len(line) == 2:
        # No address is given so it has to be the current one
        label_to_address[line[1]] = [PROGRAM_MEMORY_POINTER, 1]

    elif len(line) == 3:
        # We have an address here so we use that instead of the current address
        label_to_address[line[1]] = [converttoint(line[2]), 1]

    else:
        # The directive itself is known, only its arguments are wrong, so use
        # the same wording as internal_lbl instead of "Unknown assembler
        # instruction"
        print("Error: " + displayrealline(currentline) + ": Unknown arguments for " + " ".join(line))
        error = True
   

def internal_exp_lbl(line):
    """Write all global labels known so far to the label file line[1].

    Every label whose flag in label_to_address is 1 is written as one
    "NAME:ADDRESS" line with the address as a decimal number.  An existing
    file is overwritten.
    """
    filename = line[1]

    # The with block closes the file even if a write fails
    with open(filename, "w") as labelfile:
        for label, entry in label_to_address.items():
            # Only export global labels
            if entry[1] == 1:
                labelfile.write(label + ":" + str(entry[0]) + "\n")

    print("Info: " + displayrealline(currentline) + ": Created label file " + filename)



# Dispatch table for the internal assembler functions.
# The key is the control word without its leading "." and the value is the
# function that gets called with the line split at the spaces.
internal_funcs = {
    # Placement
    "org": internal_org,

    # Data
    "str": internal_dat_str,
    "strz": internal_dat_strz,
    "lstr": internal_dat_lstr,
    "int": internal_dat_int,
    "int16": internal_dat_int16,
    "int24": internal_dat_int24,
    "space": internal_dat_space,
    "spacealign": internal_dat_spacealign,

    # Includes and labels
    "inc": internal_inc,
    "lbl": internal_lbl,
    "glbl": internal_glbl,
    "exp_lbl": internal_exp_lbl,
}



def displaystats(asmfilename, outfilename):
    """Print a summary of the finished assembly run.

    asmfilename and outfilename are only echoed.  All other numbers come from
    the module level state: total_lines, label_to_address (flag 0 = local,
    flag 1 = global), PROGRAM_MEMORY_MIN / PROGRAM_MEMORY_MAX,
    PROGRAM_MEMORY_DATA, starttime and the error flag.
    """
    # Count the labels by their flag; labels with any other flag are ignored
    flags = [label_to_address[label][1] for label in label_to_address]
    llabels = flags.count(0)
    glabels = flags.count(1)

    # Give some info about the assembly
    print(" ===== Assembly stats ===== ")
    print("Input file      : " + asmfilename)
    print("Output file     : " + outfilename)
    print("Total lines     : " + str(total_lines))
    print("Global labels   : " + str(glabels))
    print("Local labels    : " + str(llabels))
    print("Total bytes     : " + str(PROGRAM_MEMORY_MAX - PROGRAM_MEMORY_MIN))
    print("Data bytes      : " + str(PROGRAM_MEMORY_DATA))
    # Elapsed time rounded down to whole seconds
    print("Assembly time   : " + str((time.time() - starttime) // 1) + " s")

    if error:
        print("End status      : not successful")
    else:
        print("End status      : successful")

def displayrealline(endline):
    """Translate a line index of the processed program into "<file>: L.<line>".

    end_to_original_line_numbers maps the index of a line in the processed
    line list to "<line number>:<file name>" of the file it came from.  For an
    unknown index the index itself is printed and "???: L.0" is returned.
    """
    try:
        # Split at the first ":" only, the file name may contain ":" itself
        # (for example a drive letter)
        originalline, originfile = end_to_original_line_numbers[endline].split(":", 1)
    except KeyError:
        # Diagnostic output: show the index that could not be resolved
        print(endline)
        originfile = "???"
        originalline = 0

    return originfile + ": L." + str(originalline)


   
def compilefile(asmfilename, outfilename):
    """Assemble the file asmfilename and write the result to outfilename.

    The output format is chosen by the last three characters of outfilename:
    "bin" writes the raw bytes, "mif" writes a memory initialisation file.
    Any other ending is reported as an error (the output file is still
    created, but stays empty).

    Steps:
      1. Read the source, strip comments (everything after #) and empty
         lines and remember the original line number of every kept line.
      2. First pass: run the internal function for every control word (lines
         starting with ".") and compileinstruction() for everything else.
         The program memory address of every line is recorded.
      3. Second pass: fill in the label addresses (three bytes each) and the
         offsets of the line relative jumps.
      4. Write program memory from PROGRAM_MEMORY_START up to the final
         PROGRAM_MEMORY_POINTER and print the statistics.

    Problems in the source are printed and set the global error flag; the
    output file is written regardless.
    """
    global PROGRAM_MEMORY_START
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_MAX
    global PROGRAM_MEMORY_MIN
    global PROGRAM_MEMORY_DATA
    global PROGRAM_MEMORY
    global currentline
    global end_to_original_line_numbers
    global total_lines
    global filelines
    global error
    global starttime

    starttime = time.time()

    clear_create_tables()

    # The with block closes the source file as soon as it has been read
    with open(asmfilename) as asmfile:
        filelines = asmfile.readlines()

    # Raw binary output needs a binary file.  The decision uses the file name
    # ending, the same test that selects the writer further down (it used to
    # look at the first three characters of the name).
    binary_output = outfilename[-3:] == "bin"

    # The with block closes the output file even if the assembly fails
    with open(outfilename, "wb" if binary_output else "w") as outfile:

        # Load instructions from file
        load_instructions()

        # Remove the line break and everything after # (start of a comment)
        filelines = [fileline.rstrip("\n").split("#")[0] for fileline in filelines]

        # Remove empty lines
        filelinesnew = []
        for linenum, fileline in enumerate(filelines):
            if fileline != "":
                filelinesnew.append(fileline)
                # Line number in the main file
                end_to_original_line_numbers[(len(filelinesnew) - 1)] = (str(linenum + 1) + ":" + asmfilename)

        filelines = filelinesnew

        error = False

        print("Info: First pass...")
        # First pass: compile code using placeholders for labels

        total_lines = len(filelines)
        currentline = 0
        # total_lines and filelines may grow while looping (see internal_inc)
        while currentline < total_lines:
            # always save PROGRAM_MEMORY_POINTER for the current line
            line_to_address[currentline] = PROGRAM_MEMORY_POINTER
            fileline = filelines[currentline]

            if fileline[0] == ".":
                # Control words all start with a .

                # Split fileline at the spaces
                fileline = fileline.split(" ")

                # Remove .
                command = fileline[0][1:]

                try:
                    # Look up function associated with instruction
                    func = internal_funcs[command]
                    func(fileline)

                except KeyError:
                    # NOTE(review): a KeyError raised inside func ends up here
                    # as well and is reported as an unknown instruction
                    print("Error: " + displayrealline(currentline) + ": Couldnt find internal assembler instruction " + " ".join(fileline))
                    error = True

            else:
                # It seems to be a real machine instruction so assemble it!
                compileinstruction(fileline)

            # Go to next line
            currentline += 1

            # remember lowest and highest addresses
            if PROGRAM_MEMORY_POINTER < PROGRAM_MEMORY_MIN:
                PROGRAM_MEMORY_MIN = PROGRAM_MEMORY_POINTER

            if PROGRAM_MEMORY_POINTER > PROGRAM_MEMORY_MAX:
                PROGRAM_MEMORY_MAX = PROGRAM_MEMORY_POINTER

        print("Info: Second pass...")
        # Second pass part 1: replace label placeholders using collected info
        for address in address_to_referenced_label:
            referenced_label = address_to_referenced_label[address]

            if referenced_label in label_to_address:
                # We found the value of the label!
                # Replace the placeholders!
                value = label_to_address[referenced_label][0]

                PROGRAM_MEMORY[address] = byte(value, 2)
                PROGRAM_MEMORY[(address + 1)] = byte(value, 1)
                PROGRAM_MEMORY[(address + 2)] = byte(value, 0)

            else:
                # NOTE(review): currentline is past the last line here, so the
                # reported position is not the line that used the label
                print("Error: " + displayrealline(currentline) + ": Unknown label " + referenced_label)
                error = True

        # Second pass part 2: replace line jump offsets
        for address in address_to_referenced_line:
            offset, line = address_to_referenced_line[address]
            targetline = line + offset

            try:
                # Search the address of the target line
                destination_addr = line_to_address[targetline]

                if offset < 0:
                    # We have to jump back: distance from the start of the
                    # source line back to the target line
                    real_offset = line_to_address[line] - destination_addr
                else:
                    # We have to jump forward: distance from the last byte of
                    # the source line to the target line
                    real_offset = destination_addr - (line_to_address[(line + 1)] - 1)

            except LookupError:
                # The target (or the line behind the jump) does not exist
                print("Error: " + displayrealline(line) + ": Line relative jump target is outside of the program")
                error = True
                continue

            # Replace placeholder with real offset
            PROGRAM_MEMORY[address] = real_offset

        PROGRAM_MEMORY_END = PROGRAM_MEMORY_POINTER
        PROGRAM_MEMORY_SIZE = PROGRAM_MEMORY_END - PROGRAM_MEMORY_START

        print("Info: Writing to file...")
        # Now write output to file...
        if binary_output:
            # Raw binary out: the memory cells hold integers, a binary file
            # only accepts bytes
            outfile.write(bytes(PROGRAM_MEMORY[i] for i in range(PROGRAM_MEMORY_START, PROGRAM_MEMORY_END)))

        elif outfilename[-3:] == "mif":
            # Memory initiation file(used by altera quartus)

            # Start with header...
            outfile.write("DEPTH = " + str(PROGRAM_MEMORY_SIZE) + ";\n")
            outfile.write("WIDTH = 8;\n")
            outfile.write("ADDRESS_RADIX = UNS;\n")
            outfile.write("DATA_RADIX = UNS;\n")
            outfile.write("CONTENT\nBEGIN\n0 : ")

            # Write data now
            for i in range(PROGRAM_MEMORY_START, PROGRAM_MEMORY_END):
                outfile.write(str(PROGRAM_MEMORY[i]) + " ")

            # End memory file
            outfile.write(";\nEND;")

        else:
            print("Error: Unknown memory file type " + outfilename[-4:])
            # Nothing was written, so the run must not count as successful
            error = True

        # Summarize results
        displaystats(asmfilename, outfilename)
       
       
           
           
# Assemble the monitor program, but only when this file is run as a script,
# so that importing the module does not start an assembly run.
if __name__ == "__main__":
    compilefile("yuki_monitor.s", "system_ram_content.mif")