Yuki CPU assembler

This project is a two-pass assembler for my Yuki CPU. It converts pseudo-assembly files into bytecode, which is then executed on the Yuki CPU according to its microcode.

Features:
  • Support for 1, 2 and 3 byte integers
  • Support for zero-terminated strings as well as length-preceded strings
  • Support for labels
  • Support for including other assembly and label files
  • Support for exporting all global labels into a file
  • Support for line relative jumps
  • Support for data-alignment
  • Easily customizable instruction sets

As mentioned above, the assembler is a two-pass assembler, meaning it first assembles the instructions with placeholders for the labels and fills in the values of the labels afterwards. I also tried to make it as easy to customize as possible, so that the instruction set can be changed easily in case the microcode for the Yuki CPU is changed.

Here is a typical pseudo-assembly file for the assembler:

# Title:       Yuki Monitor for Yuki Soc
# Author:      R.Lux
# Last edited: 06.01.2018

.org 0x000000
.glbl DEBUG_ADDRESS 0xFFCD55
.glbl SIO0_OUT 0xFFCFEF
.glbl SIO0_BAUD 0xFFCFE1
.glbl SIO0_INT_MODE 0xFFCFE2
.glbl SIO0_INT_THRESHOLD 0xFFCFE0
.glbl SIO0_RX_DATA? 0xFFCFE3
.glbl SIO0_TX_READY? 0xFFCFE4
.glbl FUNCTIONSPACE0 0x000FF0
.glbl FUNCTIONSPACE1 0x000FF1
.glbl FUNCTIONSPACE2 0x000FF2
.glbl FUNCTIONSPACE3 0x000FF3
.glbl FUNCTIONSPACE4 0x000FF4
.glbl FUNCTIONSPACE5 0x000FF5
.glbl FUNCTIONSPACE6 0x000FF6
.glbl FUNCTIONSPACE7 0x000FF7
.glbl FUNCTIONSPACE8 0x000FF8


# Jump to the monitor start
JMP MONITOR_START
NOP

# Functions...
.glbl F_OUTPUT_STRINGZ
NOP
# ===== Output a zero-terminated string (start address in X) on SIO0 =====
# Save working registers..
PUSH R0
NOP
# Start address of zero-terminated string is in the X register
.lbl F_OUTPUT_STRINGZLOOP
LD R0 (X)
INC X
TST R0 0x00
JZ F_OUTPUT_STRINGZEND
NOP
# Output char
CALL F_OUTPUT_R0
# Repeat until 0x00 terminator
JMP F_OUTPUT_STRINGZLOOP
.lbl F_OUTPUT_STRINGZEND
# Restore working registers
POP R0
# Return to function caller
RET
NOP


.glbl F_OUTPUT_R0
# ==== Output R0 on SIO0 ======
PUSH R1
.lbl F_OUTPUT_R0_WAIT
LD R1 (SIO0_TX_READY?)
TST R1 0x00
JZ F_OUTPUT_R0_WAIT
ST R0 (SIO0_OUT)
POP R1
# Return to caller
RET
NOP


.glbl F_INPUT_R0
# ==== Get 1 byte from SIO0 and store it in R0
.lbl F_INPUT_R0_WAIT
LD R0 (SIO0_RX_DATA?)
TST R0 0x00
JZ F_INPUT_R0_WAIT
LD R0 (SIO0_OUT)
# Return to caller
RET
NOP


.glbl F_INPUT_UNTIL_\n
# ==== Start address in X, generate a zero-terminated string using the data of SIO0 until \n is input ====
# Last \n is not saved
# Save working registers
PUSH X
PUSH R0
PUSH R1
.lbl F_INPUT_UNTIL_\n_WAIT
LD R1 (SIO0_RX_DATA?)
TST R1 0x00
JZ F_INPUT_UNTIL_\n_WAIT
# We have a new char
LD R0 (SIO0_OUT)
# Delete last char
TST R0 0x08
JE F_INPUT_UNTIL_\n_DEL
JMP END_F_INPUT_UNTIL_\n_DEL
.lbl F_INPUT_UNTIL_\n_DEL
DEC X
LD R0 0x00
ST R0 (X)
LD R0 0x08
CALL F_OUTPUT_R0
JMP F_INPUT_UNTIL_\n_WAIT
.lbl END_F_INPUT_UNTIL_\n_DEL
TST R0 0x0A
JE F_INPUT_UNTIL_\n_FINISHED
CALL F_OUTPUT_R0
ST R0 (X)
INC X
JMP F_INPUT_UNTIL_\n_WAIT
.lbl F_INPUT_UNTIL_\n_FINISHED
# Save trailing 0
LD R0 0x00
ST R0 (X)
# Restore working registers
POP R1
POP R0
POP X
# Return to caller
RET
NOP


# Leave space for ISRs
.space 0xA0

.glbl MONITOR_START
# Initialise SIO0
LD R0 0d7# 115200 Baud
ST R0 (SIO0_BAUD)
LD R0 0d0# Interrupts disabled
ST R0 (SIO0_INT_MODE)
LD R0 0d1# Recognise all data even if it is only 1 byte
ST R0 (SIO0_INT_THRESHOLD)

# Allocate 256 bytes total for the stacks
LD USP 0x000F00
LD SSP 0x000F7F

# Output startup message
LD X hello_msg
CALL F_OUTPUT_STRINGZ
LD R0 0x55
ST R0 (DEBUG_ADDRESS)

.lbl MAINLOOP
LD R0 0x3E
CALL F_OUTPUT_R0
LD X 0x000FD0
CALL F_INPUT_UNTIL_\n
LD R1 (0x000FD0)

TST R1 0x72# Command: r (Read data from address)
JE r_CMD
JMP END_r_CMD


.glbl r_CMD
# syntax:r AAAAAA
# returns: byte at AAAAAA
LD R0 0x0A
CALL F_OUTPUT_R0
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
LD X (FUNCTIONSPACE3)
LD R0 (X)
PUSH R0
LD R0 0x00
ST R0 (0x000FD8)
LD X 0x000FD2
CALL F_OUTPUT_STRINGZ
LD R0 0x3A
CALL F_OUTPUT_R0
POP R0
CALL F_OUTPUT_R0_ASCII
LD R0 0x0A
CALL F_OUTPUT_R0
JMP MAINLOOP
.lbl END_r_CMD


TST R1 0x77# Command: w (Write data to address)
JE w_CMD
JMP END_w_CMD


.glbl w_CMD
# syntax:w AAAAAA BB
# returns: -
LD R0 0x0A
CALL F_OUTPUT_R0
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
LD X (FUNCTIONSPACE3)
PUSH X
LD X 0x000FD9
CALL F_CONVERT_ASCII_R0
POP X
ST R0 (X)
CALL F_OUTPUT_R0_ASCII
LD R0 0x0A
CALL F_OUTPUT_R0
JMP MAINLOOP
.lbl END_w_CMD


TST R1 0x52# Command: R (Read data between addresses)
JE R_CMD
JMP END_R_CMD

.glbl R_CMD
# syntax:R AAAAAA BBBBBB
# display all bytes between AAAAAA and BBBBBB
LD R0 0x0A
CALL F_OUTPUT_R0
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
# Mask off lower nibble of start address
LD Y (FUNCTIONSPACE3)
LD X 0x000FD9
CALL F_INPUT_PTR_ASCII
# Use R2 as rowcounter
LD R2 0x00
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
.lbl R_CMD_LOOP
LD R0 (Y)
# Output data
CALL F_OUTPUT_R0_ASCII
# Separate data by spaces
LD R0 0x20
CALL F_OUTPUT_R0
INC Y
# Loop until all bytes are output
JE Y (FUNCTIONSPACE3) R_CMD_LOOP_END
ADD R2 0x01
AND R2 0b00001111
TST R2 0x00
JE R_CMD_NEWLINE
JMP END_R_CMD_NEWLINE
.lbl R_CMD_NEWLINE
LD R0 0x0A
CALL F_OUTPUT_R0
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
NOP
.lbl END_R_CMD_NEWLINE
# Loop until all bytes are output
JE Y (FUNCTIONSPACE3) R_CMD_LOOP_END
NOP
JMP R_CMD_LOOP
.lbl R_CMD_LOOP_END
TST R2 0x0F
JE R_CMD_NEWLINE_SEC
JMP END_R_CMD_NEWLINE_SEC
.lbl R_CMD_NEWLINE_SEC
LD R0 0x0A
CALL F_OUTPUT_R0
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
NOP
.lbl END_R_CMD_NEWLINE_SEC
LD R0 (Y)
CALL F_OUTPUT_R0_ASCII
LD R0 0x0A
CALL F_OUTPUT_R0
JMP MAINLOOP
.lbl END_R_CMD


TST R1 0x57# Command: W(write multiple bytes)
JE W_CMD
JMP END_W_CMD_1

.lbl W_CMD
# syntax:
#W SSSSSS - start writing bytes at SSSSSS
# only send data after .
# send q to stop
# Use R3 as rowcounter
LD R3 0x00
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
LD Y (FUNCTIONSPACE3)
LD R0 0x0A
CALL F_OUTPUT_R0
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
LD R0 0x2E
CALL F_OUTPUT_R0


.lbl W_CMD_LOOP
CALL F_INPUT_ASCII_R0
LD R2 (0x000FD0)
TST R2 0x71
JE END_W_CMD_0
LD R2 (0x000FD1)
TST R2 0x71
JE END_W_CMD_0
NOP
ST R0 (Y)
INC Y

ADD R3 0x01
AND R3 0b00001111
TST R3 0x00
JE W_CMD_NEWLINE
JMP END_W_CMD_NEWLINE

.lbl W_CMD_NEWLINE
LD R0 0x0A
CALL F_OUTPUT_R0
CALL F_OUTPUT_Y_ASCII
LD R0 0x3A
CALL F_OUTPUT_R0
NOP
.lbl END_W_CMD_NEWLINE
NOP

LD R0 0x7F
CALL F_OUTPUT_R0
LD R0 0x2E
CALL F_OUTPUT_R0
JMP W_CMD_LOOP

.lbl END_W_CMD_0
NOP
LD R0 0x0A
CALL F_OUTPUT_R0
JMP MAINLOOP
.lbl END_W_CMD_1


TST R1 0x67# Command: g (Jump to address)
JE g_CMD
JMP END_g_CMD

.glbl g_CMD
# Get address
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
# Jump to address
JMP (FUNCTIONSPACE3)
.lbl END_g_CMD

TST R1 0x63# Command: c (Call function)
JE c_CMD
JMP END_c_CMD

.glbl c_CMD
# Get address
LD X 0x000FD2
CALL F_INPUT_PTR_ASCII
# Jump to address
CALL (FUNCTIONSPACE3)
LD R0 0x0A
CALL F_OUTPUT_R0
.lbl END_c_CMD

TST R1 0x62# Command: b (enter binary mode)
JE b_CMD
JMP END_b_CMD

.glbl b_CMD
# This mode is useful for uploading and downloading larger data chunks
# R1 holds the command
# 0x00 response means success
# 0x55 response means ready
# 0xFF response means failure
CALL F_INPUT_R0
PUSH R0
POP R1

TST R1 0x01# test if binary mode is ready
JE b_CMD_ping
JMP END_b_CMD_ping

.lbl b_CMD_ping
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_ping

TST R1 0x02# read a single value
JE b_CMD_sread
JMP END_b_CMD_sread

.lbl b_CMD_sread
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
LD X (FUNCTIONSPACE0)
LD R0 (X)
CALL F_OUTPUT_R0
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_sread


TST R1 0x03# write a single value
JE b_CMD_swrite
JMP END_b_CMD_swrite

.lbl b_CMD_swrite
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
CALL F_INPUT_R0
ST R0 ((FUNCTIONSPACE0))
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_swrite

TST R1 0x04# read multiple values
JE b_CMD_mread
JMP END_b_CMD_mread

.lbl b_CMD_mread
# Get start address of Y
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
LD Y (FUNCTIONSPACE0)
# Get end address
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
NOP
.lbl b_CMD_mread_loop
LD R0 (Y)
CALL F_OUTPUT_R0
JE Y (FUNCTIONSPACE0) b_CMD_mread_loop_end
INC Y
JMP b_CMD_mread_loop
.lbl b_CMD_mread_loop_end
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_mread

TST R1 0x05# write multiple values
JE b_CMD_mwrite
JMP END_b_CMD_mwrite

.lbl b_CMD_mwrite
# Get start address of Y
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE0)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE1)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE2)
LD Y (FUNCTIONSPACE0)
# Get end address
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE3)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE4)
CALL F_INPUT_R0
ST R0 (FUNCTIONSPACE5)
NOP
.lbl b_CMD_mwrite_loop
CALL F_INPUT_R0
ST R0 (Y)
JE Y (FUNCTIONSPACE3) b_CMD_mwrite_loop_end
# Indicate being ready
LD R0 0x55
CALL F_OUTPUT_R0
INC Y
JMP b_CMD_mwrite_loop
.lbl b_CMD_mwrite_loop_end
LD R0 0x00
CALL F_OUTPUT_R0
JMP b_CMD
.lbl END_b_CMD_mwrite

TST R1 0x06# exit binary mode
JE b_CMD_exit
JMP END_b_CMD_exit

.lbl b_CMD_exit
LD R0 0x00
CALL F_OUTPUT_R0
JMP END_b_CMD
.lbl END_b_CMD_exit

JMP b_CMD

.lbl END_b_CMD
JMP MAINLOOP
HALT

LD R0 0x00
ADD R0 0x01
JMP- 2L
# Data section
.glbl hello_msg
.strz "Yuki Monitor V1.0\n"

# Include file section
.inc ascii_functions.s

# Save all global labels
.exp_lbl yuki_labels.lbl

And here is a source listing of the assembler:

# Title:       Yuki CPU assembler main file
# Author:      R.Lux
# Last edited: 14.01.2018

import codecs
import math
import time

# Names of the assembler's shared state.
# NOTE(review): a `global` statement at module scope has no effect in Python,
# so these lines only serve as documentation; the variables themselves are
# created inside the functions that assign them.
global label_to_address
# NOTE(review): spelled "referanced" here, while the functions in this file
# use `address_to_referenced_label` - harmless because this statement is a no-op.
global address_to_referanced_label
global constant_to_value
   
global PROGRAM_MEMORY_SIZE
global PROGRAM_MEMORY
global PROGRAM_MEMORY_START
global PROGRAM_MEMORY_POINTER
global PROGRAM_MEMORY_END
# NOTE(review): the functions in this file read `currentline` (no underscore).
global current_line






# =================== Look up tables ====================

# Maps an operand spelling (a register name or an abstract immediate
# placeholder) to its numeric operand-type id. The id of each entry is simply
# its position in the list below.
arg_to_id = {
    operand_name: operand_id
    for operand_id, operand_name in enumerate([
        "R0",
        "R1",
        "R2",
        "R3",
        "imm8",
        "imm24",
        "(imm24)",
        "((imm24))",
        "(X)",
        "(Y)",
        "X",
        "Y",
        "USP",
        "SSP",
        "immL8",
    ])
}


# =================== Conversion functions ====================
def isint(string):
    """Tell whether ``string`` can be converted with ``int()``.

    Parameters:
        string: the value to probe (normally a text token).

    Returns:
        True when ``int(string)`` succeeds, False when it raises
        ``ValueError`` or ``TypeError``.

    Only the conversion errors are caught; anything else (for example
    ``KeyboardInterrupt``) propagates instead of being reported as
    "not an integer".
    """
    try:
        int(string)
    except (TypeError, ValueError):
        return False

    return True
       


def byte(number, i):
    """Return byte ``i`` of ``number`` (``i`` = 0 is the least significant byte)."""
    shift = 8 * i
    return (number >> shift) & 0xFF



def getitembits(item):
    """Estimate how many bits the immediate literal ``item`` occupies.

    Parameters:
        item: a literal such as ``0b1010``, ``0x000FF0`` or ``0d7``,
              optionally wrapped in indirection parentheses.

    Returns:
        * binary literals:  the number of written digits,
        * hex literals:     four bits per written digit (so the width is
                            chosen by how many digits are spelled out),
        * decimal literals: the bit length of the value itself,
        * 0 for anything else.

    The previous decimal formula used ``^`` (XOR) where a power was meant,
    which sized practically every decimal literal below 9 bits and raised a
    math domain error for 11-digit literals. Sizing decimals by their value
    keeps every literal up to 255 at 8 bits and correctly reports larger
    values as wider than 8 bits.
    """
    # Parentheses only mark indirection, they are not part of the number
    item = item.replace("(", "").replace(")", "")

    if len(item) < 2:
        # Too short to carry a base marker
        return 0

    base_marker = item[1]
    digits = item[2:]

    if base_marker == "b":
        # Base: binary - one bit per digit
        return len(digits)

    if base_marker == "x":
        # Base: hexadecimal - one nibble per digit
        return len(digits) * 4

    if base_marker == "d":
        # Base: decimal - digits do not map to a fixed bit count, so use the value
        try:
            return int(digits, 10).bit_length()
        except ValueError:
            # Not a valid decimal number; it cannot be sized here
            return 0

    return 0


def converttoint(string):
    """Convert a prefixed literal (``0b``, ``0x`` or ``0d``) to an integer.

    Parameters:
        string: the literal, optionally wrapped in indirection parentheses.

    Returns:
        The numeric value. When the prefix is not recognised an error is
        printed, the global ``error`` flag is set and 0 is returned.
    """
    global error

    # Indirection parentheses are irrelevant for the value itself
    if "(" in string:
        string = string.replace("(", "").replace(")", "")

    radix_by_prefix = {"0b": 2, "0x": 16, "0d": 10}
    prefix = string[0:2]

    if prefix in radix_by_prefix:
        return int(string[2:], base=radix_by_prefix[prefix])

    print("Error: " + displayrealline(currentline) + ": Couldnt recognise value '" + string + "'!")
    error = True
    return 0
       


# =================== Start of assembler  ====================

def load_instructions():
    """Load the instruction table from ``instructions.dat``.

    Each non-blank line of the file has the format

        mnemonic,number of args,argtype0,argtype1,argtype2,instructionnumber

    where the argtypes are:
        0 = register R0
        1 = register R1
        2 = register R2
        3 = register R3
        4 = immediate 8 bit
        5 = immediate 24 bit
        6 = indirect 24 bit
        7 = double indirect 24 bit
        8 = X index
        9 = Y index
        10 = X
        11 = Y
        12 = USP
        13 = SSP

    The result is stored in the global ``instructions`` as a list of
    ``[mnemonic, argcount, argtype0, argtype1, argtype2, instructionnumber]``
    entries (the mnemonic as text, everything else as int).

    Raises:
        OSError: when the file cannot be opened.
        IndexError / ValueError: when a non-blank line has fewer than six
            fields or a non-numeric field.
    """
    global instructions

    loaded = []

    # The with-block closes the file again, even when a line fails to parse
    with open("instructions.dat") as instrfile:
        for line in instrfile:
            # Tolerate blank lines (e.g. a trailing newline at the end of the file)
            if not line.strip():
                continue

            fields = line.split(",")
            loaded.append([fields[0]] + [int(fields[i]) for i in range(1, 6)])

    instructions = loaded


def replace_immidiates_labels(ininstruction):
    """Abstract the operands of one split instruction.

    Parameters:
        ininstruction: list whose first element is the mnemonic and whose
                       remaining elements are the operand strings.

    Returns:
        A tuple ``(instruction, operandlist, operandtypelist)``:
          * instruction     - copy of the input in which immediates, relative
                              line offsets and labels are replaced by their
                              placeholder (``imm8``, ``imm24``, ``(imm24)``,
                              ``((imm24))`` or ``immL8``),
          * operandlist     - per operand the immediate value, or -1 when the
                              value is only known later (label / line offset),
          * operandtypelist - per operand the ``arg_to_id`` number, filled in
                              only for operands that are known register names.
    """
    global error

    # Placeholder of a 24 bit operand, selected by its number of "("
    placeholder_by_depth = {0: "imm24", 1: "(imm24)", 2: "((imm24))"}

    instruction = ininstruction[:]
    operandlist = [0] * 3
    operandtypelist = [0] * 3

    # Element 0 is the mnemonic itself, so only the operands are inspected
    for index, item in enumerate(ininstruction[1:]):
        slot = index + 1
        depth = item.count("(")

        if item in arg_to_id:
            # Known register / operand name: nothing to store, only its type
            operandtypelist[index] = arg_to_id[item]
            continue

        if ("0x" in item) or ("0d" in item) or ("0b" in item):
            # Immediate value
            if getitembits(item) < 9:
                # Fits into 8 bit
                operandlist[index] = converttoint(item)
                instruction[slot] = "imm8"

            elif depth in placeholder_by_depth:
                # 24 bit: direct, single indirect or double indirect
                bare = item.replace("(", "").replace(")", "")
                operandlist[index] = converttoint(bare)
                instruction[slot] = placeholder_by_depth[depth]

            else:
                print("Error: " + displayrealline(currentline) + ": Couldnt recognise value '" + item + "'!")
                error = True

            continue

        if (item[-1] == "L") and isint(item[:-1]):
            # Relative line offset: the real value is resolved later
            operandlist[index] = -1
            instruction[slot] = "immL8"
            continue

        # Neither register nor immediate, so it has to be a label.
        # -1 marks the operand as "to be filled in later".
        if depth in placeholder_by_depth:
            instruction[slot] = placeholder_by_depth[depth]
            operandlist[index] = -1

    return instruction, operandlist, operandtypelist







def write_instruction_to_memory(instructionid, operandlist, operandtypelist, operandnum, thisinstruction):
    """Write one instruction and its operand bytes at PROGRAM_MEMORY_POINTER.

    Parameters:
        instructionid:   value stored as the first byte of the instruction.
        operandlist:     operand values; -1 marks a value that is not known yet.
        operandtypelist: ``arg_to_id`` number of every operand.
        operandnum:      number of operands to process.
        thisinstruction: the original split source line (mnemonic first); used
                         to recover label names and relative line offsets.

    Emitted per operand type:
        4  (imm8)                        - one byte
        5, 6, 7 (imm24 / (imm24) / ((imm24))) - three bytes, most significant first
        14 (immL8)                       - one placeholder byte
        anything else                    - nothing

    Unknown 24 bit operands are written as 0 and their address is recorded in
    ``address_to_referenced_label``; relative line offsets are recorded in
    ``address_to_referenced_line`` as ``(offset, currentline)``.

    Double indirect operands (type 7) used to be skipped entirely, which left
    the instruction without its address bytes and the label unrecorded.
    """
    global PROGRAM_MEMORY_POINTER
    global address_to_referenced_label

    PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = instructionid
    PROGRAM_MEMORY_POINTER += 1

    for operandindex in range(0, operandnum):
        operandtype = operandtypelist[operandindex]

        if operandtype == 4:
            # 8 bit immediate
            PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = operandlist[operandindex]
            PROGRAM_MEMORY_POINTER += 1

        elif operandtype in (5, 6, 7):
            # 24 bit value: direct, indirect or double indirect
            operand = operandlist[operandindex]

            if operand == -1:
                # The operand is a label whose value is filled in later.
                # Remember where it goes and its name (without parantheses).
                label = thisinstruction[(operandindex + 1)].replace("(", "").replace(")", "")
                address_to_referenced_label[PROGRAM_MEMORY_POINTER] = label

                # Use 0 as placeholder for now...
                operand = 0

            # Write the operand into memory, most significant byte first
            for position in (2, 1, 0):
                PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = byte(operand, position)
                PROGRAM_MEMORY_POINTER += 1

        elif operandtype == 14:
            # Relative line jump offset; the direction comes from the mnemonic
            arg = thisinstruction[(operandindex + 1)].replace("L", "")

            if "+" in thisinstruction[0]:
                # Jump forward
                address_to_referenced_line[PROGRAM_MEMORY_POINTER] = (int(arg), currentline)

            elif "-" in thisinstruction[0]:
                # Jump back
                address_to_referenced_line[PROGRAM_MEMORY_POINTER] = (((-1) * int(arg)), currentline)

            # Use 0 as placeholder for now...
            PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = 0
            PROGRAM_MEMORY_POINTER += 1
   






def compileinstruction(thisinstruction):
    """Assemble a single instruction line and write it to program memory.

    Parameters:
        thisinstruction: the source line, mnemonic and operands separated by
                         single spaces.

    The operands are abstracted via ``replace_immidiates_labels``, the
    resulting mnemonic/operand-type combination is looked up in the global
    ``instructions`` table and the instruction is written with
    ``write_instruction_to_memory``. When no table entry matches, an error is
    printed, the global ``error`` flag is set and the id 300 is written.
    """
    global PROGRAM_MEMORY_POINTER
    global error

    # First token is the mnemonic, every further token is one operand
    thisinstruction = thisinstruction.split(" ")
    operandnum = len(thisinstruction) - 1

    # Replace immediates and labels by their abstract placeholders
    newinstruction, operandlist, operandtypelist = replace_immidiates_labels(thisinstruction)

    # Translate every operand string into its type number
    for position, operand in enumerate(newinstruction[1:]):
        operandtypelist[position] = arg_to_id[operand]

    # Build the search key: mnemonic, operand count and three operand types.
    # Unused operand slots are 0; immL8 (14) is looked up as imm8 (4).
    searched_instruction = [newinstruction[0], operandnum]
    for slot in range(3):
        if slot < operandnum:
            kind = operandtypelist[slot]
            searched_instruction.append(4 if kind == 14 else kind)
        else:
            searched_instruction.append(0)

    # 300 means "no matching instruction found"
    instructionid = next(
        (entry[5] for entry in instructions if entry[0:5] == searched_instruction),
        300)

    if instructionid == 300:
        print("Error: " + displayrealline(currentline) + ": Couldnt find instruction " + " ".join(newinstruction))
        error = True

    # Write instruction and operands into memory
    write_instruction_to_memory(instructionid, operandlist, operandtypelist, operandnum, thisinstruction)







def clear_create_tables():
    """(Re)create the assembler's global tables, counters and program memory.

    All label/line lookup tables become empty dictionaries, the program
    memory becomes a zero-filled list of 16 * 1024 * 1024 entries and every
    memory bookkeeping value is reset to 0.
    """
    global label_to_address
    global address_to_referenced_label
    global line_to_address
    global address_to_referenced_line
    global end_to_original_line_numbers

    global PROGRAM_MEMORY_SIZE
    global PROGRAM_MEMORY
    global PROGRAM_MEMORY_START
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_END
    global PROGRAM_MEMORY_MAX
    global PROGRAM_MEMORY_MIN
    global PROGRAM_MEMORY_DATA
    # Declared only; this function does not assign it
    global current_line

    # Tables for absolute jumps (labels)
    label_to_address, address_to_referenced_label = {}, {}

    # Tables for relative jumps (line offsets)
    line_to_address, address_to_referenced_line = {}, {}

    # Final line numbers -> original line numbers
    end_to_original_line_numbers = {}

    # Zero-filled program memory
    PROGRAM_MEMORY_SIZE = 16 * 1024 * 1024
    PROGRAM_MEMORY = [0] * PROGRAM_MEMORY_SIZE

    # Memory bookkeeping
    PROGRAM_MEMORY_START = PROGRAM_MEMORY_POINTER = PROGRAM_MEMORY_END = 0
    PROGRAM_MEMORY_MAX = PROGRAM_MEMORY_MIN = PROGRAM_MEMORY_DATA = 0




   
# Internal assembler functions
def internal_org(line):
    """Handle the origin directive: continue assembling at a given address.

    Parameters:
        line: the split directive; ``line[1]`` is the new origin literal.

    Sets both ``PROGRAM_MEMORY_START`` and ``PROGRAM_MEMORY_POINTER`` to the
    given address and prints an info message. A warning is printed when the
    new origin lies below the current memory pointer.

    ``PROGRAM_MEMORY_START`` is now declared global; before, the assignment
    only created a local variable and the module-level value never changed.
    """
    global PROGRAM_MEMORY_START
    global PROGRAM_MEMORY_POINTER

    # Convert once, so a malformed operand is also reported only once
    origin = converttoint(line[1])

    # Warn user that he is possibly overwriting his own code
    if PROGRAM_MEMORY_POINTER > origin:
        print("Warning: " + displayrealline(currentline) + ": origin was set to probably already used memory!")

    PROGRAM_MEMORY_START = origin
    PROGRAM_MEMORY_POINTER = origin
    print("Info: Origin at {0:06X}".format(origin))

   
def _string_operand_bytes(line):
    """Return the byte values of the quoted string of a string directive.

    Parameters:
        line: the directive split on single spaces. Everything after the
              directive word is joined with spaces again, so blanks inside
              the quotes are preserved.

    Returns:
        A list with one integer per character between the first pair of
        double quotes, where the two characters backslash + n are turned
        into a single linefeed (10). ``None`` when the opening or closing
        quote is missing; in that case an error is printed and the global
        ``error`` flag is set.
    """
    global error

    text = " ".join(line[1:])
    opening = text.find('"')
    closing = text.find('"', opening + 1) if opening >= 0 else -1

    if closing < 0:
        print("Error: " + displayrealline(currentline) + ": Unclosed string '" + text + "'")
        error = True
        return None

    raw = text[(opening + 1):closing]
    values = []
    position = 0

    while position < len(raw):
        if raw.startswith("\\n", position):
            # We have an \n (linefeed): two source characters become one byte
            values.append(10)
            position += 2
        else:
            values.append(ord(raw[position]))
            position += 1

    return values


def _store_data_bytes(values):
    """Write ``values`` at PROGRAM_MEMORY_POINTER and count them as data bytes."""
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_DATA

    for value in values:
        PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = value
        PROGRAM_MEMORY_POINTER += 1
        PROGRAM_MEMORY_DATA += 1


def internal_dat_str(line):
    """Store a plain string (no terminator, no length) in program memory.

    Parameters:
        line: the split directive, e.g. ``[<directive>, '"some', 'text"']``.

    Fixes compared to the previous version: the string is taken from
    ``line[1:]`` like the other string directives, an ``\\n`` escape no
    longer emits a stray backslash, and the text after an escape is no
    longer dropped.
    """
    values = _string_operand_bytes(line)
    if values is None:
        return

    _store_data_bytes(values)


def internal_dat_strz(line):
    """Store a zero-terminated string in program memory.

    Parameters:
        line: the split directive, e.g. ``[<directive>, '"some', 'text"']``.

    Fixes compared to the previous version: text after an ``\\n`` escape is
    no longer dropped, and an unclosed string is reported as an error
    instead of ending in a NameError.
    """
    global PROGRAM_MEMORY_POINTER

    values = _string_operand_bytes(line)
    if values is None:
        return

    _store_data_bytes(values)

    # Store trailing 0 (as before, it is not added to PROGRAM_MEMORY_DATA)
    PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = 0
    PROGRAM_MEMORY_POINTER += 1


def internal_dat_lstr(line):
    """Store a length-preceded string in program memory.

    Parameters:
        line: the split directive, e.g. ``[<directive>, '"some', 'text"']``.

    One length byte is written first, followed by the characters. The length
    is the number of bytes actually emitted, so an ``\\n`` escape counts as
    one. Strings longer than 255 bytes cannot be described by the single
    length byte and are reported as an error.

    Fixes compared to the previous version: ``found_n`` was read before it
    was ever assigned (UnboundLocalError for every non-empty string), and the
    length byte counted the raw source characters.
    """
    global PROGRAM_MEMORY_POINTER
    global error

    values = _string_operand_bytes(line)
    if values is None:
        return

    if len(values) > 0xFF:
        print("Error: " + displayrealline(currentline) + ": String is too long for a length-preceded string!")
        error = True
        return

    # Store length of string (as before, it is not added to PROGRAM_MEMORY_DATA)
    PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = len(values)
    PROGRAM_MEMORY_POINTER += 1

    _store_data_bytes(values)

           
def _store_integer_values(line, width):
    """Store the integer value(s) of a data directive, ``width`` bytes each.

    Parameters:
        line:  the split directive. The values are taken from ``line[1]``
               when it contains a comma (several values) or when it is the
               only operand; otherwise from ``line[2]`` as before.
        width: number of bytes per value (1, 2 or 3).

    Values wider than one byte are written most significant byte first.
    One-byte values are stored exactly as converted (no masking), as before.
    ``line`` itself is left unmodified.
    """
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_DATA

    if ("," in line[1]) or (len(line) < 3):
        # Several values, or the single value directly follows the directive.
        # The latter used to fail with an IndexError on line[2].
        operand = line[1]
    else:
        operand = line[2]

    for value in operand.split(","):
        data_value = converttoint(value)

        if width == 1:
            cells = [data_value]
        else:
            cells = [byte(data_value, position) for position in range(width - 1, -1, -1)]

        for cell in cells:
            PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = cell
            PROGRAM_MEMORY_POINTER += 1
            PROGRAM_MEMORY_DATA += 1


def internal_dat_int(line):
    """Store one or more comma-separated 1 byte integers in program memory.

    Parameters:
        line: the split directive, e.g. ``[<directive>, "0x01,0x02"]``.
    """
    _store_integer_values(line, 1)


def internal_dat_int16(line):
    """Store one or more comma-separated 2 byte integers (high byte first).

    Parameters:
        line: the split directive, e.g. ``[<directive>, "0x1234"]``.
    """
    _store_integer_values(line, 2)


def internal_dat_int24(line):
    """Store one or more comma-separated 3 byte integers (high byte first).

    Parameters:
        line: the split directive, e.g. ``[<directive>, "0x123456"]``.
    """
    _store_integer_values(line, 3)

                       
def internal_dat_space(line):
    """Reserve a zero-filled gap in program memory.

    Parameters:
        line: the split directive; ``line[1]`` is the number of bytes.

    The bytes are set to 0, then the memory pointer and the data counter are
    both advanced by that amount.
    """
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_DATA

    zeros = converttoint(line[1])

    # Clear every cell of the gap, starting at the current pointer
    for offset in range(zeros):
        PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER + offset] = 0

    PROGRAM_MEMORY_POINTER += zeros
    PROGRAM_MEMORY_DATA += zeros

                   
def internal_dat_spacealign(line):
    """Pad program memory with zeros until the pointer is a multiple of n.

    Parameters:
        line: the split directive; ``line[1]`` is the alignment n.

    Every padding byte is counted in ``PROGRAM_MEMORY_DATA``. An alignment
    below 1 is reported as an error (the global ``error`` flag is set) and
    nothing is written; before, an alignment of 0 - which is also what
    ``converttoint`` returns for an unrecognised operand - ended in a
    ZeroDivisionError.
    """
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_DATA
    global error

    # n is the operand
    align_n = converttoint(line[1])

    if align_n < 1:
        print("Error: " + displayrealline(currentline) + ": Alignment has to be at least 1!")
        error = True
        return

    while (PROGRAM_MEMORY_POINTER % align_n) != 0:
        # Fill the space with 0
        PROGRAM_MEMORY[PROGRAM_MEMORY_POINTER] = 0
        PROGRAM_MEMORY_POINTER += 1
        PROGRAM_MEMORY_DATA += 1

   
def internal_inc(line):
    """Handle the include directive for assembly files and label files.

    ``line[1]`` is the file name. Every line of the file has its line
    terminator and everything from the first ``#`` on removed. The text after
    the last ``.`` of the file name then selects what happens:

    * ``s``   - the non-empty lines are inserted into ``filelines`` directly
      behind the current line, ``total_lines`` grows by the number of inserted
      lines and ``end_to_original_line_numbers`` is updated so diagnostics
      still name the original file and line.
    * ``lbl`` - every ``name:address`` line is added to ``label_to_address``.

    A missing file or an unknown file type is reported on stdout and recorded
    in the global ``error`` flag.
    """
    global total_lines
    global end_to_original_line_numbers
    global error

    incfilename = line[1]

    try:
        # The context manager closes the handle even if reading fails
        with open(incfilename) as incfile:
            rawlines = incfile.readlines()
    except FileNotFoundError:
        print("Error: " + displayrealline(currentline) + ": Include file " + incfilename + " not found!")
        # Needs the "global error" above; otherwise only a local is set and
        # the assembly is still reported as successful
        error = True
        return

    # Remove the line terminator and the comment part of every line
    incfilelines = [rawline.rstrip("\n").split("#")[0] for rawline in rawlines]

    # Use the text after the LAST dot so that names such as "../lib/io.s" or
    # "my.lib.s" are recognised; a name without any dot has no extension
    _, dot, extension = incfilename.rpartition(".")
    if not dot:
        extension = ""

    if extension == "s":
        # Assembly file: keep the non-empty lines together with their
        # 1-based line number inside the included file
        kept = [(number, text)
                for number, text in enumerate(incfilelines, start=1)
                if text != ""]
        inserted = len(kept)

        # Every line behind the include directive moves down by the number of
        # inserted lines; lines up to and including the directive stay put
        shifted = {}
        for endline, origin in end_to_original_line_numbers.items():
            if endline > currentline:
                shifted[endline + inserted] = origin
            else:
                shifted[endline] = origin

        # Record where each inserted line came from
        for position, (number, _) in enumerate(kept, start=1):
            shifted[currentline + position] = str(number) + ":" + incfilename

        end_to_original_line_numbers = shifted

        # Splice the included lines in right behind the directive
        insert_at = currentline + 1
        filelines[insert_at:insert_at] = [text for _, text in kept]
        total_lines += inserted

        print("Info: " + displayrealline(currentline) + ": Included file " + incfilename)

    elif extension == "lbl":
        # Label file: one "name:address" entry per line
        for incfileline in incfilelines:
            if incfileline.strip() == "":
                # Blank or comment-only lines carry no label
                continue

            fields = incfileline.split(":")
            # Entries must have the [address, flag] shape used by the other
            # label directives; a bare int breaks every later [0]/[1] lookup.
            # NOTE(review): flag 0 keeps imported labels out of a later label
            # export - confirm this is the intended treatment.
            label_to_address[fields[0]] = [int(fields[1]), 0]

        print("Info: " + displayrealline(currentline) + ": Included file " + incfilename)

    else:
        print("Error: " + displayrealline(currentline) + ": Unknown file type ." + extension)
        error = True


       


def internal_lbl(line):
    """Register a local label, stored in ``label_to_address`` with flag 0.

    ``line[1]`` is the label name. With two tokens the label takes the current
    program memory pointer; with three tokens it takes ``line[2]`` converted
    by ``converttoint``. Any other token count is reported on stdout and
    recorded in the global ``error`` flag.
    """
    global PROGRAM_MEMORY_POINTER
    global error

    token_count = len(line)

    # Reject anything that is neither ".lbl NAME" nor ".lbl NAME ADDRESS"
    if token_count not in (2, 3):
        print("Error: " + displayrealline(currentline) + ": Unknown arguments for " + " ".join(line))
        error = True
        return

    if token_count == 3:
        # An explicit address overrides the current position
        address = converttoint(line[2])
    else:
        address = PROGRAM_MEMORY_POINTER

    label_to_address[line[1]] = [address, 0]


def internal_glbl(line):
    """Register a global label, stored in ``label_to_address`` with flag 1.

    Global labels are used like local ones; the flag value 1 additionally
    marks them for the label export done by ``internal_exp_lbl``.

    ``line[1]`` is the label name. With two tokens the label takes the current
    program memory pointer; with three tokens it takes ``line[2]`` converted
    by ``converttoint``. Any other token count is reported on stdout and
    recorded in the global ``error`` flag.
    """
    global PROGRAM_MEMORY_POINTER
    global error

    if len(line) == 2:
        # No address is given so it has to be the current one
        label_to_address[line[1]] = [PROGRAM_MEMORY_POINTER, 1]

    elif len(line) == 3:
        # We have an address here so we use that instead of the current address
        label_to_address[line[1]] = [converttoint(line[2]), 1]

    else:
        # The instruction itself is known; only its arguments are wrong, so
        # the wording matches the one used for local labels
        print("Error: " + displayrealline(currentline) + ": Unknown arguments for " + " ".join(line))
        error = True
   

def internal_exp_lbl(line):
    """Write every global label to the file named by ``line[1]``.

    Only entries of ``label_to_address`` whose flag is 1 are exported, one per
    line in the form ``name:address``. The file is overwritten if it exists.
    An info message is printed once the file has been written.
    """
    filename = line[1]

    # The context manager closes the file even if a write or a malformed
    # table entry raises part-way through
    with open(filename, "w") as labelfile:
        for label, entry in label_to_address.items():
            # Only export global labels
            if entry[1] == 1:
                labelfile.write(label + ":" + str(entry[0]) + "\n")

    print("Info: " + displayrealline(currentline) + ": Created label file " + filename)



# Dispatch table for assembler directives.
# Key:   directive name as written in the source, without the leading "."
# Value: handler function, called with the tokenised source line
internal_funcs = {
    "org": internal_org,

    # Data emitting directives
    "str": internal_dat_str,
    "strz": internal_dat_strz,
    "lstr": internal_dat_lstr,
    "int": internal_dat_int,
    "int16": internal_dat_int16,
    "int24": internal_dat_int24,
    "space": internal_dat_space,
    "spacealign": internal_dat_spacealign,

    # File inclusion
    "inc": internal_inc,

    # Label definition and export
    "lbl": internal_lbl,
    "glbl": internal_glbl,
    "exp_lbl": internal_exp_lbl,
}



def displaystats(asmfilename, outfilename):
    """Print a summary of the assembly run to stdout.

    The summary lists the input and output file names, the line count, the
    number of labels with flag 1 (global) and flag 0 (local), the span between
    the lowest and highest program memory address, the data byte count, the
    elapsed time since ``starttime`` and whether an error was recorded.
    """
    # Collect the flag of every label once, then count both kinds
    flags = [label_to_address[name][1] for name in label_to_address]
    llabels = flags.count(0)
    glabels = flags.count(1)

    elapsed = (time.time() - starttime) // 1

    rows = (
        ("Input file      : ", asmfilename),
        ("Output file     : ", outfilename),
        ("Total lines     : ", str(total_lines)),
        ("Global labels   : ", str(glabels)),
        ("Local labels    : ", str(llabels)),
        ("Total bytes     : ", str(PROGRAM_MEMORY_MAX - PROGRAM_MEMORY_MIN)),
        ("Data bytes      : ", str(PROGRAM_MEMORY_DATA)),
        ("Assembly time   : ", str(elapsed) + " s"),
    )

    # Give some info about the assembly
    print(" ===== Assemlby stats ===== ")
    for caption, value in rows:
        print(caption + value)

    outcome = "not sucessful" if error else "sucessful"
    print("End status      : " + outcome)

def displayrealline(endline):
    """Return ``"<file>: L.<line>"`` for a line index of the flattened program.

    ``end_to_original_line_numbers`` stores each origin as
    ``"<line number>:<file name>"``. When ``endline`` has no entry, the index
    is printed and ``"???: L.0"`` is returned.
    """
    try:
        # Split at the first ":" only: the line number never contains one,
        # but the file name may (for example a path such as "C:\asm\main.s")
        originalline, originfile = end_to_original_line_numbers[endline].split(":", 1)
    except KeyError:
        # Echo the index that could not be mapped back to a source line
        print(endline)
        originfile = "???"
        originalline = 0
    return originfile + ": L." + str(originalline)


   
def compilefile(asmfilename, outfilename):
    """Assemble ``asmfilename`` and write the memory image to ``outfilename``.

    Steps, in order:

    1. Reset the assembler tables (``clear_create_tables``), read the source
       file and load the instruction set (``load_instructions``).
    2. Strip line terminators, ``#`` comments and empty lines, recording for
       every remaining line which file and line it came from.
    3. First pass: process each line, either with the directive handler found
       in ``internal_funcs`` (lines starting with ``.``) or with
       ``compileinstruction``.
    4. Second pass: patch the 3-byte label placeholders and the line-relative
       jump offsets recorded during the first pass.
    5. Write the cells from ``PROGRAM_MEMORY_START`` up to the final program
       memory pointer. The last three characters of ``outfilename`` select the
       format: ``bin`` (raw bytes) or ``mif`` (memory initialisation file).
    6. Print the statistics.

    Problems are reported on stdout and recorded in the global ``error`` flag.
    The output file is still written when earlier steps reported errors, but
    no file is created for an unknown output type.
    """
    global PROGRAM_MEMORY_START
    global PROGRAM_MEMORY_POINTER
    global PROGRAM_MEMORY_MAX
    global PROGRAM_MEMORY_MIN
    global PROGRAM_MEMORY_DATA
    global PROGRAM_MEMORY
    global currentline
    global end_to_original_line_numbers
    global total_lines
    global filelines
    global error
    global starttime

    starttime = time.time()

    clear_create_tables()

    # Read the whole source up front; the handle is closed right away
    with open(asmfilename) as asmfile:
        rawlines = asmfile.readlines()

    # Load instructions from file
    load_instructions()

    # Remove the line terminator and everything from the first "#" on
    # (comments), then drop the lines that ended up empty
    filelines = []
    for linenum, rawline in enumerate(rawlines):
        fileline = rawline.rstrip("\n").split("#")[0]
        if fileline != "":
            # Remember the 1-based line number in the main file
            end_to_original_line_numbers[len(filelines)] = str(linenum + 1) + ":" + asmfilename
            filelines.append(fileline)

    error = False

    print("Info: First pass...")
    # First pass: compile code using placeholders for labels

    # total_lines may grow while looping when a handler inserts included lines
    total_lines = len(filelines)
    currentline = 0
    while currentline < total_lines:
        # always save PROGRAM_MEMORY_POINTER for the current line
        line_to_address[currentline] = PROGRAM_MEMORY_POINTER
        fileline = filelines[currentline]

        if fileline[0] == ".":
            # Control words all start with a "."; split at the spaces and
            # strip the dot from the first token to get the directive name
            fileline = fileline.split(" ")
            command = fileline[0][1:]

            try:
                # Look up function associated with instruction
                func = internal_funcs[command]
                func(fileline)

            except KeyError:
                # NOTE(review): this also catches a KeyError raised inside the
                # handler itself - kept as in the original
                print("Error: " + displayrealline(currentline) + ": Couldnt find internal assembler instruction" + " ".join(fileline))
                error = True

        else:
            # It seems to be a real machine instruction so assemble it!
            compileinstruction(fileline)

        # Go to next line
        currentline += 1

        # remember lowest and highest addresses
        if PROGRAM_MEMORY_POINTER < PROGRAM_MEMORY_MIN:
            PROGRAM_MEMORY_MIN = PROGRAM_MEMORY_POINTER

        if PROGRAM_MEMORY_POINTER > PROGRAM_MEMORY_MAX:
            PROGRAM_MEMORY_MAX = PROGRAM_MEMORY_POINTER

    print("Info: Second pass...")
    # Second pass part 1: replace label placeholders using collected info
    for address in address_to_referenced_label:
        referenced_label = address_to_referenced_label[address]

        if referenced_label in label_to_address:
            # We found the value of the label, replace the placeholders
            value = label_to_address[referenced_label][0]

            PROGRAM_MEMORY[address] = byte(value, 2)
            PROGRAM_MEMORY[(address + 1)] = byte(value, 1)
            PROGRAM_MEMORY[(address + 2)] = byte(value, 0)

        else:
            # currentline points past the last line here, so it cannot be
            # mapped to a source location; report the placeholder address
            print("Error: Unknown label " + referenced_label + " (referenced at address " + str(address) + ")")
            error = True

    # Second pass part 2: replace line jump offsets
    for address in address_to_referenced_line:
        offset, line = address_to_referenced_line[address]

        # Address of the line the jump points at
        destination_addr = line_to_address[line + offset]

        if offset < 0:
            # Jump back: distance from the start of the source line
            source_addr = line_to_address[line]
            real_offset = source_addr - destination_addr
        else:
            # Jump forward: distance from the cell just before the line that
            # follows the source line
            source_addr = line_to_address[(line + 1)] - 1
            real_offset = destination_addr - source_addr

        # Replace placeholder with real offset
        PROGRAM_MEMORY[address] = real_offset

    PROGRAM_MEMORY_END = PROGRAM_MEMORY_POINTER
    PROGRAM_MEMORY_SIZE = PROGRAM_MEMORY_END - PROGRAM_MEMORY_START

    print("Info: Writing to file...")
    # The output file is opened only now, so an existing file is not
    # truncated when assembling fails with an exception
    filetype = outfilename[-3:]

    if filetype == "bin":
        # Raw binary out: needs a binary-mode handle and bytes objects.
        # Each cell is reduced to its low 8 bits to form one byte.
        with open(outfilename, "wb") as outfile:
            outfile.write(bytes(PROGRAM_MEMORY[i] & 0xFF
                                for i in range(PROGRAM_MEMORY_START, PROGRAM_MEMORY_END)))

    elif filetype == "mif":
        # Memory initiation file(used by altera quartus)
        with open(outfilename, "w") as outfile:
            # Start with header...
            outfile.write("DEPTH = " + str(PROGRAM_MEMORY_SIZE) + ";\n")
            outfile.write("WIDTH = 8;\n")
            outfile.write("ADDRESS_RADIX = UNS;\n")
            outfile.write("DATA_RADIX = UNS;\n")
            outfile.write("CONTENT\nBEGIN\n0 : ")

            # Write data now
            for i in range(PROGRAM_MEMORY_START, PROGRAM_MEMORY_END):
                outfile.write(str(PROGRAM_MEMORY[i]) + " ")

            # End memory file
            outfile.write(";\nEND;")

    else:
        print("Error: Unknown memory file type " + outfilename[-4:])
        # Nothing was written, so the run must not be reported as successful
        error = True

    # Summarize results
    displaystats(asmfilename, outfilename)
       
       
           
           
# Assemble the monitor source into a memory initialisation file
compilefile(asmfilename="yuki_monitor.s", outfilename="system_ram_content.mif")