-src = $(wildcard src/*.asm)
+# assembly sources under src/boot/ and src/, and the object file for each
+src = $(wildcard src/boot/*.asm) $(wildcard src/*.asm)
+obj = $(src:.asm=.o)
data = data/sprsheet.inc
-bin = game
+# linked ELF image and the flat binary extracted from it
+name = game
+elf = $(name).elf
+bin = $(name).bin
-QEMU_FLAGS = -fda floppy.img -serial file:serial.log -soundhw sb16 -d guest_errors
+# nasm emits 32-bit ELF objects; ld links them with the game.ld linker script
+ASFLAGS = -f elf32
+LDFLAGS = -m elf_i386 -T game.ld -print-gc-sections
+
+QEMU_FLAGS = -fda floppy.img -serial file:serial.log -d guest_errors
.PHONY: all
all: floppy.img
-bootldr: src/boot/boot.asm $(bin)
- nasm -o $@ -f bin -DBINSIZE=`ls -l $(bin) | awk '{print $$5}'` $<
-
-$(bin): $(src) $(data)
- nasm -o $@ -f bin -i src/ src/main.asm
+# zero-filled image of 2880 sectors of 512 bytes, with boot.img written over its start
+floppy.img: boot.img
+ dd of=$@ if=/dev/zero bs=512 count=2880
+ dd of=$@ if=$< conv=notrunc
-boot.img: bootldr $(bin)
+# boot code followed immediately by the main program
+boot.img: bootldr.bin $(bin)
cat $^ >$@
-floppy.img: boot.img
- dd of=$@ if=/dev/zero bs=512 count=2880
- dd of=$@ if=$< bs=1 conv=notrunc
+# boot code: only the sections matching .boot* from the linked image
+bootldr.bin: $(elf)
+ objcopy -O binary -j '.boot*' $< $@
+
+# main program: everything except the .boot* sections
+# NOTE(review): $(data) is a prerequisite here rather than of the objects; if
+# an .asm file includes it, a data change will not trigger reassembly -- confirm
+$(bin): $(elf) $(data)
+ objcopy -O binary -R '.boot*' $< $@
+
+# link all objects, also writing a link map to link.map
+$(elf): $(obj)
+ $(LD) -o $@ $(obj) -Map link.map $(LDFLAGS)
+
+# assemble each source into an object with the format given by ASFLAGS
+%.o: %.asm
+ nasm -o $@ $(ASFLAGS) $<
data/sprsheet.inc: data/sprsheet.png
img2tiles -o $@ -n -t 32x32 $<
.PHONY: clean
clean:
- rm -f $(bin) bootldr floppy.img boot.img
+ # remove every product of the build rules: the boot code is bootldr.bin
+ # (not bootldr.img), and the link step also leaves $(elf) and link.map
+ rm -f $(bin) $(elf) $(obj) bootldr.bin floppy.img boot.img link.map
.PHONY: disasm
-disasm: bootldr.disasm $(bin).disasm
+# disassembly listings of the boot code and of the main program; the program
+# listing is named after $(name), matching the rule that produces it
+disasm: bootldr.disasm $(name).disasm
-bootldr.disasm: bootldr
+# the boot code is built as bootldr.bin; disassemble it as 16-bit code at 7c00h
+bootldr.disasm: bootldr.bin
ndisasm -b 16 -o 7c00h $< >$@
-$(bin).disasm: $(bin)
- ndisasm -b 16 -o 7e00h $< >$@
+# disassemble the .text section of the linked image as i386 code
+$(name).disasm: $(elf)
+ objdump -d $< -j .text -m i386 >$@
+
+# separate file holding only the debug information of the linked image
+$(name).sym: $(elf)
+ objcopy --only-keep-debug $< $@
.PHONY: run
-run: $(bin)
+# boot the floppy image in QEMU
+run: floppy.img
qemu-system-i386 $(QEMU_FLAGS)
.PHONY: debug
-debug: $(bin)
+# same as run, with QEMU's -s and -S options added for attaching a debugger
+debug: floppy.img
qemu-system-i386 $(QEMU_FLAGS) -s -S
+
+# convenience alias for building the debug-symbol file
+.PHONY: sym
+sym: $(name).sym
--- /dev/null
+; vi:filetype=nasm ts=8 sts=8 sw=8:
+; second stage boot loader
+ bits 16
+ section .boot2
+
+LOADADDR equ 100000h
+DRIVENO_ADDR equ 7bf0h
+
+ extern _boot2_size
+ extern _main_size
+ extern sect_per_track
+ extern num_heads
+
+ ; second stage entry point: sets up serial output, unreal mode and the
+ ; A20 line, loads the main program at LOADADDR, switches to video mode
+ ; 13h, then enters 32-bit protected mode and jumps to the loaded program
+boot2_start:
+ cli
+
+ ; NOTE(review): this value is not used before setup_serial overwrites al
+ xor eax, eax
+ mov al, [DRIVENO_ADDR]
+
+ call setup_serial
+
+ ; enter unreal mode
+ call unreal
+
+ ; send a newline to the serial port only
+ mov al, 10
+ call ser_putchar
+
+ ; enable A20 address line
+ call enable_a20
+
+ ; load program into memory starting at 1MB
+ call load_main
+
+ ; switch video mode, can't do that easily from protected mode
+ mov ax, 13h
+ int 10h
+
+ ; load GDT and IDT
+ lgdt [gdt_lim]
+ lidt [idt_lim]
+
+ ; enter protected mode
+ mov eax, cr0
+ or eax, 1
+ mov cr0, eax
+ ; inter-segment jump to set cs selector to segment 1
+ jmp 0x8:.pmode
+
+ bits 32
+.pmode: ; set all data selectors to segment 2
+ mov ax, 10h
+ mov ds, ax
+ mov ss, ax
+ mov es, ax
+ mov gs, ax
+ mov fs, ax
+
+ ; hand over control to the program loaded by load_main
+ jmp LOADADDR
+
+ ; operand for lgdt: limit (3 descriptors * 8 bytes - 1) followed by base
+ align 4
+gdt_lim: dw 23
+gdt_base: dd gdt
+
+ ; operand for lidt: limit (14 gates * 8 bytes - 1) followed by base
+ align 4
+idt_lim: dw 111
+idt_base: dd idt
+
+ align 8
+gdt: ; 0: null segment
+ dd 0
+ dd 0
+ ; 1: code - base:0, lim:4g, G:4k, 32bit, avl, pres|app, dpl:0, type:code/non-conf/rd
+ dd 0000ffffh
+ dd 00cf9a00h
+ ; 2: data - base:0, lim:4g, G:4k, 32bit, avl, pres|app, dpl:0, type:data/rw
+ dd 0000ffffh
+ dd 00cf9200h
+
+ ; gates 0-12 are left empty (13 * 8 = 104 zero bytes); only gate 13 is set
+ align 8
+idt: times 104 db 0
+ ; trap gate 13: general protection fault
+ dw prot_fault
+ dw 8
+ dw 8f00h ; type: trap, present, default
+ dw 0
+
+ ; NOTE(review): gpf_msg is not referenced anywhere in the visible code and
+ ; has no zero terminator
+gpf_msg: db "GP fault "
+
+ ; handler installed in IDT gate 13: prints the dword at the top of the
+ ; stack shifted right by 3, a ':', the next dword on the stack, then halts
+ ; (presumably the selector index from the error code and the faulting
+ ; address -- confirm).
+ ; NOTE(review): this is assembled as 32-bit code (bits 32 is still in
+ ; effect here), while print_num and putchar are assembled under bits 16 --
+ ; confirm that calling them from protected mode works as intended
+prot_fault:
+ mov eax, [esp]
+ shr eax, 3
+ call print_num
+ mov al, ':'
+ call putchar
+ mov eax, [esp + 4]
+ call print_num
+ mov al, 10
+ call putchar
+ hlt
+
+
+ bits 16
+ ; unreal: briefly enters protected mode to load every data segment register
+ ; from GDT entry 2, then returns to real mode with all of them set to 0
+ ; clobbers: eax
+unreal:
+ ; use the same GDT as above, will use data seg: 2
+ lgdt [gdt_lim]
+
+ ; set the protection enable bit
+ mov eax, cr0
+ or eax, 1
+ mov cr0, eax
+ jmp .pm
+
+.pm: mov ax, 10h
+ mov ds, ax
+ mov es, ax
+ mov fs, ax
+ mov gs, ax
+ mov ss, ax
+
+ ; clear the protection enable bit again
+ mov eax, cr0
+ and ax, 0fffeh
+ mov cr0, eax
+
+ ; back in real mode: zero all data segment registers
+ xor ax, ax
+ mov ds, ax
+ mov es, ax
+ mov fs, ax
+ mov gs, ax
+ mov ss, ax
+ ret
+
+mainsz_msg: db 'Program size: ',0
+mainsz_msg2: db ' (',0
+mainsz_msg3: db ' sectors)',10,0
+
+ ; loader state
+first_sect: dd 0 ; first disk sector of the main program (0-based)
+sect_left: dd 0 ; sectors still to be loaded
+cur_track: dd 0 ; track currently being read
+trk_sect: dd 0 ; 0-based start sector within cur_track
+dest_ptr: dd 0 ; address the next chunk is copied to
+
+ ; load_main: reads the main program from disk one track at a time through
+ ; read_track, copying each chunk from buffer to consecutive addresses
+ ; starting at LOADADDR. Turns the floppy motors off afterwards if the boot
+ ; drive number has bit 7 clear.
+load_main:
+ mov dword [dest_ptr], LOADADDR
+
+ ; calculate first sector
+ ; (_boot2_size rounded up to whole 512-byte sectors)
+ mov eax, _boot2_size
+ add eax, 511
+ shr eax, 9
+ ; add 1 to account for the boot sector
+ inc eax
+ mov [first_sect], eax
+
+ ; calculate the first track (first_sect / sect_per_track)
+ movzx ecx, word [sect_per_track]
+ xor edx, edx
+ div ecx
+ mov [cur_track], eax
+ ; remainder is sector within track
+ mov [trk_sect], edx
+
+ mov esi, mainsz_msg
+ call putstr
+ mov eax, _main_size
+ mov ecx, eax
+ call print_num
+
+ mov esi, mainsz_msg2
+ call putstr
+
+ ; calculate sector count
+ ; (eax still holds _main_size: print_num preserves registers)
+ add eax, 511
+ shr eax, 9
+ mov [sect_left], eax
+
+ call print_num
+ mov esi, mainsz_msg3
+ call putstr
+
+ ; read a whole track into the buffer (or partial first track)
+.ldloop:
+ ; sectors to read = sect_per_track - trk_sect; pushed as the argument
+ ; read_track picks up from the stack
+ movzx ecx, word [sect_per_track]
+ sub ecx, [trk_sect]
+ push ecx
+ call read_track
+
+ ; copy to high memory
+ mov esi, buffer
+ mov edi, [dest_ptr]
+ mov ecx, [esp]
+ ; sectors -> bytes, advance dest_ptr, then bytes -> dwords for movsd
+ shl ecx, 9
+ add [dest_ptr], ecx
+ shr ecx, 2
+ a32 rep movsd
+
+ inc dword [cur_track]
+ ; other than the first track which might be partial, all the rest start from 0
+ mov dword [trk_sect], 0
+
+ ; loop while sectors remain (unsigned: stops on zero or on borrow)
+ pop ecx
+ sub [sect_left], ecx
+ ja .ldloop
+
+ ; the BIOS might have enabled interrupts
+ cli
+
+ ; if we were loaded from floppy, turn all floppy motors off
+ mov bl, [DRIVENO_ADDR]
+ and bl, 80h
+ jnz .notfloppy
+ ; clear the upper four bits of the value read from port 3f2h
+ mov dx, 3f2h
+ in al, dx
+ and al, 0fh
+ out dx, al
+.notfloppy:
+
+ mov ax, 10
+ call putchar
+
+ ret
+
+rdtrk_msg: db 'Reading track: ',0
+rdcyl_msg: db ' - cyl: ',0
+rdhead_msg: db ' head: ',0
+rdsect_msg: db ' start sect: ',0
+rdlast_msg: db ' ... ',0
+rdok_msg: db 'OK',10,0
+rdfail_msg: db 'failed',10,0
+
+ ; attempts left for the current read_track call
+read_retries: dw 0
+
+ ; read_track: reads sectors of track [cur_track], starting at the 0-based
+ ; sector [trk_sect], into buffer through BIOS int 13h function 2.
+ ; The sector count is taken from the dword the caller pushed just before
+ ; the call. Makes up to 3 attempts, resetting the controller in between;
+ ; jumps to abort_read (never returns) if all of them fail.
+ ; es is 0 on successful return.
+read_track:
+ ; set es to the start of the destination buffer to allow reading in
+ ; full 64k chunks if necessary
+ mov bx, buffer
+ shr bx, 4
+ mov es, bx
+ xor ebx, ebx
+
+ mov word [read_retries], 3
+
+.try:
+ ; print_track
+ mov esi, rdtrk_msg
+ call putstr
+ mov eax, [cur_track]
+ call print_num
+ mov esi, rdcyl_msg
+ call putstr
+
+ ; calc cylinder (cur_track / num_heads) and head (cur_track % num_heads)
+ mov eax, [cur_track]
+ movzx ecx, word [num_heads]
+ xor edx, edx
+ div ecx
+
+ ; print cylinder
+ ; (eax is saved because putstr below overwrites al)
+ push eax
+ call print_num
+ ; print head
+ mov esi, rdhead_msg
+ call putstr
+ movzx eax, dx
+ call print_num
+ pop eax
+
+ ; head on dh
+ mov dh, dl
+
+ ; cylinder low byte at ch and high bits at cl[7, 6]
+ mov ch, al
+ mov cl, ah
+ and cl, 3
+ ror cl, 2
+
+ ; print start sector
+ mov esi, rdsect_msg
+ call putstr
+ mov eax, [trk_sect]
+ call print_num
+ mov esi, rdlast_msg
+ call putstr
+
+ ; start sector (1-based) in cl[0, 5]
+ mov al, [trk_sect]
+ inc al
+ and al, 3fh
+ or cl, al
+
+ ; number of sectors in al
+ ; (low word of the caller's pushed dword, just above the 2-byte return address)
+ mov ax, [esp + 2]
+ ; call number (2) in ah
+ mov ah, 2
+ ; drive number in dl
+ mov dl, [DRIVENO_ADDR]
+ int 13h
+ jnc .success
+
+ ; abort after 3 attempts
+ dec word [read_retries]
+ jz .failed
+
+ ; error, reset controller and retry
+ xor ah, ah
+ int 13h
+ jmp .try
+
+.failed:
+ mov esi, rdfail_msg
+ call putstr
+ jmp abort_read
+
+.success:
+ mov esi, rdok_msg
+ call putstr
+
+ ; reset es to 0 before returning
+ xor ax, ax
+ mov es, ax
+ ret
+
+str_read_error: db 'Read error while reading track: ',0
+
+ ; abort_read: prints the read error message followed by [cur_track], then
+ ; disables interrupts and halts in an endless loop (never returns)
+abort_read:
+ mov esi, str_read_error
+ call putstr
+ mov eax, [cur_track]
+ call print_num
+ mov al, 10
+ call putchar
+
+ cli
+.hlt: hlt
+ jmp .hlt
+
+
+ ; print routines
+ ; text cursor position, in character cells
+cursor_x: dd 0
+cursor_y: dd 0
+
+ ; putchar: sends the character in al to the serial port and writes it to
+ ; the text screen at the cursor position; 10 moves to a new line instead
+ ; in: al - character
+ ; all general purpose registers are preserved
+putchar:
+ o32 pusha
+ call ser_putchar
+
+ cmp al, 10
+ jnz .notlf
+ call video_newline
+ jmp .end
+
+ ; ebx <- cell index: cursor_y * 80 + cursor_x
+.notlf: push eax
+ mov eax, [cursor_y]
+ mov ecx, 80
+ mul ecx
+ add eax, [cursor_x]
+ mov ebx, eax
+ pop eax
+
+ mov edx, 0b8000h
+
+ ; each cell is two bytes: the character, then attribute 7
+ mov [ebx * 2 + edx], al
+ mov byte [ebx * 2 + edx + 1], 7
+ ; advance the cursor, wrapping to the next line after column 79
+ inc dword [cursor_x]
+ cmp dword [cursor_x], 80
+ jnz .end
+ call video_newline
+
+.end: o32 popa
+ ret
+
+
+ ; putstr: prints a zero-terminated string one character at a time
+ ; through putchar
+ ; in: esi - pointer to the string
+ ; out: esi points at the terminator, al is 0
+putstr:
+.next: mov al, [esi]
+ test al, al
+ jz .done
+ call putchar
+ inc esi
+ jmp .next
+.done: ret
+
+ ; print_num: prints an unsigned 32-bit number in decimal through putstr
+ ; expects number in eax
+ ; all general purpose registers are preserved
+print_num:
+ ; save registers
+ o32 pusha
+
+ ; the string is built backwards from the end of numbuf; the terminator
+ ; goes in the last byte of the 16-byte buffer (numbuf + 15), not one
+ ; byte past it. At most 10 digits are needed, so it always fits.
+ mov esi, numbuf + 15
+ mov byte [esi], 0
+ mov ebx, 10
+.convloop:
+ ; edx:eax / 10 -> quotient in eax, remainder (the next digit) in edx
+ xor edx, edx
+ div ebx
+ ; convert the digit to ASCII and store it in front of the previous one
+ add dl, 48
+ dec esi
+ mov [esi], dl
+ cmp eax, 0
+ jnz .convloop
+
+ ; esi now points at the most significant digit
+ call putstr
+
+ ; restore regs
+ o32 popa
+ ret
+
+
+ ; video_newline: moves the text cursor to the start of the next row; on
+ ; the last row (24) the cursor stays on that row
+video_newline:
+ mov dword [cursor_x], 0
+ cmp dword [cursor_y], 24
+ jz .bottom
+ inc dword [cursor_y]
+.bottom:
+ ret
+
+ ; clearscr: fills the text screen at b8000h with blank cells
+ ; clobbers: eax, ecx, edi
+clearscr:
+ ; 1000 dwords = 2000 two-byte cells (80 columns * 25 rows)
+ mov ecx, 1000
+ ; two white-on-black spaces per dword
+ mov eax, 07200720h
+ mov edi, 0b8000h
+ a32 rep stosd
+ ret
+
+ ; UART register ports (the divisor registers share ports with the data
+ ; register and the one after it; setup_serial sets LCTL_DLAB before
+ ; writing them)
+UART_DATA equ 3f8h
+UART_DIVLO equ 3f8h
+UART_DIVHI equ 3f9h
+UART_FIFO equ 3fah
+UART_LCTL equ 3fbh
+UART_MCTL equ 3fch
+UART_LSTAT equ 3fdh
+
+ ; register values used below
+DIV_9600 equ (115200 / 9600)
+LCTL_8N1 equ 03h
+LCTL_DLAB equ 80h
+FIFO_ENABLE_CLEAR equ 07h
+MCTL_DTR_RTS_OUT2 equ 0bh
+LST_TREG_EMPTY equ 20h
+
+ ; setup_serial: programs the UART with the 9600 baud divisor, 8n1 format,
+ ; enabled and cleared fifo, and the modem control lines asserted
+ ; clobbers: ax, dx
+setup_serial:
+ ; set clock divisor
+ mov al, LCTL_DLAB
+ mov dx, UART_LCTL
+ out dx, al
+ ; low byte of the divisor first, then the high byte
+ mov ax, DIV_9600
+ mov dx, UART_DIVLO
+ out dx, al
+ shr ax, 8
+ mov dx, UART_DIVHI
+ out dx, al
+ ; set format 8n1
+ mov al, LCTL_8N1
+ mov dx, UART_LCTL
+ out dx, al
+ ; clear and enable fifo
+ mov al, FIFO_ENABLE_CLEAR
+ mov dx, UART_FIFO
+ out dx, al
+ ; assert RTS and DTR
+ mov al, MCTL_DTR_RTS_OUT2
+ mov dx, UART_MCTL
+ out dx, al
+ ret
+
+ ; ser_putchar: writes the character in al to the UART, sending a carriage
+ ; return (13) first when the character is a line feed (10)
+ ; in: al - character (unchanged on return)
+ ; clobbers: ah; dx is preserved
+ser_putchar:
+ push dx
+ cmp al, 10
+ jnz .notlf
+ ; ax is saved because the nested call overwrites both al and ah
+ push ax
+ mov al, 13
+ call ser_putchar
+ pop ax
+
+ ; keep the character in ah while al is used for polling the status
+.notlf: mov ah, al
+ ; wait until the transmit register is empty
+ mov dx, UART_LSTAT
+.wait: in al, dx
+ and al, LST_TREG_EMPTY
+ jz .wait
+ mov dx, UART_DATA
+ mov al, ah
+ out dx, al
+
+ pop dx
+ ret
+
+ ; terminated with a line feed (10) like every other message: putchar only
+ ; treats 10 as a line break, any other code is drawn on screen as a glyph
+ena20_msg: db 'A20 line enabled',10,0
+
+ ; enable_a20: makes sure the A20 line is enabled, trying the keyboard
+ ; controller method and then the fast (port 92h) method whenever test_a20
+ ; reports it disabled; repeats both until the test passes, then prints
+ ; ena20_msg
+enable_a20:
+ call test_a20
+ jnc .done
+ call enable_a20_kbd
+ call test_a20
+ jnc .done
+ call enable_a20_fast
+ call test_a20
+ jnc .done
+ ; keep trying...
+ jmp enable_a20
+.done:
+ mov esi, ena20_msg
+ call putstr
+ ret
+
+ ; CF = 1 if A20 test fails (not enabled)
+ ; Writes different values to two addresses exactly 1MB apart. The final
+ ; sub leaves CF clear only if the first value is still intact, and sets it
+ ; if the second write replaced it with the (smaller) second value.
+ ; clobbers: ebx, edx, and the dwords at 07c000h and 17c000h
+test_a20:
+ mov ebx, 07c000h
+ mov edx, 17c000h
+ mov dword [ebx], 0xbaadf00d
+ mov dword [edx], 0xaabbcc42
+ sub dword [ebx], 0xbaadf00d
+ ret
+
+ ; enable A20 line through port 0x92 (fast A20)
+ ; prints a progress message, then sets bit 1 of the value at port 92h
+ ; clobbers: al, esi
+enable_a20_fast:
+ mov esi, ena20_fast_msg
+ call putstr
+
+ ; read-modify-write so the other bits of the port keep their values
+ in al, 92h
+ or al, 2
+ out 92h, al
+ ret
+
+ena20_fast_msg: db 'Attempting fast A20 enable',10,0
+
+ ; enable A20 line through the keyboard controller
+ ; controller ports (command and status share port 64h)
+KBC_DATA_PORT equ 60h
+KBC_CMD_PORT equ 64h
+KBC_STATUS_PORT equ 64h
+KBC_CMD_RD_OUTPORT equ 0d0h
+KBC_CMD_WR_OUTPORT equ 0d1h
+
+ ; status register bits
+KBC_STAT_OUT_RDY equ 01h
+KBC_STAT_IN_FULL equ 02h
+
+ ; enable_a20_kbd: prints a progress message, then sends the "write output
+ ; port" command followed by the value 0dfh, waiting for the controller to
+ ; be ready before each byte
+ ; clobbers: al, esi
+enable_a20_kbd:
+ mov esi, ena20_kbd_msg
+ call putstr
+
+ call kbc_wait_write
+ mov al, KBC_CMD_WR_OUTPORT
+ out KBC_CMD_PORT, al
+ call kbc_wait_write
+ ; NOTE(review): 0dfh is presumably an output port value with the A20
+ ; gate bit set -- confirm
+ mov al, 0dfh
+ out KBC_DATA_PORT, al
+ ret
+
+ena20_kbd_msg: db 'Attempting KBD A20 enable',10,0
+
+ ; kbc_wait_write: polls the keyboard controller status port until its
+ ; input-full bit is clear, i.e. until another byte can be sent
+ ; out: al is 0
+kbc_wait_write:
+.busy: in al, KBC_STATUS_PORT
+ and al, KBC_STAT_IN_FULL
+ jnz .busy
+ ret
+
+ ; scratch space in which print_num builds its decimal string
+numbuf: resb 16
+
+
+ ; this part is placed at the very end of all boot sections
+ section .bootend
+
+ ; buffer used by the track loader
+ ; (16-byte aligned: read_track shifts its address right by 4 to form the
+ ; segment value it loads into es)
+ align 16
+buffer:
; initializes the video hardware and graphics routines
; clear
; clears the framebuffer (not vmem)
-; clobbers: ax, cx, di
+; clobbers: eax, ecx, edi
; swap_buffers
; copies the framebuffer to video memory
-; clobbers: ax, cx, di, si
+; clobbers: eax, ecx, edi, esi
; wait_vsync
; clobbers: al, dx
; set_palette_entry(idx[al], r[ah], g[bl], b[bh])
; colors are 0-255
+ bits 32
+ section .text
-VIDMEM_SEG equ 0a000h
-FRAMEBUF_SEG equ 09000h
+VIDMEM_ADDR equ 0a0000h
+FRAMEBUF_ADDR equ 090000h
REG_CRTC_STATUS equ 3dah
CRTC_VBLANK_BIT equ 08h
REG_DAC_ADDR equ 3c8h
REG_DAC_DATA equ 3c9h
+ extern sprsheet_cmap
+ extern sprsheet_tiles
+
+
+ global init_gfx
+ ; init_gfx: clears the framebuffer and loads the 256-entry palette from
+ ; sprsheet_cmap (3 bytes per entry) through set_palette_entry
init_gfx:
- ; video mode 13h (320x200 8bpp)
- mov ax, 13h
- int 10h
call clear
; setup the spritesheet palette
- mov si, sprsheet_cmap
+ mov esi, sprsheet_cmap
xor cl, cl
.cmaploop:
mov al, cl
- mov ah, [si]
- mov bl, [si + 1]
- mov bh, [si + 2]
- add si, 3
+ mov ah, [esi]
+ mov bl, [esi + 1]
+ mov bh, [esi + 2]
+ add esi, 3
call set_palette_entry
- dec cl
+ ; count the palette index up; the loop ends when cl wraps back to 0
+ inc cl
jnz .cmaploop
-
ret
+
+ global clear
+ ; clear: zero-fills the framebuffer at FRAMEBUF_ADDR
clear:
- push es
- mov ax, FRAMEBUF_SEG
- mov es, ax
- xor di, di
- xor ax, ax
- mov cx, 16000
+ mov edi, FRAMEBUF_ADDR
+ xor eax, eax
+ ; 16000 dwords = 64000 bytes
+ mov ecx, 16000
rep stosd
- pop es
ret
+ global swap_buffers
+ ; swap_buffers: copies the framebuffer at FRAMEBUF_ADDR to video memory
+ ; at VIDMEM_ADDR
swap_buffers:
- push ds
- push es
- mov ax, FRAMEBUF_SEG
- mov ds, ax
- xor si, si
- mov ax, VIDMEM_SEG
- mov es, ax
- xor di, di
- mov cx, 16000
+ mov esi, FRAMEBUF_ADDR
+ mov edi, VIDMEM_ADDR
+ ; 16000 dwords = 64000 bytes
+ mov ecx, 16000
rep movsd
- pop es
- pop ds
ret
+ global wait_vsync
wait_vsync:
mov dx, REG_CRTC_STATUS
.wait_vblank_end:
pop dx
ret
- ; slow_sprite(short id, short x, short y)
+ ; slow_sprite(int id, int x, int y)
; assumptions: 32x32, one after the other, 0 is transparent
- ; XXX sprsheet needs to go to its own segment
+ ; arguments are read from the stack: id at [ebp + 8], x at [ebp + 12],
+ ; y at [ebp + 16]; all general purpose registers are preserved
+ ; NOTE(review): the transparency skip below is commented out, so pixels
+ ; with value 0 are currently copied as well
+ global slow_sprite
slow_sprite:
- push bp
- mov bp, sp
+ push ebp
+ mov ebp, esp
pusha
- mov ax, FRAMEBUF_SEG
- mov es, ax
- mov ax, [bp + 8] ; ax <- y
- sub ax, 16 ; ax <- y - 16 (center sprite vertically)
- mov bx, ax
- shl ax, 8
- shl bx, 6
- add ax, bx ; ax <- (y - 16) * 320
- mov di, [bp + 6] ; di <- x
- sub di, 16 ; di <- x - 16 (center sprite horizontally)
- add di, ax ; di <- (y - 16) * 320 + (x - 16)
-
- mov si, sprsheet_tiles
+ mov eax, [ebp + 16] ; eax <- y
+ sub eax, 16 ; eax <- y - 16 (center sprite vertically)
+ mov ebx, eax
+ shl eax, 8
+ shl ebx, 6
+ add eax, ebx ; eax <- (y - 16) * 320
+ mov edi, [ebp + 12] ; edi <- x
+ sub edi, 16 ; edi <- x - 16 (center sprite horizontally)
+ add edi, eax ; edi <- (y - 16) * 320 + (x - 16)
+ add edi, FRAMEBUF_ADDR
+
+ mov esi, sprsheet_tiles
; calculate sprite id offset (each spr is 32*32=1k)
- mov ax, [bp + 4]
- shl ax, 10
- add si, ax
+ mov eax, [ebp + 8]
+ shl eax, 10
+ add esi, eax
- mov cx, 32
+ ; ecx counts the 32 rows, ebx the 32 pixels within a row
+ mov ecx, 32
.yloop:
- xor bx, bx
+ xor ebx, ebx
.xloop:
- mov al, [si]
+ mov al, [esi]
cmp al, 0
;jz .skip_pixel
- mov [es:di + bx], al
+ mov [edi + ebx], al
.skip_pixel:
- inc si
- inc bx
- cmp bx, 32
+ inc esi
+ inc ebx
+ cmp ebx, 32
jnz .xloop
- add di, 320
- dec cx
+ ; advance to the next framebuffer row (320 bytes per row)
+ add edi, 320
+ dec ecx
jnz .yloop
popa
- pop bp
+ pop ebp
ret