	TITLE	FASTMULP - Fast multisided convex poly filling blitter
	NAME	FMULTPX


	COMMENT	$

	Name:		FMULTPX

		Written and (c) by Dave Stampe 27/5/92
		Not for commercial use, so get permission
		before marketing code using this stuff!
		For private PD use only.

		$

		.MODEL LARGE

		.DATA

		; Work lists built by _fastpoly on every call.  Each entry is
		; one dword copied from the caller's vertex array: x in the
		; low word (+0), y in the high word (+2).
		;
		; lverts: the caller's vertices rotated so that the vertex
		;         with the smallest y comes first.
		; rverts: entry 0 is the same first vertex, followed by the
		;         remaining vertices in reverse order.
		;
		; Both lists hold 40 entries.  _fastpoly also writes a -1
		; "stopper" word just past the copied vertices, so the usable
		; vertex count is smaller than 40.
		; 0AAh / 0BBh are only the initial fill values (presumably
		; chosen to be recognisable in a memory dump).
lverts		dd 40 dup (0AAh)
rverts		dd 40 dup (0BBh)

		.CODE
				; big table more eff. than masking
				;
				; Both tables live in the code segment and are
				; read through CS: by the span blitter, indexed
				; directly by the pixel x coordinate.  The
				; 4-byte pattern repeats 80 times, so only
				; x = 0..319 is covered; the pattern position
				; is x mod 4.  The byte fetched is sent with
				; OUT to the port held in DX (03C5h).
				;
				; start byte lookup table
				;   x mod 4 = 0,1,2,3 -> 1111b,1110b,1100b,1000b
stmask:		REPT	80
		db	15,14,12,8
		ENDM
				; end byte lookup table
				;   x mod 4 = 0,1,2,3 -> 0001b,0011b,0111b,1111b
fnmask:         REPT	80
		db      1,3,7,15
		ENDM

	extrn	_dpaddr		; page base address
				; (read as a WORD through DS and added to
				;  y*80 to form the first scan line offset)

				; Stack frame of _fastpoly (far proc, BP frame):
				;   [bp+0] saved BP, [bp+2..5] far return address
nv	equ	[bp+6]          ; arguments to _fastpoly: vertex count
verts	equ	[bp+8]		; far pointer (offset at +8, segment at +10)
color	equ	[bp+12]		; only the low byte is read

				; Locals, inside the 30 bytes reserved below BP
vline   equ	[bp-2]   	; video base addr. of line
lines	equ	[bp-4]		; number of lines to fill
l_incr	equ	[bp-8]		; dword: left edge x step per line, 16.16 fixed
r_incr	equ	[bp-12]		; dword: right edge x step per line, 16.16 fixed
aswap   equ     [bp-16]		; plane mask reg. swap value
				; (word exchanged with DX around each span:
				;  holds 03C5h while DX carries the x fraction)
lptr	equ	[bp-18]		; offset of current left edge's top vertex
rptr	equ	[bp-20]		; offset of current right edge's top vertex
cur_y	equ	[bp-22]		; y of the next scan line to be drawn


   ;
   ;   fastpoly(int count, int far *vertices, int color)
   ;       vertices must be in CCW order, each stored as an x,y word pair!
   ;       color is read from [bp+12]; only its low byte is used

		PUBLIC	_fastpoly

   ;-----------------------------------------------------------------
   ;   fastpoly(int count, int far *vertices, int color)
   ;
   ;   Fills a convex polygon as a series of trapezoids made of
   ;   horizontal spans written to segment 0A000h.
   ;
   ;   In:   nv    [bp+6]   vertex count.  Must be 2..39: the lverts
   ;                        and rverts work lists hold 40 entries and
   ;                        one slot past the vertices is needed for
   ;                        the "stopper".  Any other count returns
   ;                        without drawing.
   ;         verts [bp+8]   far pointer to x,y word pairs, CCW order
   ;         color [bp+12]  low byte selects the byte at A000:FFxx
   ;                        that is read before drawing ("load color
   ;                        to latches"); the VGA write mode and the
   ;                        contents of that byte are set up elsewhere
   ;   Out:  no defined return value
   ;   Regs: SI, DI, DX, CX (16-bit) and BP preserved.
   ;         AX, BX, ES, flags and the upper halves of the 32-bit
   ;         registers are destroyed.  Direction flag is left clear.
   ;   Uses: DS must address lverts, rverts and _dpaddr.
   ;
   ;   Termination: each side list is followed by a vertex whose y is
   ;   -1.  "slope" reports a negative line count for the edge running
   ;   into it (or for any edge that climbs back up), which ends the
   ;   polygon.
   ;-----------------------------------------------------------------

_fastpoly	proc	far

	.386
	push	bp
	mov	bp,sp
	sub	sp,30			; locals live at [bp-2]..[bp-22]
	push	si
	push	di
	push	dx
	push	cx

			      ; VALIDATE VERTEX COUNT
	mov	cx,nv
	cmp	cx,2			; 0 or 1 would enter LOOP with CX=0
	jb	short fp_badcount	;  (65536 iterations) further down
	cmp	cx,39			; 40-entry lists need a spare slot;
	jbe	short fp_countok	;  unsigned, so negatives fail too
fp_badcount:
	jmp	end_poly		; nothing drawn, registers restored
fp_countok:

			      ; CREATE L, R SIDE LISTS OF VERTICES

	les	si,verts         ; find lowest y (any vertex on top of poly)
	mov	ax,30000
fmin:
	cmp	ax,es:[si+2]
	jl	notlower
	mov	ax,es:[si+2]
	mov	bx,si			; bx = offset of topmost vertex
	mov	dx,cx			; dx = vertices from bx to end of array
notlower:
	add	si,4
	loop	fmin

	mov	cx,dx
	mov	si,bx
	mov	di,OFFSET lverts
clsloop:                            ; copy from lowest to end
	mov	eax,es:[si]
	mov	ds:[di],eax
	add	si,4
	add	di,4
	loop	clsloop

	mov	si,verts
cleloop:                            ; copy from start to lowest
	mov	eax,es:[si]	    ; (runs at least once: when the top
	mov	ds:[di],eax	    ;  vertex is vertex 0 this appends one
	add	si,4		    ;  extra copy of it as entry nv)
	add	di,4
	cmp	si,bx
	jb	cleloop

	mov	si,OFFSET lverts    ; right list in reverse order
	mov     lptr,si
	mov	di,OFFSET rverts    ; except first element same
	mov     rptr,di
	mov	eax,ds:[si]
	mov	ds:[di],eax
	add	si,4
	add	di,4
	mov	cx,nv
	dec	cx
	add	di,cx		    ; di = rverts + 4*nv
	add	di,cx
	add	di,cx
	add	di,cx
	mov	ax,-1
	mov	ds:[di+2],ax	    ; "stopper": rverts[nv].y = -1
crrloop:                            ; copy remaining elements
	sub	di,4
	mov	eax,ds:[si]
	mov	ds:[di],eax
	add	si,4
	loop	crrloop
				    ; si = lverts + 4*nv here
	mov	WORD PTR ds:[si+2],-1	; "stopper": lverts[nv].y = -1
				    ; (was [si+4], i.e. lverts[nv+1].x,
				    ;  which stopped nothing and for nv=39
				    ;  landed on rverts[0].x)
			      ; VIDEO SETUP
	cld
	mov	ax,03c5h		; setup DX adr. swap value
	mov	aswap,ax
	mov	ax,0a000h               ; set video segment
	mov	es,ax
	mov	bh,0FFh			; load color to latches
	mov	bl, BYTE PTR color
	mov	al,es:[bx]		; read of A000:FFxx, value discarded
	mov	ax,WORD PTR [lverts+2]           ; set current line
	mov     cur_y,ax
	imul	ax,ax,80                ; compute starting line adr
					; (16-bit multiply: the former
					;  8-bit MUL BL lost y above 255)
	add	ax,WORD PTR ds:_dpaddr
	mov	vline,ax

	mov	cx,nv          ; FIND FIRST EDGES IN POLY
	dec	cx
	mov	bx,lptr
lslinit:
	push	cx			; slope returns its line count in CX
	call	near ptr slope    ; find first left edge
	pop	cx			; POP leaves slope's flags intact
	jnz	lfinit
	add	bx,4			; horizontal edge: try the next one
	loop	lslinit

	pop	cx                ; unable to find next line: exit
	pop	dx
	pop	di
	pop	si
	mov	sp,bp
	pop	bp
	ret

lfinit:
	mov	l_incr,eax
	mov	lptr,bx

	mov	dx,[bx]           ; compute L start
	shl	edx,16		  ; edx = left x, 16.16 fixed point
;	add	edx,08000h        ; force left side to round up

	mov	bx,rptr
rslinit:
	call	near ptr slope    ; find first right edge
	jnz	rfinit		  ; (the stopper also gives NZ, so this
	add	bx,4		  ;  search cannot run off the list)
	jmp	rslinit
rfinit:
	mov	r_incr,eax
	mov	rptr,bx

	mov	si,[bx]         ; compute R start
	shl	esi,16		; esi = right x, 16.16 fixed point

				; POLY SLICE LOOP
sliceloop:
	mov	bx,rptr
	mov	ax,[bx+6]	; bottom of right edge
	mov	bx,lptr
	cmp	ax,[bx+6]	; bottom of left edge
	jb	fbots
	mov	ax,[bx+6]	; ax = the nearer (unsigned smaller) bottom
fbots:
	mov	cx,ax
	sub	cx,cur_y                ; figure line count
	mov	cur_y,ax
	mov	lines,cx
	call	near ptr trapezoid	; draw it

	mov	bx,lptr
	mov	ax,cur_y
	cmp     ax,[bx+6]		; left edge ended?
	jnz	not_left

fll:
	add	bx,4
	call	near ptr slope
	jl	end_poly		; no more left edges
	jnz	fleft
	jmp	fll			; horizontal edge: skip it
fleft:
	mov	l_incr,eax
	mov	lptr,bx
	mov	dx,[bx]           ; compute L start
	shl	edx,16
;	add	edx,08000h        ; force left side to round up

not_left:
	mov	bx,rptr
	mov	ax,cur_y
	cmp	ax,[bx+6]		; right edge ended?
	jnz	not_right

frl:
	add	bx,4
	call	near ptr slope
	jl	end_poly	  ; no more right edges
	jnz	freft
	jmp	frl			; horizontal edge: skip it
freft:
	mov	r_incr,eax
	mov	rptr,bx
	mov	si,[bx]           ; compute R start
	shl	esi,16

not_right:
	jmp	sliceloop

end_poly:
	pop	cx
	pop	dx
	pop	di                ; exit code
	pop	si
	mov	sp,bp
	pop	bp
	ret

_fastpoly	endp





; trapezoid - draw "lines" horizontal spans between two stepping edges.
;
; Near routine that runs on the caller's BP frame.
;   In:   edx = left  x << 16  (16.16 fixed point)
;         esi = right x << 16  (16.16 fixed point)
;         es  = video segment, direction flag clear
;         vline, lines, l_incr, r_incr, aswap set up in the frame
;   Out:  edx, esi advanced by one increment per line drawn
;         vline advanced by 80 per line drawn; lines counted down
;   Regs: eax, ebx, ecx, di and flags destroyed
;   Note: at least one line is always drawn, because the count is
;         tested after the span.  x is used directly as an index into
;         stmask/fnmask with no range check.

trapezoid:	; call with edx = left<<16, esi = right<<16
		; vline, lines, l_incr, r_incr all set up

	mov	ebx,edx			; convert fixed pt to integer
	sar	ebx,16
	mov	ecx,esi
	sar	ecx,16

nextline:
		; start of fast h line blitter:
		;  bx=left side, cx=right side, vline=line start

	xchg	dx,aswap		; dx = port 03C5h; the x fraction that
					; was in dx is parked in aswap

	mov	al,BYTE PTR cs:[bx+stmask]  ; left mask
	shr	bx,2                        ; left address

	mov	di,cx
	mov	ah,BYTE PTR cs:[di+fnmask]  ; right mask
	shr	cx,2                        ; right address

	mov	di,vline		    ; start address
	add	di,bx
	sub	cx,bx                       ; number of bytes-1
	je	short onebyte		    ; both ends in the same byte
	jc	short doneline              ; clip trap
					    ; (right byte is left of left byte)

	cmp	cx,8              ; test if big enough for word speedup
	jge	faster

	out	dx,al
	stosb		                    ; mask first byte
	dec	cx			    ; cx = count of full middle bytes
	jz	nomore                        ; mask rest
	cmp	al,0fh			    ; mask already all planes?
	je	noneed
	mov	al,0fh                     ; rep faster than test and jmp
	out	dx,al
noneed:
	rep	stosb
nomore:
	mov	al,ah
	out	dx,al
	mov	es:[di],ah                  ; mask last byte
	jmp	short doneline

faster:
	out	dx,al
	stosb		                    ; mask first byte
	dec	cx                          ; mask rest
	cmp	al,0fh
	je	noneed2
	mov	al,0fh                     ; rep faster than test and jmp
	out	dx,al
noneed2:
	test	di,1			    ; odd address: write one byte so
	jz	notodd			    ; the word stores start on an
	stosb				    ; even offset
	dec	cx
notodd:
	mov	bx,cx			    ; keep byte count for the odd tail
	shr	cx,1
	rep	stosw
	test	bx,1
	jz	noteodd
	stosb				    ; leftover middle byte
noteodd:
	mov	al,ah
	out	dx,al
	mov	es:[di],al		    ; mask last byte
	jmp	short doneline

onebyte:
	and	al,ah			; pixels common to both end masks
	out	dx,al
	mov	es:[di],al		; single byte mask

doneline:
	xchg	dx,aswap		; restore x fraction to dx, port
					; value back into aswap
	mov	ax,80                   ; next line address
	add	vline,ax

	add	edx,DWORD PTR l_incr    ; step left, right edges
	add	esi,DWORD PTR r_incr
	mov	ebx,edx			; convert fixed pt to integer
	sar	ebx,16
	mov	ecx,esi
	sar	ecx,16

	dec	WORD PTR lines          ; done lines?
	jg	nextline

donetri:                                ; finished all drawing
exit:
	retn



slope:	; Edge set-up for the vertex pair at ds:[bx] (top) and
	; ds:[bx+4] (bottom); each vertex is an x word then a y word.
	;
	; Out:  cx  = y(bottom) - y(top), upper half of ecx zero
	;       eax = x step per line, 16.16 fixed point, rounded up
	;             when positive      (line count >= 2, dx <> 0)
	;           = 0                  (dx = 0)
	;           = dx, not scaled     (line count = 1)
	;           = unchanged          (line count <= 0)
	;       flags describe cx: Z = vertices on the same line,
	;             sign set = bottom vertex lies above the top one
	;       edx preserved; bx and the vertex data untouched

	push	edx			; cdq/idiv use edx, caller's is live
	movzx	ecx,word ptr ds:[bx+6]
	sub	cx,ds:[bx+2]		; cx = line count of this edge
	jng	slope_exit		; flat or upward edge: no step
	mov	ax,ds:[bx+4]
	sub	ax,ds:[bx]		; ax = dx, flags say whether it is 0
	movsx	eax,ax			; widen; leaves those flags alone
	jz	slope_exit		; vertical edge, step stays 0
	cmp	cx,1
	jz	slope_exit		; one-line edge: divide skipped
	shl	eax,16			; dx as 16.16 fixed point
	cdq				; sign-extend into edx:eax
	idiv	ecx			; eax = (dx << 16) / line count
	test	eax,eax
	jng	slope_exit		; negative quotients stay truncated
	inc	eax			; positive ones are bumped up by one
slope_exit:
	pop	edx
	test	cx,cx			; report line count in the flags
	retn

	end
