Mandelbrot Generator
Posté le 15/06/2019 10:38
Hello allô
Default 1x zoom takes 7sec
Max zoom takes around 5-10min
It has a max zoom of 2^50: over one Quadrillion!
Going over 2^48 can be rather buggy
This is because numbers are limited to the 8 byte double variables
Attached file is both SH4 and SH3 compatible:
MANDEL.G1A
This needs the 'MonochromeLib' library; the functions it uses are now included in the code below
Controls
[-] Zoom out
[+] Zoom in
[F1] Hide/show the HUD (Heads-Up Display), which contains the coordinates, zoom level and max iterations
[F2] Changes colours of camera rectangle: Black, White & Inverted
[AC] Resets screen back to default state
[EXE] Draw set
[EXIT] Stop drawing the Mandelbrot (If it's taking too long)
[MENU] Return to the menu screen
[REPLAY] Move camera rectangle around (Arrow Keys: [LEFT], [RIGHT], [UP], [DOWN])
How can I optimize this code to run faster or zoom in further?
#include "fxlib.h"
#include "stdio.h"
#define TRUE 1
#define FALSE 0
//Calls the machine-code words stored in sc0135 as a `char* (*)(void)` function.
//The rest of this file uses the returned pointer as the base of the 1024-byte video RAM.
#define ML_vram_adress (*(sc_cpv)sc0135)
//Drawing modes understood by the ML_* primitives below (ML_TRANSPARENT draws nothing)
typedef enum { ML_TRANSPARENT = -1, ML_WHITE, ML_BLACK, ML_XOR, ML_CHECKER } ML_Color;
typedef char* (*sc_cpv)(void);
//Raw instruction words executed through ML_vram_adress.
//NOTE(review): presumably a syscall trampoline whose last word (0x0135) is the syscall number - confirm
const unsigned int sc0135[] = { 0xD201D002, 0x422B0009, 0x80010070, 0x0135 };
unsigned int key; //last key returned by GetKey() (blocks until a key is pressed)
int kcode1, kcode2; //row & col keycode for Bkey_GetKeyWait()
//Dummy target for the last argument of Bkey_GetKeyWait(). That parameter is an
//`unsigned short *`, so the object must be that wide: a `char` here would let
//the call write past the variable.
unsigned short unused;
//Cords on display when drawing mandelbrot. stop() overwrites them from a timer
//callback to abort the render loops, hence volatile so the loops re-read them.
volatile unsigned short dispX, dispY;
//Clears the whole 1024-byte video RAM (64 rows x 16 bytes) to 0 (all pixels white).
//The buffer is zeroed in three pieces so that the bulk of the work is done with
//aligned 32-bit stores: a byte-wise head up to a 4-byte boundary, 255 longwords,
//then a byte-wise tail.
void ML_clear_vram() {
    int i, end, * pointer_long, vram;
    char* pointer_byte;
    vram = (int)ML_vram_adress();
    //Head length: 4 - (vram & 3) bytes (4 when vram is already aligned).
    //The original `4 - vram & 3` parsed as `(4 - vram) & 3`, which is 0 for an
    //aligned buffer and left the last 4 bytes of VRAM uncleared.
    end = 4 - (vram & 3);
    pointer_byte = (char*)vram;
    for (i = 0; i < end; i++) pointer_byte[i] = 0;
    //Middle: 255 aligned longwords = 1020 bytes
    pointer_long = (int*)(vram + end);
    for (i = 0; i < 255; i++) pointer_long[i] = 0;
    //Tail: the remaining (vram & 3) bytes, so head + 1020 + tail == 1024
    pointer_byte += 1020 + end;
    end = vram & 3;
    for (i = 0; i < end; i++) pointer_byte[i] = 0;
}
//Copies the whole video RAM (64 rows x 16 bytes) to the LCD controller.
//For every row the controller's register 4 is written twice (first with
//`row | 192`, then with 0), after which register 7 receives the row's 16 bytes.
//NOTE(review): presumably register 4 selects row/column and 7 is the data port - confirm.
//The two hardware registers are accessed through volatile pointers: every store
//must reach the device, in order, and must not be merged or removed by the compiler.
void ML_display_vram() {
    volatile char* LCD_register_selector = (volatile char*)0xB4000000;
    volatile char* LCD_data_register = (volatile char*)0xB4010000;
    char* vram;
    int i, j;
    vram = ML_vram_adress();
    for (i = 0; i < 64; i++) {
        *LCD_register_selector = 4;
        *LCD_data_register = i | 192;
        *LCD_register_selector = 4;
        *LCD_data_register = 0;
        *LCD_register_selector = 7;
        for (j = 0; j < 16; j++)
            *LCD_data_register = *vram++;
    }
}
//Copies a single 16-byte row of video RAM to the LCD controller; faster than
//ML_display_vram(), which sends all 64 rows.
//row: 0..63; it is not range-checked here.
//The register sequence is the same as one iteration of ML_display_vram().
//The hardware registers are volatile so the compiler keeps every store, in order.
void ML_display_vram_row(int row) {
    unsigned char i;
    volatile char* LCD_register_selector = (volatile char*)0xB4000000;
    volatile char* LCD_data_register = (volatile char*)0xB4010000;
    char* vram;
    vram = (row << 4) + ML_vram_adress(); //16 bytes per row
    *LCD_register_selector = 4;
    *LCD_data_register = row | 192;
    *LCD_register_selector = 4;
    *LCD_data_register = 0;
    *LCD_register_selector = 7;
    for (i = 0; i < 16; i++)
        *LCD_data_register = *vram++;
}
//Draws a horizontal line on row y between columns x1 and x2 (inclusive, either order)
//directly into video RAM, using the given drawing mode.
//Nothing is drawn when y is outside 0..63 or when both ends lie off the same side
//of the screen; otherwise the ends are clamped to 0..127.
//VRAM layout used here: 16 bytes per row, bit 7 of each byte is the leftmost pixel.
//ML_TRANSPARENT (or any other value) matches no case and draws nothing.
void ML_horizontal_line(int y, int x1, int x2, ML_Color color) {
int i;
char checker;
char* vram = ML_vram_adress();
if (y & ~63 || (x1 < 0 && x2 < 0) || (x1 > 127 && x2 > 127))
return;
//order the ends so that x1 <= x2
if (x1 > x2) {
i = x1;
x1 = x2;
x2 = i;
}
if (x1 < 0)
x1 = 0;
if (x2 > 127)
x2 = 127;
//In every mode: when the line spans several bytes, the first and last bytes get a
//partial mask and the bytes in between are handled whole; otherwise a single
//mask covering bits x1..x2 of one byte is used.
//Note that shifts bind looser than + and -, e.g. `255 << 7 - (x2 & 7)` is `255 << (7 - (x2 & 7))`.
switch (color) {
case ML_BLACK:
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] |= 255 >> (x1 & 7);
vram[(y << 4) + (x2 >> 3)] |= 255 << 7 - (x2 & 7);
for (i = (x1 >> 3) + 1; i < x2 >> 3; i++)
vram[(y << 4) + i] = 255;
} else
vram[(y << 4) + (x1 >> 3)] |= (255 >> (x1 % 8 + 7 - x2 % 8)) << (7 - (x2 & 7));
break;
case ML_WHITE:
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] &= 255 << 8 - (x1 & 7);
vram[(y << 4) + (x2 >> 3)] &= 255 >> 1 + (x2 & 7);
for (i = (x1 >> 3) + 1; i < x2 >> 3; i++)
vram[(y << 4) + i] = 0;
} else
vram[(y << 4) + (x1 >> 3)] &= (255 << 8 - (x1 & 7)) | (255 >> 1 + (x2 & 7));
break;
case ML_XOR:
if (x1 >> 3 != x2 >> 3) {
vram[(y << 4) + (x1 >> 3)] ^= 255 >> (x1 & 7);
vram[(y << 4) + (x2 >> 3)] ^= 255 << 7 - (x2 & 7);
for (i = (x1 >> 3) + 1; i < (x2 >> 3); i++)
vram[(y << 4) + i] ^= 255;
} else
vram[(y << 4) + (x1 >> 3)] ^= (255 >> ((x1 & 7) + 7 - (x2 & 7))) << (7 - (x2 & 7));
break;
case ML_CHECKER:
//alternating pixel pattern, offset by one pixel on odd rows
checker = (y & 1 ? 85 : 170);
if (x1 >> 3 != x2 >> 3) {
//clear the covered bits first, then OR in the pattern
vram[(y << 4) + (x1 >> 3)] &= 255 << 8 - (x1 & 7);
vram[(y << 4) + (x2 >> 3)] &= 255 >> 1 + (x2 & 7);
vram[(y << 4) + (x1 >> 3)] |= checker & 255 >> (x1 & 7);
vram[(y << 4) + (x2 >> 3)] |= checker & 255 << 7 - (x2 & 7);
for (i = (x1 >> 3) + 1; i < x2 >> 3; i++)
vram[(y << 4) + i] = checker;
} else {
vram[(y << 4) + (x1 >> 3)] &= (255 << 8 - (x1 & 7)) | (255 >> 1 + (x2 & 7));
vram[(y << 4) + (x1 >> 3)] |= checker & (255 >> (x1 % 8 + 7 - x2 % 8)) << (7 - (x2 & 7));
}
break;
}
}
//Draws a vertical line in column x between rows y1 and y2 (inclusive, either order)
//into video RAM using the given drawing mode.
//Nothing is drawn when x is outside 0..127 or when both ends are off the same
//side of the screen; otherwise the ends are clamped to 0..63.
//ML_CHECKER alternates cleared/set pixels down the column, clearing where
//row and column parity differ. Any other mode value draws nothing.
void ML_vertical_line(int x, int y1, int y2, ML_Color color) {
    char* vram = ML_vram_adress();
    int column, bit, row;

    if ((x & ~127) != 0)
        return;
    if (y1 < 0 && y2 < 0)
        return;
    if (y1 > 63 && y2 > 63)
        return;
    if (y2 < y1) {
        int swap = y2;
        y2 = y1;
        y1 = swap;
    }
    if (y1 < 0)
        y1 = 0;
    if (y2 > 63)
        y2 = 63;

    column = x >> 3;      //byte within the 16-byte row
    bit = 128 >> (x & 7); //bit 7 is the leftmost pixel of the byte

    if (color == ML_BLACK) {
        for (row = y1; row <= y2; row++)
            vram[(row << 4) + column] |= bit;
    } else if (color == ML_WHITE) {
        for (row = y1; row <= y2; row++)
            vram[(row << 4) + column] &= ~bit;
    } else if (color == ML_XOR) {
        for (row = y1; row <= y2; row++)
            vram[(row << 4) + column] ^= bit;
    } else if (color == ML_CHECKER) {
        for (row = y1; row <= y2; row++) {
            if ((row ^ x) & 1)
                vram[(row << 4) + column] &= ~bit;
            else
                vram[(row << 4) + column] |= bit;
        }
    }
}
//Draws one pixel at (x, y) into video RAM using the given drawing mode.
//Coordinates outside 0..127 x 0..63 are ignored.
//ML_CHECKER clears the pixel where x and y parity differ and sets it otherwise.
//Any other mode value draws nothing.
void ML_pixel(int x, int y, ML_Color color) {
    char* cell = ML_vram_adress();
    int bit;

    if ((x & ~127) != 0 || (y & ~63) != 0)
        return;

    cell += (y << 4) + (x >> 3); //16 bytes per row, 8 pixels per byte
    bit = 128 >> (x & 7);        //bit 7 is the leftmost pixel

    if (color == ML_CHECKER)
        color = ((x ^ y) & 1) ? ML_WHITE : ML_BLACK;

    if (color == ML_BLACK)
        *cell |= bit;
    else if (color == ML_WHITE)
        *cell &= ~bit;
    else if (color == ML_XOR)
        *cell ^= bit;
}
//Returns n / x^p, computed by repeated division (p > 0) or repeated
//multiplication by x (p < 0); p == 0 returns n unchanged.
//Used instead of integer shifts so that powers above 2^32 (int limit) still work.
double divByPow(double n, double x, int p) {
    while (p < 0) { //negative exponent: n * x^(-p)
        n *= x;
        p++;
    }
    while (p > 0) { //positive exponent: n / x^p
        n /= x;
        p--;
    }
    return n;
}
//Timer callback: aborts the drawing of the set when the user presses [EXIT] or [MENU].
//It polls the keyboard once; if the key matrix code is row 4 with column 8 or 9,
//it pushes dispX/dispY past the end of the screen so that both render loops in
//AddIn_main terminate (a very hacky stop).
void stop(void) {
    if (!Bkey_GetKeyWait(&kcode1, &kcode2, 1, 0, 1, &unused))
        return; //no key pressed
    if (kcode1 != 4)
        return;
    if (kcode2 != 8 && kcode2 != 9)
        return;
    dispX = 128;
    dispY = 64;
}
//Add-in entry point. Loops forever over two phases:
// 1. Render: draws the Mandelbrot set for the current bounding box (x1..x2, y1..y2)
//    straight into VRAM, one bit per pixel, showing each row as soon as it is done.
//    A timer runs stop() during this phase so [EXIT]/[MENU] can abort it.
// 2. Navigate: the finished picture is saved; the user moves/zooms a selection
//    rectangle (arrow keys, [+], [-]), toggles the HUD ([F1]), changes the rectangle
//    colour ([F2]) or resets ([AC]); [EXE] starts the next render.
//Fixes over the previous version:
// - the HUD text buffer was `unsigned char string[1]`, so every sprintf() wrote past
//   it on the stack; it is now 32 bytes and passed to sprintf() as a `char *`;
// - the zoom factor is printed as unsigned (matching %u), so 2^31 no longer
//   overflows an int, and the exponent is computed in signed arithmetic.
//The function never leaves the outer loop; the final return is unreachable.
int AddIn_main(int isAppli, unsigned short OptionNum) { //Main function
    unsigned int graphZoom = 1; //zoom level for graph
    char screenZoom; //zoom level on screen (rectangle)
    int screenX1, screenX2; //corner X cords for drawing rectangle to screen
    int screenY1, screenY2; //corner Y cords for drawing rectangle to screen
    unsigned char string[32]; //HUD text: filled by sprintf(), drawn by PrintMini()
    char HUD = TRUE; //Heads Up Display: Cords, Zoom level & Max iteration: toggle with [F1]
    char colour = ML_XOR; //Colour of rectangle: Black, White or Inverted
    int screenX, screenY; //offset cords on screen from 0,0 for rectangle
    double graphX = 0, graphY = 0; //cords on graph - where to center mandelbrot
    double graphMove; //amount graphX & Y changes by when moving rectangle around
    int screenMove; //amount screenX & Y changes by when moving rectangle around with arrow keys
    short tempPixel = 0; //pixels are shifted in here; the low byte is stored to VRAM every 8 pixels
    register double zr, zi; //zr is real, zi imaginary
    register double zr2, zi2; //zr2 = zr^2, zi2 = zi^2
    register double x1 = -2.0; //bounding box cords on graph
    register double x2 = 2.0; //bounding box cords on graph
    register double y1 = -1.0; //bounding box cords on graph
    register double y2 = 1.0; //bounding box cords on graph
    register double x, y; //pixel cords on graph tested if in set
    register double xIsz, yIsz; //amount x/y increases by when ploting graph
    register unsigned short iMax = 32; //max iterations
    register unsigned short i; //iterations
    while (TRUE) {
        //---- Phase 1: render ----
        register char* vram = ML_vram_adress();
        SetTimer(1, 200, stop); //poll for [EXIT]/[MENU] while drawing
        ML_clear_vram();
        ML_display_vram();
        xIsz = (x2 - x1) / 128;
        yIsz = (y2 - y1) / 64;
        y = y1;
        for (dispY = 0; dispY < 64; dispY++) {
            x = x1;
            y += yIsz;
            for (dispX = 0; dispX < 128; dispX++) {
                //iterate z = z^2 + c with c = x + iy until |z|^2 > 4 or iMax is reached
                zr = x;
                zi = y;
                for (i = 0; i < iMax; i++) {
                    zr2 = zr * zr;
                    zi2 = zi * zi;
                    if (zr2 + zi2 > 4)
                        break;
                    zi = zr * zi;
                    zi += zi + y; //zi = 2*zr*zi + y
                    zr = zr2 - zi2 + x;
                }
                //a pixel is black (1) when the point never escaped
                tempPixel = (tempPixel << 1) | (i == iMax);
                if ((dispX & 7) == 7)
                    *vram++ = tempPixel;
                x += xIsz;
            }
            ML_display_vram_row(dispY);
        }
        SaveDisp(1);
        KillTimer(1);
        //---- Phase 2: navigate ----
        screenX = 0;
        screenY = 0;
        screenZoom = 1;
        Bkey_GetKeyWait(&kcode1, &kcode2, 2, 1, 1, &unused);
        do {
            GetKey(&key);
            //step sizes are derived from the zoom levels as they were before this key press
            screenMove = screenZoom > 4 ? 1 : divByPow(16, 2, screenZoom);
            graphMove = screenZoom > 4 ? divByPow(1, 2, graphZoom - (double)screenZoom) : divByPow(16, 2, graphZoom);
            switch (key) {
            case KEY_CHAR_PLUS:
                if (graphZoom < 51) {
                    graphZoom++;
                    screenZoom++;
                }
                break;
            case KEY_CHAR_MINUS:
                if (graphZoom) {
                    graphZoom--;
                    screenZoom--;
                }
                break;
            case KEY_CTRL_UP:
                screenY -= screenMove;
                graphY -= graphMove;
                break;
            case KEY_CTRL_DOWN:
                screenY += screenMove;
                graphY += graphMove;
                break;
            case KEY_CTRL_LEFT:
                screenX -= screenMove;
                graphX -= graphMove;
                break;
            case KEY_CTRL_RIGHT:
                screenX += screenMove;
                graphX += graphMove;
                break;
            case KEY_CTRL_F1:
                HUD = !HUD;
                break;
            case KEY_CTRL_F2:
                //cycle ML_XOR -> ML_BLACK -> ML_WHITE -> ML_XOR
                if (colour)
                    colour--;
                else
                    colour = ML_XOR;
                break;
            case KEY_CTRL_F3:
                //Gray scale, by refreshing screen multiple times per sec at different max iterations (iMax)
                break;
            case KEY_CTRL_AC:
                //reset everything and redraw immediately
                graphZoom = 1;
                graphX = 0;
                graphY = 0;
                screenZoom = 1;
                screenX = 0;
                screenY = 0;
                key = KEY_CTRL_EXE;
                break;
            }
            RestoreDisp(1);
            iMax = 8 * (graphZoom + 3);
            //selection rectangle (or a single pixel once it is too small to draw)
            if (screenZoom < 8) {
                screenX1 = 65 - divByPow(128, 2, screenZoom) + screenX;
                screenX2 = 62 + divByPow(128, 2, screenZoom) + screenX;
                screenY1 = 32 - (screenZoom > 6 ? 1 : divByPow(64, 2, screenZoom)) + screenY;
                screenY2 = 31 + (screenZoom > 6 ? 0 : divByPow(64, 2, screenZoom)) + screenY;
                ML_horizontal_line(screenY1, screenX1, screenX2, colour);
                ML_horizontal_line(screenY2, screenX1, screenX2, colour);
                ML_vertical_line(screenX1 - 1, screenY1, screenY2, colour);
                ML_vertical_line(screenX2 + 1, screenY1, screenY2, colour);
            } else
                ML_pixel(screenX + 64, screenY + 31, colour);
            //bounding box for the next render
            x1 = divByPow(-4, 2, graphZoom) + (0.03125 * graphX);
            x2 = divByPow(4, 2, graphZoom) + (0.03125 * graphX);
            y1 = divByPow(-2, 2, graphZoom) + (0.03125 * graphY);
            y2 = divByPow(2, 2, graphZoom) + (0.03125 * graphY);
            if (HUD == TRUE) {
                sprintf((char*)string, "X1:%f", x1);
                PrintMini(0, 0, string, 0);
                sprintf((char*)string, "Y1:%f", y1);
                PrintMini(0, 6, string, 0);
                sprintf((char*)string, "X2:%f", x2);
                PrintMini(81, 53, string, 0);
                sprintf((char*)string, "Y2:%f", y2);
                PrintMini(81, 59, string, 0);
                sprintf((char*)string, "MaxI:%u", iMax);
                PrintMini(0, 53, string, 0);
                if (graphZoom > 32)
                    sprintf((char*)string, "Zoom:2^%ux", graphZoom - 1);
                else
                    sprintf((char*)string, "Zoom:%ux", (unsigned int)divByPow(1, 2, 1 - (int)graphZoom));
                PrintMini(0, 59, string, 0);
            }
            ML_display_vram();
        } while (key != KEY_CTRL_EXE);
    }
    return 0;
}
//Add-in boilerplate. The pragmas place BR_Size and InitializeSystem in the named
//sections _BR_Size and _TOP.
//NOTE(review): presumably the SDK's linker script and startup code rely on these
//section names - confirm before changing them.
#pragma section _BR_Size
unsigned long BR_Size;
#pragma section
#pragma section _TOP
//Forwards both arguments unchanged to INIT_ADDIN_APPLICATION and returns its result.
int InitializeSystem(int isAppli, unsigned short OptionNum) {
return INIT_ADDIN_APPLICATION(isAppli, OptionNum);
}
#pragma section
Fichier joint
Citer : Posté le 16/11/2019 03:46 | # | Fichier joint
oops
I removed it in code, but forgot to recompile
Once I have got the 64bit Fixed Point assembly code all working
I'll start adding more features
- Gray Scale
- Higher zoom level
- Faster rendering
- Customizable Iterations
- Customizable HUD
- Julia Set
MANDEL.G1A
Mrvoxy Invité
Citer : Posté le 16/11/2019 04:08 | #
Wow! I eagerly wait!
Citer : Posté le 16/11/2019 08:54 | #
Wow. I tried that with full overclock, that's some stunning results right there! Deep full-screen images take about 10s and the overall fractal is drawn in about 1s!
I know it would be slower on fx-CG 50 due to the larger screen (9× more pixels to draw), but the colors, ah... x)
Citer : Posté le 16/11/2019 10:10 | #
I'm not sure how to format the sign of the numbers
0x07000000 07000000 converted to negative becomes 0xF8FFFFFF F9000000
but the negative output of the multiplier (or subtractors) is 0xF9000000 F9000000, this is because after inverting the number, it adds +1 to both limbs of the whole number, rather than just the lowest limb
I'm having huge problems with the inputs needing to be 'normal' negative numbers, but everything outputs the double +1 numbers
But if everything is kept positive (multiplier and adder, no subtractors) it's all fine
I'm confused that other people online don't seem to have this problem; I wonder if it just doesn't matter, or if they are using other methods that avoid it
Citer : Posté le 16/11/2019 11:00 | #
It might be useful to remember the negation identity: -x = ~x + 1.
This is true whatever the size of the integer is. Here you are dealing with 64-bits, but it works just as well as with 32-bit integers. The main change is that you need to use carry-aware instructions to propagate changes from one half to the other.
Just as with addc, there is a negc instruction which does what you need. In fact, the example usage of addc in the manual is exactly this operation.
Before: r0,r1 = 00000000,00000001
After: r0,r1 = ffffffff,ffffffff
clrt
negc r1, r1
negc r0, r0
Citer : Posté le 21/11/2019 08:33 | #
The reason why no one else had problems with negative numbers is because
1. They didn't bother, and their design wouldn't work with negatives
or 2. They detected if the number is negative and made the number postive then readded the sign at the end
so thats what I did
some half Optimized code
;Signed 64x64-bit fixed-point multiply (1:7:56 format), first listing.
;Inputs:  r4:r5 = x (high:low), r6:r7 = y (high:low)
;Outputs: result high/low words are stored through the pointers found at
;         @(16,r15) and @(20,r15) after the four pushes below
;Method:  make both operands positive, form the unsigned 128-bit product from four
;         32x32 partial products, keep bits 119..56 (product << 8, top 64 bits),
;         then negate the result if exactly one operand was negative.
; r4, r5, r6, r7, r8, r9, 10, 11
; 12 * 34 = 10*30 + 2*30 + 10*4 + 2*4 = 300 + 60 + 40 + 8 = 408
;x1x0 * y1y0 = x1*y1 + x0*y1 + x1*y0 + x0*y0 =
;Decimal point is 8bits right, from the left 1:7:56 Sign:Int:Frac
mov.l r8, @-r15 ;save callee-saved registers
mov.l r9, @-r15
mov.l r10, @-r15
mov.l r11, @-r15
mov.l #1, r1 ;r1 = 1, used to toggle the sign flag
mov.l #0, r10 ;r10 = constant 0
mov.l #0, r11 ;r11 = sign flag (1 = result is negative)
cmp/ge r10, r4 ;T = xHigh >= 0
bt _positiveX
negc r5, r5 ;x = -x (T is 0 here because the branch was not taken)
negc r4, r4
xor r1, r11 ;toggle sign flag
_positiveX:
cmp/ge r10, r6 ;T = yHigh >= 0
bt _positiveY
negc r7, r7 ;y = -y
negc r6, r6
xor r1, r11 ;toggle sign flag
_positiveY:
mov.l #0, r1 ;r1 = accumulator for product word Y
dmulu.l r5, r7 ;x0 * y0
sts mach, r2 ;r2 = word Z = high word of x0*y0 (its low word is never read)
clrt
dmulu.l r5, r6 ;x0 * y1
sts macl, r8
addc r8, r2 ;Z += low word, carry -> T
sts mach, r8
addc r8, r1 ;Y = high word + T
movt r0 ;r0 = word X = carry out of Y
clrt
dmulu.l r4, r7 ;x1 * y0
sts macl, r8
addc r8, r2 ;Z += low word
sts mach, r8
addc r8, r1 ;Y += high word + T
addc r10, r0 ;X += T (r10 is 0)
clrt
dmulu.l r4, r6 ;x1 * y1
sts macl, r8
addc r8, r1 ;Y += low word
sts mach, r8
addc r8, r0 ;X += high word + T
;r0:r1:r2 now hold product words X:Y:Z; shift left by 8 and keep the top 64 bits
; XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW
;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
mov #8, r8 ;8 //Left
shld r8, r0 ; XXXXXX
mov r1, r5
shld r8, r1 ; YYYYYY
mov #-24, r8 ;8-32 //Right
shld r8, r5 ;YY
shld r8, r2 ;ZZ
add r5, r0 ;no carry possible: the low 8 bits of r0 are zero and r5 < 256
add r2, r1 ;likewise for r1 and r2
cmp/eq r10, r11 ;T = sign flag == 0
bt _positive
negc r1, r1 ;result = -result (T is 0 here)
negc r0, r0
_positive:
mov.l @(16,r15), r4 ;5th argument: &high
mov.l @(20,r15), r5 ;6th argument: &mid
mov.l @(24,r15), r6 ;7th argument (loaded but not used)
mov.l @(28,r15), r7 ;8th argument (loaded but not used)
mov.l r0, @r4 ;High
mov.l r1, @r5 ;Mid
mov.l @r15+, r11 ;restore saved registers in reverse order
mov.l @r15+, r10
mov.l @r15+, r9
rts ;r8 is restored by the instruction in the delay slot
mov.l @r15+, r8
_mul64Optimized: ;mul64Optimized(x1, x0, y1, y0, &high, &mid, &low, &below)
;Body of mul64Optimized: signed 64x64-bit fixed-point multiply (1:7:56 format).
;Inputs:  r4:r5 = x (high:low), r6:r7 = y (high:low)
;Outputs: result high/low words are stored through the pointers found at
;         @(16,r15) and @(20,r15) after the four pushes below
;Method:  make both operands positive, form the unsigned 128-bit product from four
;         32x32 partial products, keep bits 119..56 (product << 8, top 64 bits),
;         then negate the result if exactly one operand was negative.
; r4, r5, r6, r7, r8, r9, 10, 11
; 12 * 34 = 10*30 + 2*30 + 10*4 + 2*4 = 300 + 60 + 40 + 8 = 408
;x1x0 * y1y0 = x1*y1 + x0*y1 + x1*y0 + x0*y0 =
;Decimal point is 8bits right, from the left 1:7:56 Sign:Int:Frac
mov.l r8, @-r15 ;save callee-saved registers
mov.l r9, @-r15
mov.l r10, @-r15
mov.l r11, @-r15
mov.l #1, r1 ;r1 = 1, used to toggle the sign flag
mov.l #0, r10 ;r10 = constant 0
mov.l #0, r11 ;r11 = sign flag (1 = result is negative)
cmp/ge r10, r4 ;T = xHigh >= 0
bt _positiveX
negc r5, r5 ;x = -x (T is 0 here because the branch was not taken)
negc r4, r4
xor r1, r11 ;toggle sign flag
_positiveX:
cmp/ge r10, r6 ;T = yHigh >= 0
bt _positiveY
negc r7, r7 ;y = -y
negc r6, r6
xor r1, r11 ;toggle sign flag
_positiveY:
mov.l #0, r1 ;r1 = accumulator for product word Y
dmulu.l r5, r7 ;x0 * y0
sts mach, r2 ;r2 = word Z = high word of x0*y0 (its low word is never read)
clrt
dmulu.l r5, r6 ;x0 * y1
sts macl, r8
addc r8, r2 ;Z += low word, carry -> T
sts mach, r8
addc r8, r1 ;Y = high word + T
movt r0 ;r0 = word X = carry out of Y
clrt
dmulu.l r4, r7 ;x1 * y0
sts macl, r8
addc r8, r2 ;Z += low word
sts mach, r8
addc r8, r1 ;Y += high word + T
addc r10, r0 ;X += T (r10 is 0)
clrt
dmulu.l r4, r6 ;x1 * y1
sts macl, r8
addc r8, r1 ;Y += low word
sts mach, r8
addc r8, r0 ;X += high word + T
;r0:r1:r2 now hold product words X:Y:Z; shift left by 8 and keep the top 64 bits
; XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW
;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
mov #8, r8 ;8 //Left
shld r8, r0 ; XXXXXX
mov r1, r5
shld r8, r1 ; YYYYYY
mov #-24, r8 ;8-32 //Right
shld r8, r5 ;YY
shld r8, r2 ;ZZ
add r5, r0 ;no carry possible: the low 8 bits of r0 are zero and r5 < 256
add r2, r1 ;likewise for r1 and r2
cmp/eq r10, r11 ;T = sign flag == 0
bt _positive
negc r1, r1 ;result = -result (T is 0 here)
negc r0, r0
_positive:
mov.l @(16,r15), r4 ;5th argument: &high
mov.l @(20,r15), r5 ;6th argument: &mid
mov.l @(24,r15), r6 ;7th argument (loaded but not used)
mov.l @(28,r15), r7 ;8th argument (loaded but not used)
mov.l r0, @r4 ;High
mov.l r1, @r5 ;Mid
mov.l @r15+, r11 ;restore saved registers in reverse order
mov.l @r15+, r10
mov.l @r15+, r9
mov.l @r15+, r8
rts
nop
some optimized and very unreadable code
;Same signed 64x64-bit fixed-point multiply (1:7:56) as the listing above, with the
;register saves and loads interleaved between the multiplies and branch delay
;slots put to use.
;Inputs:  r4:r5 = x (high:low), r6:r7 = y (high:low)
;Outputs: result high/low words are stored through the pointers found at
;         @(16,r15) and @(20,r15) once all four registers have been pushed
;Register roles: r0 = sign flag first (xor #imm only works on r0), later product
;word X; r10 = copy of the sign flag; r11 = constant 0.
; r4, r5, r6, r7, r8, r9, 10, 11
; 12 * 34 = 10*30 + 2*30 + 10*4 + 2*4 = 300 + 60 + 40 + 8 = 408
;x1x0 * y1y0 = x1*y1 + x0*y1 + x1*y0 + x0*y0 =
;Decimal point is 8bits right, from the left 1:7:56 Sign:Int:Frac
mov.l r11, @-r15 ;push 1 of 4
mov.l #0, r0 ;r0 = sign flag
mov.l #0, r11 ;r11 = constant 0
cmp/ge r11, r4 ;T = xHigh >= 0
bt _positiveX
;wasted cycle
negc r5, r5 ;x = -x (T is 0 here because the branch was not taken)
negc r4, r4
xor #1, r0 ;toggle sign flag
_positiveX:
cmp/ge r11, r6 ;T = yHigh >= 0
bt/s _positiveY
mov.l r10, @-r15 ;delay slot: push 2 of 4, executed whether or not the branch is taken
negc r7, r7 ;y = -y
negc r6, r6
xor #1, r0 ;toggle sign flag
_positiveY:
dmulu.l r5, r7 ;x0 * y0
mov.l r9, @-r15 ;push 3 of 4
mov.l r8, @-r15 ;push 4 of 4
mov.l #0, r1 ;r1 = accumulator for product word Y
sts mach, r2 ;r2 = word Z = high word of x0*y0 (its low word is never read)
dmulu.l r5, r6 ;x0 * y1
mov r0, r10 ;keep the sign flag; r0 is reused for word X
clrt
;wasted cycle
sts macl, r8
addc r8, r2 ;Z += low word, carry -> T
sts mach, r8
dmulu.l r4, r7 ;x1 * y0
addc r8, r1 ;Y = high word of x0*y1 + T
movt r0 ;r0 = word X = carry out of Y
clrt
sts macl, r8
addc r8, r2 ;Z += low word
sts mach, r8
dmulu.l r4, r6 ;x1 * y1
addc r8, r1 ;Y += high word of x1*y0 + T
addc r11, r0 ;X += T (r11 is 0)
clrt
sts macl, r8
addc r8, r1 ;Y += low word
sts mach, r8
addc r8, r0 ;X += high word + T
;r0:r1:r2 now hold product words X:Y:Z; shift left by 8 and keep the top 64 bits
; XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW
;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
mov #8, r8 ;8 //Left
shld r8, r0 ; XXXXXX
mov r1, r5
shld r8, r1 ; YYYYYY
mov #-24, r8 ;8-32 //Right
shld r8, r5 ;YY
shld r8, r2 ;ZZ
add r5, r0 ;no carry possible: the low 8 bits of r0 are zero and r5 < 256
add r2, r1 ;likewise for r1 and r2
cmp/eq r11, r10 ;T = sign flag == 0
bt/s _positive
mov.l @(16,r15), r4 ;delay slot: 5th argument (&high), loaded on both paths
negc r1, r1 ;result = -result (T is 0 here)
negc r0, r0
_positive:
mov.l @(20,r15), r5 ;6th argument: &mid
mov.l r0, @r4 ;High
mov.l r1, @r5 ;Mid
mov.l @r15+, r8 ;restore in reverse push order
mov.l @r15+, r9
mov.l @r15+, r10
rts
mov.l @r15+, r11 ;delay slot: last pop
Citer : Posté le 21/11/2019 09:35 | #
Looks nice! I wonder if this could be shorter. Have you looked at the libgcc implementation? Here is how they multiply unsigned 64-bit values (where you don't have carry problems):
.h and .l represent the high and low halves of a 64-bit value
X is 2^32 (also looks like a polynomial)
(Xr4+r5)(Xr6+r7) = r5r7 + X(r4r7 + r5r6) + X^2(r4r6)
= r5r7 + X(r4r7 + r5r6) (X^2 overflows)
= r5r7 + X(r4r7.l + r5r6.l) (X * rxry.h overflows)
Output is:
r0 = r4r7.l + r5r6.l + r5r7.h (higher half)
r1 = r5r7.l (lower half)
dmulu.l r5,r7
sts macl,r1 # r1 = r5r7.l
sts mach,r2 # r2 = r5r7.h
mul.l r6,r5
sts macl,r0 # r0 = r5r6.l
mul.l r7,r4
add r2,r0 # r0 = r5r6.l + r5r7.h
sts macl,r2 # r2 = r4r7.l
rts
add r2,r0 # r0 = r4r7.l + r5r6.l + r5r7.h
I'm mainly mentioning this because of the "analysis" of the multiplication, which allows using 32-bit multiplications at times and has no carry. If you make both operands unsigned at the beginning then add the sign at the very end, maybe you can gain on these clrt and adds everywhere?
Anyway, this already looks really good! The performance of the fixed-point version is clearly very fast!
Citer : Posté le 21/11/2019 09:48 | #
That 64x64 only gives the lower half of the 128bit output
But I need the high half because the fixed point is very high up
That's why it has normal mul.l and not dmulu.l
I had a feeling that I didn't need any of those carries, I had them there when testing signed multiplication
Ajouté le 03/01/2020 à 22:20 :
how can I create a "function" in asm?
I want to branch to the multiplication code, then have it automatically return back to the spot it first branched from
I sort of got it working with bsr, but it overwrites the pr value,
so when I want to return back to the C code, it instead returns to the instruction after the bsr
Citer : Posté le 03/01/2020 22:30 | #
Yeah bsr (you can also use jsr) overrides pr, thus you have to save pr whenever you know that you will call subfunctions.
sts.l pr, @-r15
# Call...
lds.l @r15+, pr
The calling convention is that you should also save r0-r7 if you need them later on. On the other hand, you can leave r8-r15 as is, and the called function will not overwrite them (just like pr).
Citer : Posté le 03/01/2020 23:04 | #
Thx, works perfectly
I need to pass 4 values in and out
should I use r0-r3, r4-r7 or r8-r11?
Citer : Posté le 04/01/2020 07:49 | #
Parameters should go to r4 to r7 (in this order), and the return value should be r0. If you have more parameters, pass them on the stack in reverse order (fifth parameter on top, sixth below, and so on). If you have more return values, either pass pointers as parameters or return them in r0 to r3. Second option should be checked for compatibility with the C ABI.
These are just conventions, you don't absolutely need them if your code is not going to get called directly from C code, but it cannot hurt to observe them.
Citer : Posté le 05/01/2020 09:47 | #
umm...
is there a limit on "addressable" space on the stack?
error when building
line 138
changing 64 to 60 removes the error, but the address is 4 too low
Citer : Posté le 05/01/2020 10:22 | #
Yes, indeed. Check out the instruction:
Operation: mov.l @(disp,Rm), Rn — (disp × 4 + Rm) → Rn
Code: 0101nnnnmmmmdddd
As you can see, there are only 4 bits for d, which means that you can only go from 0 to 60 in steps of 4.
But fear not, because you can just use a more powerful instruction such as this:
mov.l @(r0,r15), r2
You have to use r0 as the index here (instructions almost never have 3 parameters because this would span too many of the available 64k opcodes, so r0 is fixed).
Or you can use the Global Base Register which exists specifically for that purpose. As you can see, because the following instructions uses gbr instead of rm as base, the 4 bits dedicated to the value m are now available for d:
Operation: mov.l @(disp,GBR), R0 — (disp × 4 + GBR) → R0
Code: 11000110dddddddd
You still need to use r0, but now at least you don't have to add calculations to obtain your index. You can do the following:
ldc r15, gbr
# Enjoy!
mov.l @(64, gbr), r0
Remember to save GBR in functions, IIRC it's callee-saved.
Citer : Posté le 06/01/2020 10:15 | #
since I haven't been working on this from Nov 25 (due to school and other stuff). I've finally come back to this only 5 days ago
And I now have a fully working 64bit fixed point mandelbrot in asm
Gonna go to bed now; but will optimize, clean up and comment everything tomorrow
The function in C to call drawMandel in asm
drawMandel(zoom, vram, yHighStart, yLowStart, xHighStart, xLowStart, iMax);
Currently, I'm just using these params for testing
drawMandel(1, vram, -0x01000000, -0x00000000, -0x02000000, -0x00000009, 50);
drawMandel in asm
;drawMandel(zoom, vram, yHighStart, yLowStart, xHighStart, xLowStart, iMax)
;Renders the 128x64 Mandelbrot picture into vram (1 bit per pixel) using 64-bit
;fixed-point arithmetic. The first four arguments arrive in r4-r7, the last three
;on the stack. Calls sq64 and mul64.
;Register use ("a, b" = role on entry, role inside the loops):
;r0 = temp0
;r1 = temp1
;r2 = highIsz, vram
;r3 = lowIsz
;r4 = zoom, xHigh
;r5 = vram, xLow
;r6 = yHighStart, yHigh
;r7 = yLowStart, yLow
;r8 = tempPixel
;r9 =
;r10 =
;r11 =
;r12 =
;r13 =
;r14 =
;@(,r15) = xHighStart
;@(,r15) = xLowStart
;@(,r15) = iMax
;@(,r15) = vram
;64bit number format - spilt between two 32bit variables
;1:7:56
;Sign:Int:Frac
;±:0000000:00000000000000000000000000000000000000000000000000000000
;±0000000.000000000000000000000000,00000000000000000000000000000000
;high = ±0000000.000000000000000000000000
;low = 00000000000000000000000000000000
;stc.l gbr, @-r15 ;gbr PUSH!! 0
mov.l r8, @-r15 ;r8 PUSH!! 1
mov.l r9, @-r15 ;r9 PUSH!! 2
mov.l r10, @-r15 ;r10 PUSH!! 3
mov.l r11, @-r15 ;r11 PUSH!! 4
mov.l r12, @-r15 ;r12 PUSH!! 5
mov.l r13, @-r15 ;r13 PUSH!! 6
mov.l r14, @-r15 ;r14 PUSH!! 7
sts.l pr, @-r15 ;pr PUSH!! 8 (bsr below overwrites pr)
;ldc r15, gbr ;gbr = r15
;Step size between neighbouring pixels, as a 64-bit value highIsz:lowIsz
;r2 = highIsz //±0000000.000000000000000000000000
;r3 = lowIsz //00000000000000000000000000000000
;r4 = zoom //0 - 56
;highIsz = 0x00100000 >> zoom;
neg r4, r4 ;zoom = -zoom (shld shifts right for a negative count)
mov.l #1048576, r2 ;highIsz = 0x00100000
shld r4, r2 ;highIsz >>= zoom
;if (zoom < 21) lowIsz = 0 (0x80000000 shifted left by 21 - zoom), highIsz kept
;else lowIsz = 0x80000000 >> (zoom - 21), highIsz = 0
mov.l #2147483648,r3 ;lowIsz = 0x80000000
add #21, r4 ;r4 = 21 - zoom
cmp/pl r4 ;T = (21 - zoom) > 0
bt/s highBits ;if(T == 1) branch high //+delay Slot
shld r4, r3 ;delay slot: shift lowIsz by (21 - zoom); left if positive, right if negative
mov #0, r2 ;highIsz = 0
highBits:
mov.l r2, @-r15 ;highIsz PUSH!! 9
mov.l r3, @-r15 ;lowIsz PUSH!! 10
;Main Loops
mov.l r5, @-r15 ;vram PUSH!! 11
mov #0, r8 ;tempPixel = 0
mov #64, r0 ;row = 64
row: ;for (row = 64; row > 0; row--) {
mov.l r0, @-r15 ;row PUSH!! 12
;stack here: row, vram, lowIsz, highIsz, pr, r14..r8 (12 words = 48 bytes), then the stack arguments
mov @(48,r15), r4 ;xHighStart
mov @(52,r15), r5 ;xLowStart
mov.l #128, r1 ;col = 128
col: ;for (col = 128; col > 0; col--) {
mov @(56,r15), r10 ;iMax
mov.l r6, @-r15 ;yHigh PUSH!! 13
mov.l r7, @-r15 ;yLow PUSH!! 14
mov.l r4, @-r15 ;xHigh PUSH!! 15
mov.l r5, @-r15 ;xLow PUSH!! 16
mov.l r1, @-r15 ;col PUSH!! 17
mov.l r8, @-r15 ;tempPixel PUSH!! 18
innerLoop: ;for (i = iMax; i > 0; i--) {
;mov.l r10, @-r15 ;i PUSH!! 19
;r4 = zrHigh = xHigh
;r5 = zrLow = xLow
;r6 = ziHigh = yHigh
;r7 = ziLow = yLow
mov r4, r8 ;zrHigh (kept in r8:r9 because sq64 overwrites r4/r5)
mov r5, r9 ;zrLow
bsr sq64 ;zr2 = zr * zr
nop
mov r0, r13 ;zr2High
mov r1, r14 ;zr2Low
mov r6, r4 ;ziHigh
mov r7, r5 ;ziLow
bsr sq64 ;zi2 = zi * zi
nop
mov r0, r11 ;zi2High
mov r1, r12 ;zi2Low
;if (zr2 + zi2 > 4)
;NOTE(review): no clrt before this addc, so it also adds whatever T sq64 left behind - confirm T is always 0 here
addc r14, r1 ;zi2Low += zr2Low
addc r13, r0 ;zi2High += zr2High
mov.l #67108864, r1 ;0x04000000 //±0000100.000000000000000000000000
cmp/ge r0, r1 ;T = 0x04000000 >= sum high word (signed; the low word is not compared)
bf exitInnerLoop ;if(T == 0) branch exitInnerLoop (escaped, T = 0 becomes the pixel bit)
;zi *= zr;
mov r8, r4 ;zrHigh
mov r9, r5 ;zrLow
bsr mul64 ;zi *= zr (r4:r5 * r6:r7 -> r0:r1)
nop
;zi += zi + y;
clrt
addc r1, r1 ;ziLow += ziLow
addc r0, r0 ;ziHigh += ziHigh
mov.l @(16,r15), r7 ;yLow
mov.l @(20,r15), r6 ;yHigh
clrt
addc r1, r7 ;ziLow += yLow
addc r0, r6 ;ziHigh += yHigh
;zr = zr2 - zi2 + x;
clrt
subc r12, r14 ;zr2Low -= zi2Low
subc r11, r13 ;zr2High -= zi2High
clrt
mov.l @(8,r15), r5 ;xLow
mov.l @(12,r15), r4 ;xHigh
addc r14, r5 ;zrLow += xLow
addc r13, r4 ;zrHigh += xHigh
;mov.l @r15+, r0 ;i POP!! 19
dt r10 ;i--; T = (i == 0)
bf innerLoop ;if(T == 0) branch innerLoop
;falling through here means iMax was reached: T = 1 becomes the pixel bit
exitInnerLoop:
;mov.l @r15+, r0 ;i POP!! 19
;tst r10, r10
mov.l @r15+, r8 ;tempPixel POP!! 18 (mov.l leaves T unchanged)
rotcl r8 ;tempPixel = (tempPixel << 1) + T
mov.l @r15+, r1 ;col POP!! 17
mov r1, r0 ;col
and #7, r0 ;col &= 7
cmp/eq #1, r0 ;T = (col & 7) == 1, true on every 8th pixel since col counts down from 128
bf bypassVRAM ;if(T == 0) branch bypassVRAM
mov @(20,r15), r0 ;vram
mov.b r8, @r0 ;*vram = tempPixel
add #1, r0 ;vram++
mov r0, @(20,r15) ;vram
bypassVRAM:
mov @(24,r15), r3 ;lowIsz
mov @(28,r15), r2 ;highIsz
mov.l @r15+, r5 ;xLow POP!! 16
mov.l @r15+, r4 ;xHigh POP!! 15
clrt
addc r3, r5 ;xLow += lowIsz; T = Carry
addc r2, r4 ;xHigh += highIsz + T
mov.l @r15+, r7 ;yLow POP!! 14
mov.l @r15+, r6 ;yHigh POP!! 13
dt r1 ;col--; T = (col == 0)
bf col ;if(T == 0) branch col
clrt
addc r3, r7 ;yLow += lowIsz; T = Carry
addc r2, r6 ;yHigh += highIsz + T
mov.l @r15+, r0 ;row POP!! 12
dt r0 ;row--; T = (row == 0)
bf row ;if(T == 0) branch row
;discard the three loop words (values are not needed any more)
mov.l @r15+, r0 ;vram POP!! 11
mov.l @r15+, r0 ;lowIsz POP!! 10
mov.l @r15+, r0 ;highIsz POP!! 9
lds.l @r15+, pr ;pr POP!! 8
mov.l @r15+, r14 ;r14 POP!! 7
mov.l @r15+, r13 ;r13 POP!! 6
mov.l @r15+, r12 ;r12 POP!! 5
mov.l @r15+, r11 ;r11 POP!! 4
mov.l @r15+, r10 ;r10 POP!! 3
mov.l @r15+, r9 ;r9 POP!! 2
mov.l @r15+, r8 ;r8 POP!! 1
;ldc.l @r15+, gbr ;gbr POP!! 0
rts
nop
sq64 function in asm
;sq64: squares a signed 64-bit fixed-point number (1:7:56 format).
;Input:  r4:r5 = n (high:low)
;Output: r0:r1 = n * n (high:low)
;Overwrites r2, r3, r5 and, for negative input, r4. Makes no calls and does not
;touch the stack.
;Method: n = |n|; the cross product nLow*nHigh is doubled instead of being computed
;twice; the 128-bit square is shifted left by 8 and its top 64 bits are returned.
;r0 = outHigh
;r1 = outLow
;r2 = tempLow
;r3 = temp
;r4 = nHigh
;r5 = nLow
cmp/pz r4 ;T = nHigh >= 0
bt sqPositiveIn ;if(T == 1) branch sqPositiveIn
negc r5, r5 ;nLow = -nLow (T is 0 here because the branch was not taken)
negc r4, r4 ;nHigh = -nHigh - borrow
sqPositiveIn: ;n < 0 ? -n : n
dmulu.l r5, r4 ;nLow * nHigh
sts macl, r2
shll r2 ;r2 = 2 * low word, bit shifted out -> T
sts mach, r1
addc r1, r1 ;r1 = 2 * high word + T, carry -> T
movt r0 ;r0 = carry out of the doubling
clrt
dmulu.l r5, r5 ;nLow * nLow
sts mach, r3 ;only the high word is used
addc r3, r2 ;r2 += high word of nLow*nLow, carry -> T
dmulu.l r4, r4 ;nHigh * nHigh
sts macl, r3
addc r3, r1 ;r1 += low word + T
sts mach, r3
addc r3, r0 ;r0 += high word + T
;r0:r1:r2 now hold the upper three words X:Y:Z of the 128-bit square
; XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW << 8
;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
mov r1, r5
shll8 r0 ;000000xx XXXXXX00
shll8 r1 ;000000yy YYYYYY00
mov #-24, r3 ;8-32
shld r3, r5 ;000000YY yyyyyy00
shld r3, r2 ;000000ZZ zzzzzz00
clrt
addc r2, r1 ;outLow = YYYYYY00 + 000000ZZ
addc r5, r0 ;outHigh = XXXXXX00 + 000000YY
rts
nop
mul64 function in asm
;mul64: signed 64x64-bit fixed-point multiply (1:7:56 format).
;Inputs: r4:r5 = x (high:low), r6:r7 = y (high:low)
;Output: r0:r1 = x * y (high:low)
;r8-r11 are saved and restored; r2, r5 and (for negative inputs) r4-r7 are overwritten.
;Method: make both operands positive, form the unsigned 128-bit product from four
;32x32 partial products, keep product << 8 (top 64 bits), then negate the result
;if exactly one operand was negative.
; r4, r5, r6, r7
;Decimal point is 8bits right, from the left 1:7:56 Sign:Int:Frac
mov.l r11, @-r15
mov.l r10, @-r15
mov.l r9, @-r15
mov.l r8, @-r15
mov.l #0, r0 ;r0 = sign flag (xor #imm only works on r0)
mov.l #0, r11 ;r11 = constant 0
cmp/ge r11, r4 ;T = xHigh >= 0
bt mulPositiveX
negc r5, r5 ;x = -x (T is 0 here because the branch was not taken)
negc r4, r4
xor #1, r0 ;toggle sign flag
mulPositiveX:
cmp/ge r11, r6 ;T = yHigh >= 0
bt mulPositiveY
negc r7, r7 ;y = -y
negc r6, r6
xor #1, r0 ;toggle sign flag
mulPositiveY:
mov.l #0, r1 ;r1 = accumulator for product word Y
mov r0, r10 ;keep the sign flag; r0 is reused for word X
dmulu.l r5, r7 ;x0 * y0
sts mach, r2 ;r2 = word Z = high word of x0*y0 (its low word is never read)
dmulu.l r5, r6 ;x0 * y1
clrt
sts macl, r8
addc r8, r2 ;Z += low word, carry -> T
sts mach, r8
dmulu.l r4, r7 ;x1 * y0
addc r8, r1 ;Y = high word of x0*y1 + T
movt r0 ;r0 = word X = carry out of Y
clrt
sts macl, r8
addc r8, r2 ;Z += low word
sts mach, r8
dmulu.l r4, r6 ;x1 * y1
addc r8, r1 ;Y += high word of x1*y0 + T
addc r11, r0 ;X += T (r11 is 0)
clrt
sts macl, r8
addc r8, r1 ;Y += low word
sts mach, r8
addc r8, r0 ;X += high word + T
;r0:r1:r2 now hold product words X:Y:Z; shift left by 8 and keep the top 64 bits
; XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW
;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
mov r1, r5
shll8 r0 ; XXXXXX
shll8 r1 ; YYYYYY
mov #-24, r8 ;8-32 //Right
shld r8, r5 ;YY
shld r8, r2 ;ZZ
add r5, r0 ;no carry possible: the low 8 bits of r0 are zero and r5 < 256
add r2, r1 ;likewise for r1 and r2
cmp/eq r11, r10 ;T = sign flag == 0
bt mulPositiveN
negc r1, r1 ;outLow
negc r0, r0 ;outHigh
mulPositiveN:
mov.l @r15+, r8 ;restore in reverse push order
mov.l @r15+, r9
mov.l @r15+, r10
mov.l @r15+, r11
rts
nop
hmm... the tabs aren't lining up correctly
(tabs are 4 spaces aligned)
Citer : Posté le 06/01/2020 10:43 | #
Not bad at all! Do you have a compiled binary to try that out?
Citer : Posté le 06/01/2020 20:58 | # | Fichier joint
Here's a working prototype
You can't change anything, nor exit the program (need to restart the calc via the button on the back)
It isn't zooming in, but slowly increases iterations from 1 to infinity
Will release a fully working version later today
MANDEL.G1A
Citer : Posté le 06/01/2020 21:13 | #
That looks nice and really fast even for 20 or 30 iterations. Good job!
Citer : Posté le 08/01/2020 01:12 | # | Fichier joint
I'm gonna be away for a while, so won't be able to do any coding
So here's an update on it
I've managed to decode how ML_display_vram works in asm and have added it to my code
Now you can see it display to screen as it generates
I don't redraw the entire screen every pixel, I only draw 8pixels to the screen every 8pixels
it was 14 instructions extra and in total, they only run 2816 extra instructions
Which is nothing compared to the millions of instructions being run to calculate all the pixels
It is super buggy right now tho, at certain zoom levels, moving the camera around does nothing
I'm not sure how to stop it generating while it's running via a keypress [EXIT]
Would you know how?
You can exit the program when it's finished via [MENU] (no need to restart the calc :P)
I don't clear the screen every time the Mandel is redrawn, so you can see the old image get overwritten as the new one generates
MANDEL.G1A
Citer : Posté le 12/01/2020 09:40 | #
it was 14 instructions extra and in total, they only run 2816 extra instructions
Which is nothing compared to the millions of instructions being run to calculate all the pixels
This is correct, however even with the friendly SuperH platform here there is no equivalence between time and number of instructions. Remember that these instructions perform accesses to device memory and requires much longer than register arithmetic.
Hopefully for you this was measured and the total cost of updating the screen is usually 3.5ms or so. This is not too much. But it is still a lot more than the 13µs needed to clear the video RAM, even though the amount of updated memory is the same!
It is quite interactive already and I really like it. To stop generating when EXIT is pressed, you can check for IsKeyDown(KEY_CTRL_EXIT) at regular intervals, for instance whenever you finish a group of 4 rows. This test is not very costly, on SH4 calculators it is one device memory access followed by a binary mask.
Citer : Posté le 17/01/2020 07:39 | # | Fichier joint
Fixed a bunch of bugs and added the X and Y cords to the top left of the screen (Fixed point to string converter. Took me way too long to figure it out)
There still seems to be some bugs where it doesn't move at certain zoom levels, but shouldn't be too bad now
pressing [MENU] just takes you to the main menu, but now [EXIT] will quit the program and return to the main menu
But I'm having some problems with it crashing after pressing [EXIT]
Max zoom level has been increased from 2^48 to 2^60, but movement becomes limited after 2^54 due to the 64bit variables that store the position of the camera running out of space
Now I'm going to add an option to change the iterations manually (maybe even grayscale)
Stop it while it's still generating via [EXIT]
And clean up and optimize the code heavily
I found out today that addc and addv compute their T flag differently
I used addv because it doesn't have a + T allowing me to skip clrt which addc needs to be run before use
It was causing some weird speckle like patterns at higher zoom levels
Computing carries in C is a pain, so I made small functions in ASM to do it
Like sum64 and neg64
MANDEL.G1A
mandel.c
#include "stdio.h"
#define TRUE 1
#define FALSE 0
//Absolute value; note that the argument is evaluated twice, so avoid side effects in x
#define abs(x) ((x) < 0 ? -(x) : (x))
//Calls the machine-code words stored in sc0135 as a `char* (*)(void)` function.
//The rest of this file uses the returned pointer as the base of the video RAM.
#define ML_vram_adress (*(sc_cpv)sc0135)
//Drawing modes understood by the ML_* primitives below
typedef enum { ML_TRANSPARENT = -1, ML_WHITE, ML_BLACK, ML_XOR, ML_CHECKER } ML_Color;
typedef char* (*sc_cpv)(void);
//Raw instruction words executed through ML_vram_adress.
//NOTE(review): presumably a syscall trampoline whose last word (0x0135) is the syscall number - confirm
const unsigned int sc0135[] = { 0xD201D002, 0x422B0009, 0x80010070, 0x0135 };
//Zero the whole 1024-byte video RAM buffer (64 rows of 16 bytes).
//The buffer may start at any byte address, so it is cleared in three steps:
//leading bytes up to a 4-byte boundary, 255 aligned 32-bit words, trailing bytes.
void ML_clear_vram() {
    int i, end, * pointer_long, vram;
    char* pointer_byte;

    vram = (int)ML_vram_adress();

    //Number of leading bytes (1..4) before the first word that is cleared as an int.
    //The parentheses matter: "4 - vram & 3" parses as "(4 - vram) & 3", which yields 0
    //for an aligned buffer and then leaves the last 4 bytes of VRAM uncleared.
    end = 4 - (vram & 3);
    pointer_byte = (char*)vram;
    for (i = 0; i < end; i++) pointer_byte[i] = 0;

    //255 aligned words = 1020 bytes.
    pointer_long = (int*)(vram + end);
    for (i = 0; i < 255; i++) pointer_long[i] = 0;

    //Remaining 0..3 bytes, so that head + 1020 + tail is always 1024.
    pointer_byte += 1020 + end;
    end = vram & 3;
    for (i = 0; i < end; i++) pointer_byte[i] = 0;
}
//Copy the video RAM buffer to the LCD controller, one 16-byte row at a time.
//For each of the 64 rows the routine selects register 4 and writes (row | 192),
//selects register 4 again and writes 0, then selects register 7 and streams the
//16 bytes of the row through the data register.
//NOTE(review): register 4 presumably holds the row/column address and register 7 the
//pixel data port - confirm against the display driver documentation.
void ML_display_vram() {
    //Both registers are memory-mapped I/O: they must be volatile so that the compiler
    //keeps every store, in order, instead of merging the repeated writes.
    volatile char* LCD_register_selector = (volatile char*)0xB4000000;
    volatile char* LCD_data_register = (volatile char*)0xB4010000;
    char* vram;
    int i, j;

    vram = ML_vram_adress();
    for (i = 0; i < 64; i++) {
        *LCD_register_selector = 4;
        *LCD_data_register = i | 192;
        *LCD_register_selector = 4;
        *LCD_data_register = 0;
        *LCD_register_selector = 7;
        for (j = 0; j < 16; j++)
            *LCD_data_register = *vram++;
    }
}
//Draw the horizontal span x1..x2 (inclusive, either order) on row y of the VRAM buffer.
//Rows outside 0..63 and spans lying entirely left or right of the screen are ignored;
//the remaining span is clipped to columns 0..127. Colours without a case below
//(e.g. ML_TRANSPARENT) leave the buffer untouched.
void ML_horizontal_line(int y, int x1, int x2, ML_Color color) {
    char* vram = ML_vram_adress();
    char* line;
    char pattern;
    int first, last, cell, mask, swap;

    if ((y & ~63) || (x1 < 0 && x2 < 0) || (x1 > 127 && x2 > 127))
        return;

    //Order the end points, then clip them to the screen.
    if (x1 > x2) {
        swap = x1;
        x1 = x2;
        x2 = swap;
    }
    if (x1 < 0)
        x1 = 0;
    if (x2 > 127)
        x2 = 127;

    line = vram + (y << 4);            //16 bytes per row
    first = x1 >> 3;                   //byte holding the leftmost pixel
    last = x2 >> 3;                    //byte holding the rightmost pixel
    pattern = (y & 1) ? 85 : 170;      //checkerboard bits for this row

    for (cell = first; cell <= last; cell++) {
        //Bits of this byte that belong to the span (bit 7 is the leftmost pixel).
        mask = 255;
        if (cell == first)
            mask &= 255 >> (x1 & 7);
        if (cell == last)
            mask &= 255 << (7 - (x2 & 7));

        switch (color) {
        case ML_BLACK:
            line[cell] |= mask;
            break;
        case ML_WHITE:
            line[cell] &= ~mask;
            break;
        case ML_XOR:
            line[cell] ^= mask;
            break;
        case ML_CHECKER:
            line[cell] = (line[cell] & ~mask) | (pattern & mask);
            break;
        default:
            break;
        }
    }
}
//Draw the vertical span y1..y2 (inclusive, either order) in column x of the VRAM buffer.
//Columns outside 0..127 and spans lying entirely above or below the screen are ignored;
//the remaining span is clipped to rows 0..63.
void ML_vertical_line(int x, int y1, int y2, ML_Color color) {
    char* column = ML_vram_adress();
    char bit;
    int top, bottom, row, clear;

    if ((x & ~127) || (y1 < 0 && y2 < 0) || (y1 > 63 && y2 > 63))
        return;

    //Order the end points, then clip them to the screen.
    top = y1 > y2 ? y2 : y1;
    bottom = y1 > y2 ? y1 : y2;
    if (top < 0)
        top = 0;
    if (bottom > 63)
        bottom = 63;

    column += x >> 3;          //byte column that contains pixel x
    bit = 128 >> (x & 7);      //bit 7 is the leftmost pixel of a byte

    switch (color) {
    case ML_BLACK:
        for (row = top; row <= bottom; row++)
            column[row << 4] |= bit;
        break;
    case ML_WHITE:
        for (row = top; row <= bottom; row++)
            column[row << 4] &= ~bit;
        break;
    case ML_XOR:
        for (row = top; row <= bottom; row++)
            column[row << 4] ^= bit;
        break;
    case ML_CHECKER:
        //Pixels whose x and y parities differ are cleared, the others are set.
        clear = (top ^ x) & 1;
        for (row = top; row <= bottom; row++) {
            if (clear)
                column[row << 4] &= ~bit;
            else
                column[row << 4] |= bit;
            clear = !clear;
        }
        break;
    default:
        break;
    }
}
//Plot a single pixel in the VRAM buffer. Coordinates outside 0..127 x 0..63 are ignored.
//ML_CHECKER clears the pixel when x and y have different parity and sets it otherwise.
void ML_pixel(int x, int y, ML_Color color) {
    char* vram = ML_vram_adress();
    char* cell;
    int bit;

    if ((x & ~127) || (y & ~63))
        return;

    cell = vram + (y << 4) + (x >> 3);   //16 bytes per row, 8 pixels per byte
    bit = 128 >> (x & 7);                //bit 7 is the leftmost pixel of a byte

    //Resolve the checker pattern to the plain colour it means at this position.
    if (color == ML_CHECKER)
        color = ((x ^ y) & 1) ? ML_WHITE : ML_BLACK;

    if (color == ML_BLACK)
        *cell |= bit;
    else if (color == ML_WHITE)
        *cell &= ~bit;
    else if (color == ML_XOR)
        *cell ^= bit;
}
//Program state shared by reset(), drawMandelbrot() and AddIn_main().
unsigned char HUD = TRUE; //Heads Up Display: Cords, Zoom level & Max iteration: toggle with [F1]
char colour = ML_XOR; //Colour of camera rectangle: Black, White, Inverted, Transparent and Checkered
unsigned int iMax = 32; //max iterations
int screenZoom = 1; //zoom level on screen (camera rectangle)
unsigned int graphZoom = 1; //zoom level for graph
int screenX = 0, screenY = 0; //offset cords on screen from 0,0 for camera rectangle
//Camera position on the graph as 64-bit fixed-point numbers (1 sign, 7 integer and
//56 fraction bits), each split into a high and a low 32-bit word.
int graphHighX = 0; //high word of the X position
int graphHighY = 0; //high word of the Y position
int graphLowX = 0; //low word of the X position
int graphLowY = 0; //low word of the Y position
//Render the Mandelbrot set for the view centred on the camera position and save the
//result with SaveDisp(1), which AddIn_main restores with RestoreDisp(1).
//  X0:X1, Y0:Y1  high:low words of the 64-bit fixed-point camera position
//                (1 sign, 7 integer and 56 fraction bits)
//  zoom          power-of-two zoom level; one pixel is 0x00100000 >> zoom in
//                high-word units (see drawMandel)
//  iMax          maximum iteration count per pixel
void drawMandelbrot(int X0, int X1, int Y0, int Y1, int zoom, int iMax) {
    register char* vram = ML_vram_adress();
    int offsetHigh, offsetLow;
    int startXHigh, startXLow, startYHigh, startYLow;

    //Half the view width is 64 pixels = 0x04000000 >> zoom in high-word units.
    //From zoom 27 on that value no longer fits in the high word and continues in the
    //low word as 0x80000000 >> (zoom - 27); beyond zoom 58 it underflows to 0.
    //Shift counts are kept below 32 because larger shifts are undefined in C.
    offsetHigh = zoom <= 26 ? 0x04000000 >> zoom : 0;
    offsetLow = (zoom >= 27 && zoom <= 58) ? (int)(0x80000000u >> (zoom - 27)) : 0;
    sum64(&X0, &X1, offsetHigh, offsetLow);

    //Half the view height is 32 pixels = 0x02000000 >> zoom, handled the same way.
    offsetHigh = zoom <= 25 ? 0x02000000 >> zoom : 0;
    offsetLow = (zoom >= 26 && zoom <= 57) ? (int)(0x80000000u >> (zoom - 26)) : 0;
    sum64(&Y0, &Y1, offsetHigh, offsetLow);

    //drawMandel expects the negated 64-bit values. Negating the two words separately
    //is wrong whenever the low word is non-zero: the high word must also absorb the
    //borrow. Unsigned arithmetic avoids signed-overflow traps.
    startXLow = (int)(0u - (unsigned int)X1);
    startXHigh = (int)(0u - (unsigned int)X0 - (X1 != 0 ? 1u : 0u));
    startYLow = (int)(0u - (unsigned int)Y1);
    startYHigh = (int)(0u - (unsigned int)Y0 - (Y1 != 0 ? 1u : 0u));

    drawMandel(zoom, vram, startYHigh, startYLow, startXHigh, startXLow, iMax);
    SaveDisp(1);
}
//Put every piece of program state back to its start-up value, clear the video RAM
//and redraw the default view.
void reset() {
    //Camera position on the graph (both words of X and Y).
    graphHighX = graphHighY = 0;
    graphLowX = graphLowY = 0;

    //Camera rectangle on the screen.
    screenX = screenY = 0;
    screenZoom = 1;
    colour = ML_XOR;

    //Rendering parameters and HUD visibility.
    graphZoom = 1;
    iMax = 32;
    HUD = TRUE;

    ML_clear_vram();
    drawMandelbrot(graphHighX, graphLowX, graphHighY, graphLowY, graphZoom, iMax);
}
//Return n / x^p, applying one division per step for positive p and one
//multiplication per step for negative p (p == 0 returns n unchanged).
double divByPow(double n, double x, int p) {
    while (p > 0) {
        n /= x;
        --p;
    }
    while (p < 0) {
        n *= x;
        ++p;
    }
    return n;
}
//Append a 64-bit fixed-point number, written as sign, integer part, '.' and exact
//decimal fraction, to the NUL-terminated text already in `string`.
//  string      destination; must already hold a NUL-terminated prefix (may be empty)
//              and have room for the appended text (with fixedPoint == 8 this is up
//              to 1 sign + 3 integer digits + '.' + 56 fraction digits + NUL)
//  fixedPoint  number of sign + integer bits in `high`; the callers pass 8, and the
//              fraction split below (two limbs with 4 bits of headroom) is written
//              for that layout
//  high, low   the two 32-bit words of the value
//Returns the number of fraction digits written.
char sprintFrac(unsigned char* string, int fixedPoint, int high, int low) {
    unsigned int testHigh, testLow;   //fraction limbs, most significant first
    unsigned int intPart;
    unsigned int limbMask;
    int limbBits;
    int i = 0;
    char* tail;
    unsigned char fracHigh[256];

    //Work on the magnitude; the sign is printed separately from `high`.
    testHigh = high;
    testLow = low;
    abs64(&testHigh, &testLow);

    //Take the integer part from the 64-bit magnitude. Using abs(high) alone is off by
    //one for negative values whose low word is non-zero.
    intPart = testHigh >> (32 - fixedPoint);

    //Repack the fraction into two limbs of (32 - fixedPoint + 4) bits each so that a
    //multiplication by 10 cannot overflow a 32-bit word.
    fixedPoint -= 4;
    limbBits = 32 - fixedPoint;
    limbMask = (1u << limbBits) - 1;
    testHigh <<= 4;
    testHigh += testLow >> limbBits;
    testLow &= limbMask;
    testHigh &= limbMask;

    //Each multiplication by 10 pushes the next decimal digit above the limb; a binary
    //fraction terminates, so the loop ends once both limbs are zero.
    do {
        testHigh *= 10;
        testLow *= 10;
        testHigh += testLow >> limbBits;
        fracHigh[i++] = '0' + (testHigh >> limbBits);
        testHigh &= limbMask;
        testLow &= limbMask;
    } while (testHigh || testLow);
    fracHigh[i] = '\0';

    //Append at the end of the existing text. Passing `string` to sprintf as both the
    //destination and a %s argument would make source and destination overlap.
    tail = (char*)string;
    while (*tail)
        tail++;
    sprintf(tail, "%s%u.%s", high < 0 ? "-" : "+", intPart, fracHigh);
    return (char)i;
}
int AddIn_main(int isAppli, unsigned short OptionNum) { //Main function
unsigned int key; //pause until key press
unsigned char string[32]; //Used in converting int to string
unsigned int graphMoveHigh, graphMoveLow;//amount graphX & Y changes by when moving camera rectangle around
unsigned int screenMove; //amount screenX & Y changes by when moving camera rectangle around
int screenX1, screenX2; //corner X cords for drawing rectangle to screen
int screenY1, screenY2; //corner Y cords for drawing rectangle to screen
//64bit Fixed Point number format - spilt between two 32bit variables
//1:7:56
//Sign:Int:Frac
//±:0000000:00000000000000000000000000000000000000000000000000000000
//±0000000.000000000000000000000000,00000000000000000000000000000000
//high = ±0000000.000000000000000000000000
//low = 00000000000000000000000000000000
reset();
do {
screenMove = graphZoom > 55 ? 16 >> (screenZoom - (graphZoom - 55)) : screenZoom > 4 ? 1 : 16 >> screenZoom;
if ((screenZoom > 4 ? graphZoom - screenZoom + 4 : graphZoom) < 24) {
graphMoveHigh = 0x00800000 >> (screenZoom > 4 ? graphZoom - screenZoom + 4 : graphZoom);
graphMoveLow = 0x00000000;
} else if ((screenZoom > 4 ? graphZoom - screenZoom + 4 : graphZoom) < 56) {
graphMoveHigh = 0x00000000;
graphMoveLow = 0x80000000 >> (screenZoom > 4 ? graphZoom - screenZoom + 4 : graphZoom) - 24;
} else {
graphMoveHigh = 0x00000000;
graphMoveLow = 0x00000001;
}
if (HUD == -1) {
string[0] = '\0';
sprintFrac(&string, 8, graphMoveHigh, graphMoveLow);
PrintMini(7, 12, string, 0);
sprintf(&string, "%u", graphZoom);
PrintMini(0, 22, string, 0);
sprintf(&string, "%u", screenZoom);
PrintMini(0, 28, string, 0);
sprintf(&string, "%u", screenMove);
PrintMini(0, 34, string, 0);
sprintf(&string, "%X", graphMoveLow);
PrintMini(0, 40, string, 0);
}
GetKey(&key);
switch (key) {
case KEY_CHAR_PLUS:
if (graphZoom < 65) {
graphZoom++;
screenZoom++;
}
break;
case KEY_CHAR_MINUS:
if (graphZoom) {
graphZoom--;
screenZoom--;
}
break;
case KEY_CTRL_UP:
screenY -= screenMove;
sum64(&graphHighY, &graphLowY, graphMoveHigh, graphMoveLow);
break;
case KEY_CTRL_DOWN:
screenY += screenMove;
sub64(&graphHighY, &graphLowY, graphMoveHigh, graphMoveLow);
break;
case KEY_CTRL_LEFT:
screenX -= screenMove;
sum64(&graphHighX, &graphLowX, graphMoveHigh, graphMoveLow);
break;
case KEY_CTRL_RIGHT:
screenX += screenMove;
sub64(&graphHighX, &graphLowX, graphMoveHigh, graphMoveLow);
break;
case KEY_CTRL_F1:
HUD = !HUD;
break;
case KEY_CTRL_F2:
if (colour > ML_TRANSPARENT)
colour--;
else
colour = ML_CHECKER;
break;
case KEY_CTRL_F3:
//Gray scale, by refreshing screen multiple times per sec at different max iterations (iMax)
break;
case KEY_CTRL_AC:
reset();
break;
case KEY_CTRL_EXE:
//SetTimer(1, 200, stop);
drawMandelbrot(graphHighX, graphLowX, graphHighY, graphLowY, graphZoom, iMax);
//KillTimer(1);
screenX = 0;
screenY = 0;
screenZoom = 1;
break;
}
iMax = 8 * (graphZoom + 3);
if (key != KEY_CTRL_AC && key != KEY_CTRL_EXE) {
RestoreDisp(1);
if (screenZoom < 8) {
screenX1 = 65 - divByPow(128, 2, screenZoom) + screenX;
screenX2 = 62 + divByPow(128, 2, screenZoom) + screenX;
screenY1 = 32 - (screenZoom > 6 ? 1 : divByPow(64, 2, screenZoom)) + screenY;
screenY2 = 31 + (screenZoom > 6 ? 0 : divByPow(64, 2, screenZoom)) + screenY;
ML_horizontal_line(screenY1, screenX1, screenX2, colour);
ML_horizontal_line(screenY2, screenX1, screenX2, colour);
ML_vertical_line(screenX1 - 1, screenY1, screenY2, colour);
ML_vertical_line(screenX2 + 1, screenY1, screenY2, colour);
} else
ML_pixel(screenX + 64, screenY + 31, colour);
if (HUD == TRUE) {
neg64(&graphHighX, &graphLowX);
sprintf(&string, "X:");
sprintFrac(&string, 8, graphHighX, graphLowX);
PrintMini(0, 0, string, 0);
neg64(&graphHighX, &graphLowX);
sprintf(&string, "Y:");
sprintFrac(&string, 8, graphHighY, graphLowY);
PrintMini(0, 6, string, 0);
sprintf(&string, "MaxI:%u", iMax);
PrintMini(0, 53, string, 0);
if (graphZoom > 32)
sprintf(&string, "Zoom:2^%ux", graphZoom - 1);
else if (graphZoom > 0)
sprintf(&string, "Zoom:%ux", 1 << graphZoom - 1);
else
sprintf(&string, "Zoom:0.5x");
PrintMini(0, 59, string, 0);
}
}
} while (key != KEY_CTRL_EXIT);
return 1;
}
//BR_Size is placed in its own section named _BR_Size; nothing in this file reads or writes it.
//NOTE(review): presumably required by the SDK start-up/link script - confirm.
#pragma section _BR_Size
unsigned long BR_Size;
#pragma section
//InitializeSystem is placed in the section named _TOP and simply forwards its two
//arguments to INIT_ADDIN_APPLICATION, returning that call's result.
#pragma section _TOP
int InitializeSystem(int isAppli, unsigned short OptionNum) {
return INIT_ADDIN_APPLICATION(isAppli, OptionNum);
}
#pragma section
mandelasm.src
;Symbols made visible to the C code in mandel.c.
;_drawMandel is called from drawMandelbrot() and therefore has to be exported too.
.EXPORT _drawMandel
.EXPORT _neg64
.EXPORT _abs64
.EXPORT _sum64
.EXPORT _sub64
;NOTE(review): no _shld64 routine is defined in this listing - confirm whether this
;export is still needed.
.EXPORT _shld64
.ALIGN 4
;Renders the whole picture: 64 rows of 128 pixels. Every pixel runs the iteration loop
;below; its bit is 1 when the loop used up all iMax iterations and 0 when it left
;early because zr2 + zi2 exceeded the limit. Each finished group of 8 pixels is written
;both to the LCD data register and to the vram buffer.
;Arguments: r4-r7 hold the first four, the remaining three are read from the stack.
_drawMandel: ;drawMandel(zoom, vram, yHighStart, yLowStart, xHighStart, xLowStart, iMax);
;r0 = temp0
;r1 = temp1
;r2 = highIsz, vram
;r3 = lowIsz
;r4 = zoom, xHigh
;r5 = vram, xLow
;r6 = yHighStart, yHigh
;r7 = yLowStart, yLow
;r8 = tempPixel
;r9 = zrLow copy inside the iteration loop
;r10 = iteration counter
;r11 = zi2High
;r12 = zi2Low
;r13 = zr2High
;r14 = zr2Low
;Stack offsets as seen at the top of the row and col loops:
;@(48,r15) = xHighStart
;@(52,r15) = xLowStart
;@(56,r15) = iMax
;@(4,r15) = vram (write position, advanced after every stored byte)
;64bit Fixed Point number format - spilt between two 32bit variables
;1:7:56
;Sign:Int:Frac
;±:0000000:00000000000000000000000000000000000000000000000000000000
;±0000000.000000000000000000000000,00000000000000000000000000000000
;high = ±0000000.000000000000000000000000
;low = 00000000000000000000000000000000
;stc.l gbr, @-r15 ;gbr PUSH!! 0
mov.l r8, @-r15 ;r8 PUSH!! 1
mov.l r9, @-r15 ;r9 PUSH!! 2
mov.l r10, @-r15 ;r10 PUSH!! 3
mov.l r11, @-r15 ;r11 PUSH!! 4
mov.l r12, @-r15 ;r12 PUSH!! 5
mov.l r13, @-r15 ;r13 PUSH!! 6
mov.l r14, @-r15 ;r14 PUSH!! 7
sts.l pr, @-r15 ;pr PUSH!! 8
;ldc r15, gbr ;gbr = r15
;Pixel step (Isz): added to x after every pixel and to y after every row.
;r2 = highIsz //±0000000.000000000000000000000000
;r3 = lowIsz //00000000000000000000000000000000
;r4 = zoom //0 - 56
;highIsz = 0x00100000 >> zoom;
neg r4, r4 ;zoom = -zoom
mov.l #1048576, r2 ;highIsz = 0x00100000
shld r4, r2 ;highIsz >>= zoom
;lowIsz = zoom >= 21 ? 0x80000000 >> zoom - 21 : 0;
;NOTE(review): shld presumably uses only the low 5 bits of a negative count, so the
;low-word step may wrap once zoom - 21 exceeds 32 - confirm against the SH manual.
mov.l #2147483648,r3 ;lowIsz = 0x80000000
add #21, r4 ;zoom = 21 - zoom
cmp/pl r4 ;T = 21 - zoom > 0
bt/s highBits ;if(T == 1) branch highBits //+delay Slot
shld r4, r3 ;delay slot: right shift for zoom >= 21, left shift (bit falls out, lowIsz = 0) otherwise
mov #0, r2 ;highIsz = 0 (only reached when zoom >= 21)
highBits:
mov.l r2, @-r15 ;highIsz PUSH!! 9
mov.l r3, @-r15 ;lowIsz PUSH!! 10
;Main Loops
mov.l r5, @-r15 ;vram PUSH!! 11
mov #0, r8 ;tempPixel = 0
mov #64, r0 ;row = 64
row: ;for (row = 64; row > 0; row--) {
mov.l r0, @-r15 ;row PUSH!! 12
mov @(48,r15), r4 ;xHighStart
mov @(52,r15), r5 ;xLowStart
;Same LCD register sequence as ML_display_vram performs at the start of each row.
;char* LCD_register_selector = (char*)0xB4000000
mov.l #3019898880,r1
;*LCD_register_selector = 4
mov #4, r3
mov.b r3, @r1
;*LCD_data_register = (64 - row) | 192, computed as (row - 1) ^ 255
add #-1, r0
xor #255, r0
;char* LCD_data_register = (char*)0xB4010000
mov.l #3019964416,r2
mov.b r0, @r2
;*LCD_register_selector = 4
mov.b r3, @r1
;*LCD_data_register = 0
mov #0, r3
mov.b r3, @r2
;*LCD_register_selector = 7
mov #7, r3
mov.b r3, @r1
mov.l #128, r1 ;col = 128
col: ;for (col = 128; col > 0; col--) {
mov @(56,r15), r10 ;iMax
mov.l r6, @-r15 ;yHigh PUSH!! 13
mov.l r7, @-r15 ;yLow PUSH!! 14
mov.l r4, @-r15 ;xHigh PUSH!! 15
mov.l r5, @-r15 ;xLow PUSH!! 16
mov.l r1, @-r15 ;col PUSH!! 17
mov.l r8, @-r15 ;tempPixel PUSH!! 18
innerLoop: ;for (i = iMax; i != 0; i--) {
;r4 = zrHigh = xHigh
;r5 = zrLow = xLow
;r6 = ziHigh = yHigh
;r7 = ziLow = yLow
mov r4, r8 ;zrHigh (kept because sq64 may negate r4:r5)
mov r5, r9 ;zrLow
bsr sq64 ;zr2 = zr * zr
nop
mov r0, r13 ;zr2High
mov r1, r14 ;zr2Low
mov r6, r4 ;ziHigh
mov r7, r5 ;ziLow
bsr sq64 ;zi2 = zi * zi
nop
mov r0, r11 ;zi2High
mov r1, r12 ;zi2Low
;if (zr2 + zi2 > 4)
addc r14, r1 ;zi2Low += zr2Low
addc r13, r0 ;zi2High += zr2High
mov.l #67108864, r1 ;0x04000000 //±0000100.000000000000000000000000
cmp/ge r0, r1 ;T = 0x04000000 >= zi2High
bf exitInnerLoop ;if(T == 0) branch exitInnerLoop
;zi *= zr;
mov r8, r4 ;zrHigh
mov r9, r5 ;zrLow
bsr mul64 ;zi *= zr
nop
;zi += zi + y;
clrt
addc r1, r1 ;ziLow += ziLow
addc r0, r0 ;ziHigh += ziHigh
mov.l @(16,r15), r7 ;yLow
mov.l @(20,r15), r6 ;yHigh
clrt
addc r1, r7 ;ziLow += yLow
addc r0, r6 ;ziHigh += yHigh
;zr = zr2 - zi2 + x;
clrt
subc r12, r14 ;zr2Low -= zi2Low
subc r11, r13 ;zr2High -= zi2High
mov.l @(8,r15), r5 ;xLow
mov.l @(12,r15), r4 ;xHigh
clrt
addc r14, r5 ;xLow += zr2Low
addc r13, r4 ;xHigh += zr2High
dt r10 ;T = --i == 0
bf innerLoop ;if(T == 0) branch innerLoop
exitInnerLoop:
;T is 1 here only when the counter ran out; an early exit arrives with T = 0.
mov.l @r15+, r8 ;tempPixel POP!! 18
rotcl r8 ;tempPixel = (tempPixel << 1) + T
mov.l @r15+, r1 ;col POP!! 17
mov r1, r0 ;col
and #7, r0 ;col &= 7
cmp/eq #1, r0 ;T = col == 1 (8 pixels collected)
bf bypassVRAM ;if(T == 0) branch bypassVRAM
mov.l #3019964416,r2 ;LCD_data_register = 0xB4010000
mov.b r8, @r2 ;*LCD_data_register = tempPixel
mov @(20,r15), r0 ;vram
mov.b r8, @r0 ;*vram = tempPixel
add #1, r0 ;vram++
mov r0, @(20,r15) ;vram
bypassVRAM:
mov @(24,r15), r3 ;lowIsz
mov @(28,r15), r2 ;highIsz
mov.l @r15+, r5 ;xLow POP!! 16
mov.l @r15+, r4 ;xHigh POP!! 15
clrt
addc r3, r5 ;xLow += lowIsz; T = Carry
addc r2, r4 ;xHigh += highIsz + T
mov.l @r15+, r7 ;yLow POP!! 14
mov.l @r15+, r6 ;yHigh POP!! 13
dt r1 ;T = --col == 0
bf col ;if(T == 0) branch col
clrt
addc r3, r7 ;yLow += lowIsz; T = Carry
addc r2, r6 ;yHigh += highIsz + T
mov.l @r15+, r0 ;row POP!! 12
dt r0 ;T = --row == 0
bf row ;if(T == 0) branch row
;Discard the three loop values pushed before the row loop.
mov.l @r15+, r0 ;vram POP!! 11
mov.l @r15+, r0 ;lowIsz POP!! 10
mov.l @r15+, r0 ;highIsz POP!! 9
lds.l @r15+, pr ;pr POP!! 8
mov.l @r15+, r14 ;r14 POP!! 7
mov.l @r15+, r13 ;r13 POP!! 6
mov.l @r15+, r12 ;r12 POP!! 5
mov.l @r15+, r11 ;r11 POP!! 4
mov.l @r15+, r10 ;r10 POP!! 3
mov.l @r15+, r9 ;r9 POP!! 2
mov.l @r15+, r8 ;r8 POP!! 1
rts
nop
;Squares the fixed-point number in r4:r5 and returns the result in r0:r1.
;The input is first made non-negative in place (r4:r5 are modified), the product is
;accumulated in r0:r1:r2 (its lowest 32 bits are not kept), and that value is shifted
;left by 8 bits to put the binary point back in the 1:7:56 position.
sq64: ;Square 64bit number ;sq64(nHigh, nLow);
;r0 = outHigh
;r1 = outLow
;r2 = tempLower
;r3 = temp
;r4 = nHigh
;r5 = nLow
cmp/pz r4 ;T = nHigh >= 0
bt sqPositiveIn ;if(T == 1) branch sqPositiveIn
;T is 0 here, so the first negc starts without a borrow.
negc r5, r5 ;nLow = -nLow
negc r4, r4 ;nHigh = -nHigh - borrow
sqPositiveIn: ;n < 0 ? -n : n
;Cross term nLow * nHigh, doubled because it occurs twice in the square.
dmulu.l r5, r4 ;nLow * nHigh
sts macl, r2 ;tempLower
shll r2 ;tempLower <<= 1; T = bit shifted out
sts mach, r1
addc r1, r1 ;outLow = 2 * mach + T; T = Carry
movt r0 ;outHigh = Carry
;Only the upper half of nLow * nLow is added.
dmulu.l r5, r5 ;nLow * nLow
clrt
sts mach, r3
addc r3, r2
;nHigh * nHigh goes into the two upper words, taking the pending carry along.
dmulu.l r4, r4 ;nHigh * nHigh
sts macl, r3
addc r3, r1
sts mach, r3
addc r3, r0
; XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW << 8
;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
mov r1, r5
shll8 r0 ;000000xx XXXXXX00
shll8 r1 ;000000yy YYYYYY00
mov #-24, r3 ;8-32
shld r3, r5 ;000000YY yyyyyy00
shld r3, r2 ;000000ZZ zzzzzz00
clrt
addc r2, r1 ;outLow = YYYYYY00 + 000000ZZ
addc r5, r0 ;outHigh = XXXXXX00 + 000000YY
rts
nop
;Multiplies the fixed-point numbers x (r4:r5) and y (r6:r7) and returns the result in
;r0:r1. Both inputs are made non-negative in place (r4-r7 are modified), the unsigned
;product is accumulated in r0:r1:r2 (its lowest 32 bits are not kept), shifted left by
;8 bits, and negated when exactly one input was negative.
;r8, r9 and r10 are saved on entry and restored before returning.
mul64: ;mul64(x1, x0, y1, y0)
; r4, r5, r6, r7
;Decimal point is 8bits right, from the left 1:7:56 Sign:Int:Frac
mov.l r10, @-r15
mov.l r9, @-r15
mov.l r8, @-r15
mov.l #0, r0 ;r0 = sign flag, toggled once per negative input
mov.l #0, r9 ;r9 = constant 0
cmp/pz r4
bt mulPositiveX
;T is 0 here, so the first negc starts without a borrow.
negc r5, r5
negc r4, r4
xor #1, r0
mulPositiveX:
cmp/pz r6
bt mulPositiveY
negc r7, r7
negc r6, r6
xor #1, r0
mulPositiveY:
mov.l #0, r1
mov r0, r10 ;r10 = sign flag (r0 is reused for the result)
;Only the upper half of x0 * y0 is kept.
dmulu.l r5, r7 ;x0 * y0
sts mach, r2
dmulu.l r5, r6 ;x0 * y1
clrt
sts macl, r8
addc r8, r2
sts mach, r8
dmulu.l r4, r7 ;x1 * y0
addc r8, r1
movt r0 ;start of the top word: carry out of the middle word
clrt
sts macl, r8
addc r8, r2
sts mach, r8
dmulu.l r4, r6 ;x1 * y1
addc r8, r1
addc r9, r0 ;r0 += Carry
clrt
sts macl, r8
addc r8, r1
sts mach, r8
addc r8, r0
; XXXXXXXX YYYYYYYY ZZZZZZZZ WWWWWWWW
;XX XXXXXXYY YYYYYYZZ ZZZZZZWW WWWWWW
mov r1, r5
shll8 r0 ; XXXXXX
shll8 r1 ; YYYYYY
mov #-24, r8 ;8-32 //Right
shld r8, r5 ;YY
shld r8, r2 ;ZZ
add r5, r0
add r2, r1
cmp/eq r9, r10 ;T = sign flag == 0
bt mulPositiveN
;T is 0 here, so the first negc starts without a borrow.
negc r1, r1 ;outLow
negc r0, r0 ;outHigh
mulPositiveN:
mov.l @r15+, r8
mov.l @r15+, r9
mov.l @r15+, r10
rts
nop
;Negates a 64-bit number in place. r4 points to the high word, r5 to the low word.
_neg64: ;neg64(&high, &low)
clrt ;no borrow going into the low word
mov.l @r5, r0
negc r0, r0 ;low = 0 - low; T = borrow
mov.l r0, @r5
mov.l @r4, r0
negc r0, r0 ;high = 0 - high - T
mov.l r0, @r4
rts
nop
;Replaces a 64-bit number with its absolute value in place. r4 points to the high
;word, r5 to the low word. The low word is only rewritten when the number is negative.
_abs64: ;abs64(&high, &low)
mov.l @r4, r0
cmp/pz r0 ;T = high >= 0
bt/s positiveABS ;non-negative: nothing to negate
clrt ;delay slot, executed on both paths: T = 0
mov.l @r5, r1
negc r1, r1 ;low = 0 - low; T = borrow
negc r0, r0 ;high = 0 - high - T
mov.l r1, @r5
positiveABS:
rts
mov.l r0, @r4 ;delay slot: store the high word
;Adds the 64-bit value r6:r7 (high:low) to the 64-bit number stored at r4 (high word)
;and r5 (low word), writing the sum back in place.
_sum64: ;sum64(&highN, &lowN, highM, lowM)
clrt ;no carry going into the low word
mov.l @r5, r0
addc r7, r0 ;lowN += lowM; T = Carry
mov.l r0, @r5
mov.l @r4, r0
addc r6, r0 ;highN += highM + T
rts
mov.l r0, @r4 ;delay slot: store the high word
;Subtracts the 64-bit value r6:r7 (high:low) from the 64-bit number stored at r4 (high
;word) and r5 (low word), writing the difference back in place.
_sub64: ;sub64(&highN, &lowN, highM, lowM)
clrt ;no borrow going into the low word
mov.l @r5, r0
subc r7, r0 ;lowN -= lowM; T = Borrow
mov.l r0, @r5
mov.l @r4, r0
subc r6, r0 ;highN -= highM + T
rts
mov.l r0, @r4 ;delay slot: store the high word
.ALIGN 4
.END
Ajouté le 21/01/2020 à 08:22 :
How can I use Timer() in C to stop the mandelbrot from rendering? (after a keypress)
Or if I code it in ASM, what's the code for it?
Checking every 4 rows instead of every 2 seconds means you have to wait a long time when the max iteration count is very high,
and checking every 8 pixels or so is unnecessary at low iteration counts, where rendering finishes in under a second.
Citer : Posté le 21/01/2020 18:39 | #
You should use SetTimer() to start the timer then KillTimer() to stop it. You can find both of them in the fxlib documentation or here in French.