Mandelbrot Generator
Posté le 15/06/2019 10:38
Hello allô
Default 1x zoom takes 7sec
Max zoom takes around 5-10min
It has a max zoom of 2^50: over one Quadrillion!
Going over 2^48 can be rather buggy
This is because coordinates are stored in 8-byte double variables, which have limited precision
Attached file is both SH4 and SH3 compatible:
MANDEL.G1A
This needs some functions from the 'MonochromeLib' library; they are now included in the code below
Controls
[-] Zoom out
[+] Zoom in
[F1] Hide/show the HUD (Heads-Up Display), which contains the coordinates, zoom level and max iterations.
[F2] Changes colours of camera rectangle: Black, White & Inverted
[AC] Resets screen back to default state
[EXE] Draw set
[EXIT] Stop drawing the Mandelbrot (If it's taking too long)
[MENU] Return to the menu screen
[REPLAY] Move camera rectangle around (Arrow Keys: [LEFT], [RIGHT], [UP], [DOWN])
How can I optimize this code to run faster or zoom in further?
#include "fxlib.h"
#include "stdio.h"
//Boolean constants (used for the HUD flag and the endless main loop)
#define TRUE 1
#define FALSE 0
//Casts the sc0135 word array to a function pointer; "ML_vram_adress()" therefore calls it
//and the returned char* is used by the drawing routines as the base of the VRAM buffer.
#define ML_vram_adress (*(sc_cpv)sc0135)
//Drawing modes understood by the ML_* routines: WHITE clears bits, BLACK sets them, XOR toggles
//them and CHECKER alternates set/clear pixels. TRANSPARENT (-1) has no case in any routine, so it draws nothing.
typedef enum { ML_TRANSPARENT = -1, ML_WHITE, ML_BLACK, ML_XOR, ML_CHECKER } ML_Color;
//Pointer to a function that takes no arguments and returns char*
typedef char* (*sc_cpv)(void);
//Raw words that are executed as code through ML_vram_adress.
//NOTE(review): presumably an SH syscall trampoline with 0x0135 as the syscall number - confirm against the SDK documentation
const unsigned int sc0135[] = { 0xD201D002, 0x422B0009, 0x80010070, 0x0135 };
unsigned int key; //key code filled in by GetKey() while the selection rectangle is shown
int kcode1, kcode2; //row & col keycode for Bkey_GetKeyWait()
//dummy output argument for Bkey_GetKeyWait(); its value is never read
//NOTE(review): the SDK header presumably declares this parameter as a pointer to short, not char - confirm the type
char unused;
unsigned short dispX, dispY; //coords on display when drawing mandelbrot; stop() overwrites them to abort a render
//Clears the whole 1024-byte VRAM buffer (64 rows of 16 bytes) to 0.
//Most of the buffer is zeroed with aligned 4-byte stores; any bytes before the first
//4-byte boundary and after the last whole word are zeroed one at a time.
//Fix: the leading byte count used to be written "4 - vram & 3", which parses as
//"(4 - vram) & 3". For a 4-byte aligned buffer that cleared only 1020 bytes and left
//the last 4 bytes (the final 32 pixels of the bottom row) untouched.
//Assumes int is 4 bytes wide and can hold an address, as the original code did.
void ML_clear_vram() {
    int i, head, words, tail, * pointer_long, vram;
    char* pointer_byte;
    vram = (int)ML_vram_adress();
    head = (4 - (vram & 3)) & 3; //bytes before the first 4-byte boundary (0 if already aligned)
    words = (1024 - head) >> 2; //whole 4-byte words that fit after the head (256 or 255)
    tail = (1024 - head) & 3; //bytes left over after the last whole word
    pointer_byte = (char*)vram;
    for (i = 0; i < head; i++) pointer_byte[i] = 0;
    pointer_long = (int*)(vram + head);
    for (i = 0; i < words; i++) pointer_long[i] = 0;
    pointer_byte += head + (words << 2);
    for (i = 0; i < tail; i++) pointer_byte[i] = 0;
}
//Copies the whole VRAM buffer (64 rows of 16 bytes) to the LCD through its two
//memory-mapped registers at 0xB4000000 (register selector) and 0xB4010000 (data).
//Fix: both register pointers are now volatile. Without it the compiler is allowed to
//drop or merge the repeated stores to the same address, which would break the transfer.
void ML_display_vram() {
    volatile char* LCD_register_selector = (volatile char*)0xB4000000;
    volatile char* LCD_data_register = (volatile char*)0xB4010000;
    char* vram;
    int i, j;
    vram = ML_vram_adress();
    for (i = 0; i < 64; i++) {
        //NOTE(review): register 4 presumably sets the write address (row | 192, then column 0) - confirm with the LCD controller datasheet
        *LCD_register_selector = 4;
        *LCD_data_register = i | 192;
        *LCD_register_selector = 4;
        *LCD_data_register = 0;
        //NOTE(review): register 7 presumably is the pixel data port - confirm
        *LCD_register_selector = 7;
        for (j = 0; j < 16; j++)
            *LCD_data_register = *vram++; //16 bytes = one 128 pixel row
    }
}
//Copies a single 16-byte VRAM row to the LCD.
//faster than ML_display_vram() which displays the entire screen instead of a single row
//row: display row 0..63; any other value is ignored.
//Fixes: (1) the LCD register pointers are volatile so the compiler cannot drop or merge the
//repeated stores; (2) out-of-range rows are rejected instead of reading past the VRAM buffer
//(the render loop can call this with row 64 after stop() aborts a render).
void ML_display_vram_row(int row) {
    unsigned char i;
    volatile char* LCD_register_selector = (volatile char*)0xB4000000;
    volatile char* LCD_data_register = (volatile char*)0xB4010000;
    char* vram;
    if (row & ~63) return; //same range test style as the other ML_* routines
    vram = (row << 4) + ML_vram_adress(); //16 bytes per row
    //NOTE(review): register 4 presumably sets the write address (row | 192, then column 0) - confirm with the LCD controller datasheet
    *LCD_register_selector = 4;
    *LCD_data_register = row | 192;
    *LCD_register_selector = 4;
    *LCD_data_register = 0;
    //NOTE(review): register 7 presumably is the pixel data port - confirm
    *LCD_register_selector = 7;
    for (i = 0; i < 16; i++)
        *LCD_data_register = *vram++;
}
//Draws a horizontal line in VRAM on row y between columns x1 and x2 (inclusive, any order).
//The line is clipped to columns 0..127; nothing is drawn if y is outside 0..63 or if both
//ends lie off the same side of the screen. color selects set / clear / toggle / checker;
//any other value (e.g. ML_TRANSPARENT) draws nothing.
void ML_horizontal_line(int y, int x1, int x2, ML_Color color) {
    int col, first, last, multi;
    int head, tail; //bits of the line inside its first / last VRAM byte
    char pattern;
    char* row = ML_vram_adress();
    if ((y & ~63) || (x1 < 0 && x2 < 0) || (x1 > 127 && x2 > 127))
        return;
    if (x1 > x2) { //make x1 the left end
        col = x1;
        x1 = x2;
        x2 = col;
    }
    if (x1 < 0)
        x1 = 0;
    if (x2 > 127)
        x2 = 127;
    row += y << 4; //16 bytes per row
    first = x1 >> 3;
    last = x2 >> 3;
    multi = first != last;
    head = 255 >> (x1 & 7); //pixel x1 and everything right of it in its byte
    tail = (255 << (7 - (x2 & 7))) & 255; //pixel x2 and everything left of it in its byte
    if (!multi)
        head &= tail; //line starts and ends in the same byte
    switch (color) {
    case ML_BLACK:
        row[first] |= head;
        if (multi) {
            row[last] |= tail;
            for (col = first + 1; col < last; col++)
                row[col] = 255;
        }
        break;
    case ML_WHITE:
        row[first] &= ~head;
        if (multi) {
            row[last] &= ~tail;
            for (col = first + 1; col < last; col++)
                row[col] = 0;
        }
        break;
    case ML_XOR:
        row[first] ^= head;
        if (multi) {
            row[last] ^= tail;
            for (col = first + 1; col < last; col++)
                row[col] ^= 255;
        }
        break;
    case ML_CHECKER:
        pattern = (y & 1 ? 85 : 170); //alternating bits, shifted by one pixel on odd rows
        row[first] &= ~head;
        if (multi)
            row[last] &= ~tail;
        row[first] |= pattern & head;
        if (multi) {
            row[last] |= pattern & tail;
            for (col = first + 1; col < last; col++)
                row[col] = pattern;
        }
        break;
    }
}
//Draws a vertical line in VRAM in column x between rows y1 and y2 (inclusive, any order).
//The line is clipped to rows 0..63; nothing is drawn if x is outside 0..127 or if both
//ends lie off the same side of the screen. color selects set / clear / toggle / checker;
//any other value (e.g. ML_TRANSPARENT) draws nothing.
void ML_vertical_line(int x, int y1, int y2, ML_Color color) {
    int pos, stop_at;
    char clear_next, mask, * vram = ML_vram_adress();
    if ((x & ~127) || (y1 < 0 && y2 < 0) || (y1 > 63 && y2 > 63)) return;
    if (y1 > y2) { //make y1 the top end
        int swap = y1;
        y1 = y2;
        y2 = swap;
    }
    if (y1 < 0) y1 = 0;
    if (y2 > 63) y2 = 63;
    pos = (y1 << 4) + (x >> 3); //byte holding the top pixel
    stop_at = (y2 << 4) + (x >> 3); //byte holding the bottom pixel
    mask = 128 >> (x & 7); //bit of column x inside its byte
    switch (color) {
    case ML_BLACK:
        while (pos <= stop_at) {
            vram[pos] |= mask;
            pos += 16; //next row
        }
        break;
    case ML_WHITE:
        while (pos <= stop_at) {
            vram[pos] &= ~mask;
            pos += 16;
        }
        break;
    case ML_XOR:
        while (pos <= stop_at) {
            vram[pos] ^= mask;
            pos += 16;
        }
        break;
    case ML_CHECKER:
        clear_next = (y1 ^ x) & 1; //pixels where x and y have different parity are cleared
        while (pos <= stop_at) {
            if (clear_next) vram[pos] &= ~mask;
            else vram[pos] |= mask;
            clear_next = !clear_next;
            pos += 16;
        }
        break;
    }
}
//Draws one pixel in VRAM at (x, y). Coordinates outside 0..127 / 0..63 are ignored.
//color selects set / clear / toggle / checker; any other value (e.g. ML_TRANSPARENT) draws nothing.
void ML_pixel(int x, int y, ML_Color color) {
    char* cell = ML_vram_adress();
    int bit;
    if ((x & ~127) || (y & ~63)) return;
    cell += (y << 4) + (x >> 3); //16 bytes per row, 8 pixels per byte
    bit = 128 >> (x & 7); //leftmost pixel is the most significant bit
    if (color == ML_BLACK) {
        *cell |= bit;
    } else if (color == ML_WHITE) {
        *cell &= ~bit;
    } else if (color == ML_XOR) {
        *cell ^= bit;
    } else if (color == ML_CHECKER) {
        //pixels where x and y have different parity are cleared, the others are set
        if ((x ^ y) & 1) *cell &= ~bit;
        else *cell |= bit;
    }
}
//Returns n / x^p, computed by dividing n by x p times.
//A negative p multiplies instead (n * x^-p); p == 0 returns n unchanged.
//Used instead of integer shifts because the zoom factors exceed 2^32 (int limit).
double divByPow(double n, double x, int p) {
    while (p < 0) { //negative exponent: scale up
        n *= x;
        p++;
    }
    while (p > 0) { //positive exponent: scale down
        n /= x;
        p--;
    }
    return n;
}
//Timer callback: stops drawing the set if the user presses [EXIT] or [MENU].
//It polls the keyboard once and, when the key matches, pushes dispX/dispY past the
//screen size so the render loops in AddIn_main run out.
void stop(void) {
    if (!Bkey_GetKeyWait(&kcode1, &kcode2, 1, 0, 1, &unused))
        return; //no key reported
    if (kcode1 != 4)
        return;
    if (kcode2 != 8 && kcode2 != 9)
        return;
    //Very hacky stop function: both loop counters are set to their end values
    dispX = 128;
    dispY = 64;
}
//Add-in entry point. Loops forever:
//  1. renders the Mandelbrot set for the current bounding box row by row (stop() can abort it),
//  2. lets the user move and zoom a selection rectangle over the finished image,
//  3. on [EXE] (or [AC], which also resets the view) starts over with the new bounding box.
//isAppli / OptionNum are not used here.
//Fixes compared to the previous version:
//  - the HUD text buffer was 1 byte long, so every sprintf() wrote past it and corrupted the
//    stack; it is now 32 bytes and is passed as a char* instead of a pointer-to-array,
//  - the "Zoom:%ux" value is converted to unsigned int (2^31 at zoom level 32 does not fit in int),
//  - a row is only pushed to the LCD while dispY is still on screen (stop() sets it to 64).
int AddIn_main(int isAppli, unsigned short OptionNum) { //Main function
    unsigned int graphZoom = 1; //zoom level for graph
    char screenZoom; //zoom level on screen (rectangle)
    int screenX1, screenX2; //corner X coords for drawing rectangle to screen
    int screenY1, screenY2; //corner Y coords for drawing rectangle to screen
    unsigned char string[32]; //HUD text buffer; longest expected text is around 16 characters
    char HUD = TRUE; //Heads Up Display: coords, zoom level & max iterations: toggle with [F1]
    char colour = ML_XOR; //Colour of rectangle: Black, White or Inverted
    int screenX, screenY; //offset coords on screen from 0,0 for rectangle
    double graphX = 0, graphY = 0; //coords on graph - where to center mandelbrot
    double graphMove; //amount graphX & Y changes by when moving rectangle around
    int screenMove; //amount screenX & Y changes by when moving rectangle around with arrow keys
    short tempPixel = 0; //pixels are shifted in here one bit at a time, then written to VRAM 8 at once
    register double zr, zi; //zr is real, zi imaginary
    register double zr2, zi2; //zr2 = zr^2, zi2 = zi^2
    register double x1 = -2.0; //bounding box coords on graph
    register double x2 = 2.0; //bounding box coords on graph
    register double y1 = -1.0; //bounding box coords on graph
    register double y2 = 1.0; //bounding box coords on graph
    register double x, y; //pixel coords on graph tested if in set
    register double xIsz, yIsz; //amount x/y increases by when plotting graph
    register unsigned short iMax = 32; //max iterations
    register unsigned short i; //iterations
    while (TRUE) {
        register char* vram = ML_vram_adress();
        SetTimer(1, 200, stop); //stop() polls for [EXIT]/[MENU] while rendering
        ML_clear_vram();
        ML_display_vram();
        xIsz = (x2 - x1) / 128;
        yIsz = (y2 - y1) / 64;
        y = y1;
        for (dispY = 0; dispY < 64; dispY++) {
            x = x1;
            y += yIsz;
            for (dispX = 0; dispX < 128; dispX++) {
                //iterate z = z^2 + c with c = x + yi until |z|^2 > 4 or iMax is reached
                zr = x;
                zi = y;
                for (i = 0; i < iMax; i++) {
                    zr2 = zr * zr;
                    zi2 = zi * zi;
                    if (zr2 + zi2 > 4)
                        break;
                    zi = zr * zi;
                    zi += zi + y; //zi = 2*zr*zi + y
                    zr = zr2 - zi2 + x;
                }
                //points that never escaped are drawn black (bit set)
                tempPixel = (tempPixel << 1) | (i == iMax);
                if ((dispX & 7) == 7)
                    *vram++ = tempPixel; //low 8 bits = the last 8 pixels
                x += xIsz;
            }
            if (dispY < 64) //stop() may have set dispY to 64, which is not a valid row
                ML_display_vram_row(dispY);
        }
        SaveDisp(1); //keep the finished image so the overlay can be redrawn on top of it
        KillTimer(1);
        screenX = 0;
        screenY = 0;
        screenZoom = 1;
        Bkey_GetKeyWait(&kcode1, &kcode2, 2, 1, 1, &unused);
        do {
            GetKey(&key);
            screenMove = screenZoom > 4 ? 1 : divByPow(16, 2, screenZoom);
            graphMove = screenZoom > 4 ? divByPow(1, 2, graphZoom - (double)screenZoom) : divByPow(16, 2, graphZoom);
            switch (key) {
            case KEY_CHAR_PLUS:
                if (graphZoom < 51) {
                    graphZoom++;
                    screenZoom++;
                }
                break;
            case KEY_CHAR_MINUS:
                if (graphZoom) {
                    graphZoom--;
                    screenZoom--;
                }
                break;
            case KEY_CTRL_UP:
                screenY -= screenMove;
                graphY -= graphMove;
                break;
            case KEY_CTRL_DOWN:
                screenY += screenMove;
                graphY += graphMove;
                break;
            case KEY_CTRL_LEFT:
                screenX -= screenMove;
                graphX -= graphMove;
                break;
            case KEY_CTRL_RIGHT:
                screenX += screenMove;
                graphX += graphMove;
                break;
            case KEY_CTRL_F1:
                HUD = !HUD;
                break;
            case KEY_CTRL_F2:
                //cycle ML_XOR -> ML_BLACK -> ML_WHITE -> ML_XOR
                if (colour)
                    colour--;
                else
                    colour = ML_XOR;
                break;
            case KEY_CTRL_F3:
                //Gray scale, by refreshing screen multiple times per sec at different max iterations (iMax)
                break;
            case KEY_CTRL_AC:
                graphZoom = 1;
                graphX = 0;
                graphY = 0;
                screenZoom = 1;
                screenX = 0;
                screenY = 0;
                key = KEY_CTRL_EXE; //leave the selection loop and redraw the default view
                break;
            }
            RestoreDisp(1);
            iMax = 8 * (graphZoom + 3);
            if (screenZoom < 8) {
                screenX1 = 65 - divByPow(128, 2, screenZoom) + screenX;
                screenX2 = 62 + divByPow(128, 2, screenZoom) + screenX;
                screenY1 = 32 - (screenZoom > 6 ? 1 : divByPow(64, 2, screenZoom)) + screenY;
                screenY2 = 31 + (screenZoom > 6 ? 0 : divByPow(64, 2, screenZoom)) + screenY;
                ML_horizontal_line(screenY1, screenX1, screenX2, colour);
                ML_horizontal_line(screenY2, screenX1, screenX2, colour);
                ML_vertical_line(screenX1 - 1, screenY1, screenY2, colour);
                ML_vertical_line(screenX2 + 1, screenY1, screenY2, colour);
            } else
                ML_pixel(screenX + 64, screenY + 31, colour); //rectangle too small to draw: show a single pixel
            x1 = divByPow(-4, 2, graphZoom) + (0.03125 * graphX);
            x2 = divByPow(4, 2, graphZoom) + (0.03125 * graphX);
            y1 = divByPow(-2, 2, graphZoom) + (0.03125 * graphY);
            y2 = divByPow(2, 2, graphZoom) + (0.03125 * graphY);
            if (HUD == TRUE) {
                sprintf((char*)string, "X1:%f", x1);
                PrintMini(0, 0, string, 0);
                sprintf((char*)string, "Y1:%f", y1);
                PrintMini(0, 6, string, 0);
                sprintf((char*)string, "X2:%f", x2);
                PrintMini(81, 53, string, 0);
                sprintf((char*)string, "Y2:%f", y2);
                PrintMini(81, 59, string, 0);
                sprintf((char*)string, "MaxI:%u", iMax);
                PrintMini(0, 53, string, 0);
                if (graphZoom > 32)
                    sprintf((char*)string, "Zoom:2^%ux", graphZoom - 1);
                else //2^(graphZoom - 1) still fits in 32 bits here, so print it in full
                    sprintf((char*)string, "Zoom:%ux", (unsigned int)divByPow(1, 2, 1 - (int)graphZoom));
                PrintMini(0, 59, string, 0);
            }
            ML_display_vram();
        } while (key != KEY_CTRL_EXE);
    }
    return 0;
}
//SDK start-up boilerplate.
//NOTE(review): the "#pragma section" pairs presumably place the enclosed definitions in the named
//linker sections (_BR_Size, _TOP) expected by the add-in loader - confirm against the SDK docs.
//BR_Size is not read or written anywhere in this file.
#pragma section _BR_Size
unsigned long BR_Size;
#pragma section
#pragma section _TOP
//Forwards both arguments unchanged to INIT_ADDIN_APPLICATION and returns its result.
int InitializeSystem(int isAppli, unsigned short OptionNum) {
return INIT_ADDIN_APPLICATION(isAppli, OptionNum);
}
#pragma section
Fichier joint
Citer : Posté le 13/11/2019 06:00 | #
That's correct. You need the lowest mul, you just don't need the macl of that mul
Citer : Posté le 13/11/2019 21:49 | #
on the sh3_manual at 8.2.61 SHLD (Shift Logical Dynamically): Shift Instruction in the Description: it says The T bit is the last shifted bit of Rn.
but the rest of the section never explains anything else of the T bit. Is this a mistake?
Citer : Posté le 13/11/2019 21:52 | #
The T (Test) bit refers to that specific bit of the status register that is used for comparisons and jumps. There is not much more to say. Basically this instruction shifts the first operand and the last bit shifted out ends up in T so you can use movt to retrieve it or a conditional branch such as bt to jump depending on its value. More importantly, you can use rotate-with-carry instructions to reinsert it somewhere else.
Did you expect something else specifically? :o
Citer : Posté le 13/11/2019 21:59 | #
That sentence is the only place in the entire instruction description where the T bit is mentioned
In the Abstract it doesn't show anything about the T bit Rn << Rm → Rn (Rm ≥ 0), Rn >> Rm → [0 → Rn], (Rm < 0)
also says the T bit isn't used T Bit: -- (normally if it is used, it says T Bit: LSB or MSB, Carry, Overflow, 1, 0 etc)
and in the example code, the T bit is never shown, even tho it should be
;After execution R1 = H'FFFFFFEC, R2 = H'00000801
SHLD R3,R4 ;Before execution R3 = H'00000014, R4 = H'FFFFF801
;After execution R3 = H'00000014, R4 = H'80100000
Citer : Posté le 13/11/2019 22:01 | #
Oh, sorry, that's what you mean. I was confused by the "explain".
Well, there are inconsistencies sometimes. The only way to know is to try it out. But after re-reading the relevant documentation I doubt the T bit is really set. It is not set for shad and the use cases look slim. Also, the pseudo-code representations are accurate in my experience.
Citer : Posté le 14/11/2019 05:28 | #
With Karatsuba algorithm you need a 33bit multiplier
and for some reason when I input negatives, the answer gets +2 00000000 00000000 added to it (which I think is caused by overflow, due to the 32-bit mul)
Citer : Posté le 14/11/2019 09:20 | #
This is true with the basic formula, but if I remember well you can use a difference and an absolute value, ie. you can compute |x1-x0|×|y1-y0| instead of (x1+x0)×(y1+y0). This choice ensures that both operands are 32-bit only. Then you put back the sign in the result.
Also here's a cool trick to know if the sign is needed. When you have x1-x0 and y1-y0 you can xor them together and shift the result left (remember the top bit goes to T). In two operations only, T will become 1 if and only if the result of the product is negative.
It is true that the cost of the additional operations might be more than that of the multiplication. Honestly, I don't know exactly which will be faster. Karatsuba's method is especially fast when multiplying large polynomials and stuff where multiplying is really expensive (although for extremely large stuff you have the FFT).
Citer : Posté le 14/11/2019 09:49 | #
There still is the problem when it adds the high and low digits of x together, it can overflow (or y)
z2 = x1 * y1
z0 = x0 * y0
z1 = (x1 + x0) * (y1 + y0) - z2 - z0
x1+x0 //this can overflow 32bits very easily. Even without negatives
x1 = 1
x0 = FFFFFFFF
00000001 + FFFFFFFF = 1 00000000 > 32bits
Citer : Posté le 14/11/2019 10:07 | #
Yes exactly, this is why I explained how to use a difference instead.
z2 = x1 * y1
z0 = x0 * y0
z1 = (x1 - x0) * (y0 - y1) + z2 + z0
Note how x1 - x0 cannot overflow when both are positive numbers. So if you pull out the sign first after computing the difference, you can multiply with the normal 32-bit multiplier because there cannot be any overflow, then you put back the sign.
Citer : Posté le 14/11/2019 10:13 | #
ohh... didn't see the subtraction
Mrvoxy Invité
Citer : Posté le 15/11/2019 16:08 | #
All this optimization is really cool, but I think I have found a bug of a different type:
Pressing [EXIT] does not stop generation! This makes it impossible to stop the calculator from drawing the set, which may take 1 minute at high iterations.
Could this have been introduced when the iteration code was changed?
This may be related to the fact that pressing [EXIT] when not drawing the set brings up the HUD and camera rectangle... I do not think this is intended.
Citer : Posté le 15/11/2019 17:40 | #
As a quick note for Redcmd, I would suggest using a timer to check for key presses so that the interference of keyboard management is limited.
Citer : Posté le 15/11/2019 20:11 | #
There is currently a timer set to 200 that checks if the [EXIT] or [MENU] key is pressed
200 is about 1.56sec, so hold the button down for at least 2sec to exit
after the set has been drawn, pressing any key will bring up the HUD and camera rectangle
Citer : Posté le 15/11/2019 22:01 | #
And?
(sorry redmc)
Citer : Posté le 15/11/2019 22:59 | #
sorry @Youstones wasn't talking to you
I'm not expecting everyone to answer in English
I was talking to the other guy thats been spamming
Mrvoxy Invité
Citer : Posté le 16/11/2019 01:41 | #
I tried holding MENU, EXIT, and MENU and EXIT for 2, 5, and even 10 seconds, but the fractal just kept on drawing
I use fx-9750GII with G75+E OS. I tried both the original MANDEL-15760.g1a and the one that I ran through SH4 compatibility tool in case the keyboard routine differences cause this, neither worked. I do not think it is some freak file corruption, since I also reinstalled 3 times.
Maybe I do not have the latest file?
Citer : Posté le 16/11/2019 02:39 | # | Fichier joint
Fixed two bugs:
Pressing [EXIT] or [MENU] while it was drawing, didn't stop it (I forgot to upload the correct file ages ago)
If you zoomed in once, then zoomed in by a huge amount all at once, moving the rectangle around didn't actually change where the set was drawn
oh and its 10kb smaller now
MANDEL.G1A
Mrvoxy Invité
Citer : Posté le 16/11/2019 02:54 | #
Very cool!
But how do I download? The link in the post still points to the old version, and the "attached file" says it is 20KB, but when I click, it still points to the 30KB version.
Citer : Posté le 16/11/2019 03:16 | #
Seems Im not able to update the file in the original post.
MANDEL.G1A
Mrvoxy Invité
Citer : Posté le 16/11/2019 03:41 | #
The functionality has increased!
But now there is some debug information in the middle of the screen, showing a decimal that changes on camera movement and two integers that change on zoom whenever the HUD is displayed...
Also, is there a planned way to toggle what information appears on the HUD when it is brought up (i.e. selecting individually whether the points and the zoom/iterations are shown)?
Citer : Posté le 16/11/2019 03:46 | # | Fichier joint
oops
I removed it in code, but forgot to recompile
Once I have got the 64bit Fixed Point assembly code all working
I'll start adding more features
- Gray Scale
- Higher zoom level
- Faster rendering
- Customizable Iterations
- Customizable HUD
- Julia Set
MANDEL.G1A