Hello again, and thanks for the code,
but it still doesn't work.
It seems the Nvidia compiler doesn't like reversing numbers;
however, if I comment out any one of the four assignments, it builds.
After trying to decompose the reversal in many different ways, I gave up on that path and chose to work around the problem instead.
I made a first version in which the byte reversing and the comparisons are done in the Python script, and a second version with a function that makes the comparison directly between a word and a reversed word in the OpenCL code. For now, this second version works only with big-endian memory mapping (I hope that is what the GPUs use).
Note: I added some print statements to show some information on the screen when running the script.
I also left some comments in the code.
The two versions build and run without error, but I'm not sure that the operations are correct.
As expected, the second version is faster (about 14800 khash/s with the 1st version, and 15200 khash/s with the 2nd version).
Version 1: reversing and comparison done in the Python script
--- F:/Desktop/m0mchil-poclbm-cf33815/poclbm.py Fri Oct 15 08:48:48 2010
+++ F:/Desktop/m0mchil-poclbm-cf33815/poclbm_nv1.py Mon Oct 18 23:36:46 2010
@@ -21,6 +21,9 @@
t2=(rot(a, 30)^rot(a, 19)^rot(a, 10))+((a&b)|(c&(a|b)))
return (uint32(d + t1), uint32(t1+t2))
+def bytereverse(x):
+ return (((x & 0x000000ffL) << 24) | ((x & 0x0000ff00L) << 8) | ((x & 0x00ff0000L) >> 8) | ((x & 0xff000000L) >> 24))
+
def sysWrite(format, args=()):
sys.stdout.write('
' + format % args)
sys.stdout.flush()
@@ -50,16 +53,40 @@
context = cl.Context([devices[options.device]], None, None)
else:
print 'No device specified, you may use -d to specify one of the following
'
+ # create_some_context should ask for the platform and device to use
context = cl.create_some_context()
+
+device = context.devices[0]
+print "==============================================================="
+print "Platform name:", platform.name
+print "Platform profile:", platform.profile
+print "Platform vendor:", platform.vendor
+print "Platform version:", platform.version
+print "---------------------------------------------------------------"
+print "Device name:", device.name
+print "Device type:", cl.device_type.to_string(device.type)
+print "Device memory: ", device.global_mem_size//1024//1024, 'MB'
+print "Device max clock speed:", device.max_clock_frequency, 'MHz'
+print "Device compute units:", device.max_compute_units
+print "==============================================================="
+
queue = cl.CommandQueue(context)
-kernelFile = open('btc_miner.cl', 'r')
+kernelFile = open('btc_miner_nv1.cl', 'r')
miner = cl.Program(context, kernelFile.read()).build()
kernelFile.close()
+print "==============================================================="
+print "build OK"
+print "==============================================================="
+
if (options.worksize == -1):
options.worksize = miner.search.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, context.devices[0])
+print "==============================================================="
+print "worksize = ", options.worksize
+print "==============================================================="
+
frames = options.frames
frame = float(1)/frames
window = frame/30
@@ -75,7 +102,7 @@
work['extraNonce'] = 0
work['block'] = ''
-output = np.zeros(2, np.uint32)
+output = np.zeros(6, np.uint32)
while True:
try:
@@ -117,10 +144,12 @@
output_buf = cl.Buffer(context, mf.WRITE_ONLY | mf.USE_HOST_PTR, hostbuf=output)
rate = start = time()
+ success = 0
+ nonce = 0
while True:
- if (output[0]):
- work['block'] = work['block'][:152] + pack('I', long(output[1])).encode('hex') + work['block'][160:]
- sysWriteLn('found: %s, %s', (output[1], datetime.now().strftime("%d/%m/%Y %H:%M")))
+ if (success):
+ work['block'] = work['block'][:152] + pack('I', long(nonce)).encode('hex') + work['block'][160:]
+ sysWriteLn('found: %s, %s', (nonce, datetime.now().strftime("%d/%m/%Y %H:%M")))
break
if (time() - start > options.askrate or base + globalThreads == 0x7FFFFFFF):
@@ -131,8 +160,20 @@
base = 0x7FFFFFFF - globalThreads
kernelStart = time()
- miner.search(queue, (globalThreads, ), (options.worksize, ), block2[0], block2[1], block2[2], state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7], state2[1], state2[2], state2[3], state2[5], state2[6], state2[7], target[6], pack('I', base), output_buf)
+ miner.search(queue, (globalThreads, ), (options.worksize, ), block2[0], block2[1], block2[2], state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7], state2[1], state2[2], state2[3], state2[5], state2[6], state2[7], pack('I', base), output_buf)
cl.enqueue_read_buffer(queue, output_buf, output).wait()
+ #target = target[6]
+ #H.x = output[0] ; H.y = output[1]
+ #G.x = output[2] ; G.y = output[3]
+ #nonce.x = output[4] ; nonce.y = output[5]
+ if (output[0] == 0 and bytereverse(output[2])<=target[6]):
+ success = 1
+ nonce = output[4]
+
+ if (output[1] == 0 and bytereverse(output[3])<=target[6]):
+ success = 1
+ nonce = output[5]
+
kernelTime = time() - kernelStart
if (kernelTime < lower):
@@ -142,4 +183,4 @@
if (time() - rate > options.rate):
rate = time()
- sysWrite('%s khash/s', int((base / (time() - start)) / 500))
\ No newline at end of file
+ sysWrite('%s : %s khash/s', (datetime.now().strftime("%d/%m/%Y %H:%M:%S"),int((base / (time() - start)) / 500)))
\ No newline at end of file
--- F:/Desktop/m0mchil-poclbm-cf33815/btc_miner.cl Fri Oct 15 08:48:48 2010
+++ F:/Desktop/m0mchil-poclbm-cf33815/btc_miner_nv1.cl Mon Oct 18 22:01:08 2010
@@ -1,5 +1,4 @@
-#define bytereverse(x) ( ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) )
-#define rot(x, y) rotate(x, (uint)y)
+#define rot(x, y) rotate(x, (uint2)y)
#define R(x) (work[x] = (rot(work[x-2],15)^rot(work[x-2],13)^((work[x-2])>>10)) + work[x-7] + (rot(work[x-15],25)^rot(work[x-15],14)^((work[x-15])>>3)) + work[x-16])
#define sharound(a,b,c,d,e,f,g,h,x,K) {h=(h+(rot(e, 26)^rot(e, 21)^rot(e, 7))+(g^(e&(f^g)))+K+x); t1=(rot(a, 30)^rot(a, 19)^rot(a, 10))+((a&b)|(c&(a|b))); d+=h; h+=t1;}
@@ -8,7 +7,6 @@
const uint state4, const uint state5, const uint state6, const uint state7,
const uint B1, const uint C1, const uint D1,
const uint F1, const uint G1, const uint H1,
- const uint target,
const uint base,
__global uint * output)
{
@@ -209,17 +207,12 @@
G+=0x1f83d9ab;
H+=0x5be0cd19;
- if((H.x==0) && (bytereverse(G.x)<=target))
- {
- output[0] = 1;
- output[1] = nonce.x;
- }
-
- if((H.y==0) && (bytereverse(G.y)<=target))
- {
- output[0] = 1;
- output[1] = nonce.y;
- }
+ output[0] = H.x;
+ output[1] = H.y;
+ output[2] = G.x;
+ output[3] = G.y;
+ output[4] = nonce.x;
+ output[5] = nonce.y;
}
// end
\ No newline at end of file
Version 2: a function that makes the comparison between a reversed and a non-reversed word
--- F:/Desktop/m0mchil-poclbm-cf33815/poclbm.py Fri Oct 15 08:48:48 2010
+++ F:/Desktop/m0mchil-poclbm-cf33815/poclbm_nv2.py Mon Oct 18 23:53:18 2010
@@ -50,16 +50,40 @@
context = cl.Context([devices[options.device]], None, None)
else:
print 'No device specified, you may use -d to specify one of the following
'
+ # create_some_context should ask for the platform and device to use
context = cl.create_some_context()
+
+device = context.devices[0]
+print "==============================================================="
+print "Platform name:", platform.name
+print "Platform profile:", platform.profile
+print "Platform vendor:", platform.vendor
+print "Platform version:", platform.version
+print "---------------------------------------------------------------"
+print "Device name:", device.name
+print "Device type:", cl.device_type.to_string(device.type)
+print "Device memory: ", device.global_mem_size//1024//1024, 'MB'
+print "Device max clock speed:", device.max_clock_frequency, 'MHz'
+print "Device compute units:", device.max_compute_units
+print "==============================================================="
+
queue = cl.CommandQueue(context)
-kernelFile = open('btc_miner.cl', 'r')
+kernelFile = open('btc_miner_nv2.cl', 'r')
miner = cl.Program(context, kernelFile.read()).build()
kernelFile.close()
+print "==============================================================="
+print "build OK"
+print "==============================================================="
+
if (options.worksize == -1):
options.worksize = miner.search.get_work_group_info(cl.kernel_work_group_info.WORK_GROUP_SIZE, context.devices[0])
+print "==============================================================="
+print "worksize = ", options.worksize
+print "==============================================================="
+
frames = options.frames
frame = float(1)/frames
window = frame/30
@@ -142,4 +166,4 @@
if (time() - rate > options.rate):
rate = time()
- sysWrite('%s khash/s', int((base / (time() - start)) / 500))
\ No newline at end of file
+ sysWrite('%s : %s khash/s', (datetime.now().strftime("%d/%m/%Y %H:%M:%S"),int((base / (time() - start)) / 500)))
\ No newline at end of file
--- F:/Desktop/m0mchil-poclbm-cf33815/btc_miner.cl Fri Oct 15 08:48:48 2010
+++ F:/Desktop/m0mchil-poclbm-cf33815/btc_miner_nv2.cl Mon Oct 18 23:33:50 2010
@@ -1,8 +1,88 @@
-#define bytereverse(x) ( ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((x) >> 24) )
-#define rot(x, y) rotate(x, (uint)y)
+#define rot(x, y) rotate(x, (uint2)y)
#define R(x) (work[x] = (rot(work[x-2],15)^rot(work[x-2],13)^((work[x-2])>>10)) + work[x-7] + (rot(work[x-15],25)^rot(work[x-15],14)^((work[x-15])>>3)) + work[x-16])
#define sharound(a,b,c,d,e,f,g,h,x,K) {h=(h+(rot(e, 26)^rot(e, 21)^rot(e, 7))+(g^(e&(f^g)))+K+x); t1=(rot(a, 30)^rot(a, 19)^rot(a, 10))+((a&b)|(c&(a|b))); d+=h; h+=t1;}
+// test 1 : recursive function, build error :
+// Error: Code selection failed to select: 0x504c1d8: i8 = NVPTXISD::MoveParam 0x504c150
+/*
+bool recurseReversedCompare(uchar *reversed, uchar *normal, uchar ir, uchar in)
+{
+ if(reversed[ir] > normal[in])
+ {
+ return false;
+ }
+ else if(reversed[ir] < normal[in])
+ {
+ return true;
+ }
+ else if(in > 0)
+ {
+ return recurseReversedCompare(reversed, normal, ir+1, in-1);
+ }
+ else
+ {
+ return true;
+ }
+}
+
+bool reversedSmallerThan(uint reversed, uint normal)
+{
+ uchar *r = (uchar *)&reversed;
+ uchar *n = (uchar *)&normal;
+
+ return recurseReversedCompare(r, n, 0, 3);
+}
+*/
+
+// test 2 : non recursive
+bool reversedSmallerThan(uint reversed, uint normal)
+{
+ uchar *r = (uchar *)&reversed;
+ uchar *n = (uchar *)&normal;
+
+ if(r[0] > n[3])
+ {
+ return false;
+ }
+ else if(r[0] < n[3])
+ {
+ return true;
+ }
+ else
+ {
+ if(r[1] > n[2])
+ {
+ return false;
+ }
+ else if(r[1] < n[2])
+ {
+ return true;
+ }
+ else
+ {
+ if(r[2] > n[1])
+ {
+ return false;
+ }
+ else if(r[2] < n[1])
+ {
+ return true;
+ }
+ else
+ {
+ if(r[3] > n[0])
+ {
+ return false;
+ }
+ else
+ {
+ return true;
+ }
+ }
+ }
+ }
+}
+
__kernel void search( const uint block0, const uint block1, const uint block2,
const uint state0, const uint state1, const uint state2, const uint state3,
const uint state4, const uint state5, const uint state6, const uint state7,
@@ -209,13 +289,13 @@
G+=0x1f83d9ab;
H+=0x5be0cd19;
- if((H.x==0) && (bytereverse(G.x)<=target))
+ if((H.x==0) && reversedSmallerThan(G.x, target)) // (bytereverse(G.x)<=target)
{
output[0] = 1;
output[1] = nonce.x;
}
- if((H.y==0) && (bytereverse(G.y)<=target))
+ if((H.y==0) && reversedSmallerThan(G.y, target)) // (bytereverse(G.y)<=target)
{
output[0] = 1;
output[1] = nonce.y;
There are probably still some optimisations to be made.