@@ -937,7 +937,7 @@ void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b)
937937 *-----------------------------------------------------------------------*/
938938void split_multiplier (nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *netlist)
939939{
940- nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addbig;
940+ nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addsmall2, * addbig;
941941 int size;
942942
943943 /* Check for a legitimate split */
@@ -976,50 +976,151 @@ void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *
976976 init_split_multiplier (node, a1b0, a0, a1, 0 , b0, a1b1, a0b0);
977977 mult_list = insert_in_vptr_list (mult_list, a1b0);
978978
979- /* New node for the initial add */
980- addsmall = allocate_nnode (node->loc );
981- addsmall->name = (char *)vtr::malloc (strlen (node->name ) + 6 );
982- strcpy (addsmall->name , node->name );
983- strcat (addsmall->name , " -add0" );
984- // this addition will have a carry out in the worst case, add to input pins and connect then to gnd
985- init_multiplier_adder (addsmall, a1b0, a1b0->num_output_pins + 1 , a0b1->num_output_pins + 1 );
986-
987- /* New node for the BIG add */
988- addbig = allocate_nnode (node->loc );
989- addbig->name = (char *)vtr::malloc (strlen (node->name ) + 6 );
990- strcpy (addbig->name , node->name );
991- strcat (addbig->name , " -add1" );
992- init_multiplier_adder (addbig, addsmall, addsmall->num_output_pins , a0b0->num_output_pins - b0 + a1b1->num_output_pins );
993-
994- // connect inputs to port a of addsmall
995- for (int i = 0 ; i < a1b0->num_output_pins ; i++)
996- connect_nodes (a1b0, i, addsmall, i);
997- add_input_pin_to_node (addsmall, get_zero_pin (netlist), a1b0->num_output_pins );
998- // connect inputs to port b of addsmall
999- for (int i = 0 ; i < a0b1->num_output_pins ; i++)
1000- connect_nodes (a0b1, i, addsmall, i + addsmall->input_port_sizes [0 ]);
1001- add_input_pin_to_node (addsmall, get_zero_pin (netlist), a0b1->num_output_pins + addsmall->input_port_sizes [0 ]);
1002-
1003- // connect inputs to port a of addbig
1004- size = addsmall->num_output_pins ;
1005- for (int i = 0 ; i < size; i++)
1006- connect_nodes (addsmall, i, addbig, i);
1007-
1008- // connect inputs to port b of addbig
1009- for (int i = b0; i < a0b0->output_port_sizes [0 ]; i++)
1010- connect_nodes (a0b0, i, addbig, i - b0 + size);
1011- size = size + a0b0->output_port_sizes [0 ] - b0;
1012- for (int i = 0 ; i < a1b1->output_port_sizes [0 ]; i++)
1013- connect_nodes (a1b1, i, addbig, i + size);
1014-
1015- // remap the multiplier outputs coming directly from a0b0
1016- for (int i = 0 ; i < b0; i++) {
1017- remap_pin_to_new_node (node->output_pins [i], a0b0, i);
1018- }
979+ // using the balanced addition method only works if a0 and b0 are the same size
980+ // (i.e. if the input ports on the hardware multiplier are equal)
981+ if (b0 == a0) {
982+ /* New node for the initial add */
983+ addsmall = allocate_nnode (node->loc );
984+ addsmall->name = (char *)vtr::malloc (strlen (node->name ) + 6 );
985+ strcpy (addsmall->name , node->name );
986+ strcat (addsmall->name , " -add0" );
987+ // this addition will have a carry out in the worst case, add to input pins and connect them to gnd
988+ init_multiplier_adder (addsmall, a1b0, a1b0->num_output_pins + 1 , a0b1->num_output_pins + 1 );
989+
990+ // connect inputs to port a of addsmall
991+ for (int i = 0 ; i < a1b0->num_output_pins ; i++)
992+ connect_nodes (a1b0, i, addsmall, i);
993+
994+ add_input_pin_to_node (addsmall, get_zero_pin (netlist), a1b0->num_output_pins );
995+ // connect inputs to port b of addsmall
996+ for (int i = 0 ; i < a0b1->num_output_pins ; i++)
997+ connect_nodes (a0b1, i, addsmall, i + addsmall->input_port_sizes [0 ]);
998+ add_input_pin_to_node (addsmall, get_zero_pin (netlist), a0b1->num_output_pins + addsmall->input_port_sizes [0 ]);
999+
1000+ /* New node for the BIG add */
1001+ addbig = allocate_nnode (node->loc );
1002+ addbig->name = (char *)vtr::malloc (strlen (node->name ) + 6 );
1003+ strcpy (addbig->name , node->name );
1004+ strcat (addbig->name , " -add1" );
1005+ init_multiplier_adder (addbig, addsmall, addsmall->num_output_pins , a0b0->num_output_pins - b0 + a1b1->num_output_pins );
1006+
1007+ // connect inputs to port a of addbig
1008+ size = addsmall->num_output_pins ;
1009+ for (int i = 0 ; i < size; i++)
1010+ connect_nodes (addsmall, i, addbig, i);
1011+
1012+ // connect inputs to port b of addbig
1013+ for (int i = b0; i < a0b0->output_port_sizes [0 ]; i++)
1014+ connect_nodes (a0b0, i, addbig, i - b0 + size);
1015+ size = size + a0b0->output_port_sizes [0 ] - b0;
1016+ for (int i = 0 ; i < a1b1->output_port_sizes [0 ]; i++)
1017+ connect_nodes (a1b1, i, addbig, i + size);
1018+
1019+ // remap the multiplier outputs coming directly from a0b0
1020+ for (int i = 0 ; i < b0; i++) {
1021+ remap_pin_to_new_node (node->output_pins [i], a0b0, i);
1022+ }
1023+
1024+ // remap the multiplier outputs coming from addbig
1025+ for (int i = 0 ; i < addbig->num_output_pins ; i++) {
1026+ remap_pin_to_new_node (node->output_pins [i + b0], addbig, i);
1027+ }
1028+ } else {
1029+ /* Expounding upon the description for the method in this function.
1030+ if we have two numbers A and B and we have a hardware multiplier of size a0xb0,
1031+ we can split them into two parts:
1032+ A = A1 << a0 + A0
1033+ B = B1 << b0 + B0
1034+ where A1 and B1 are the high bits of A and B, and A0 and B0 are the low bits.
1035+ Note that len(A0) = a0 and len(B0) = b0 by definition.
1036+ The multiplication of A and B can be expressed as:
1037+ A * B = (A1 << a0 + A0) * (B1 << b0 + B0)
1038+ = {A1 * B1 << (a0 + b0)} + {(A1 * B0) << a0 + (A0 * B1) << b0} + {A0 * B0}
1039+ we split the additions up like so:
1040+ addsmall = (A1 * B0) << a0 + (A0 * B1) << b0 // can have carry
1041+ addsmall2 = (A1 * B1 << (a0 + b0)) + (A0 * B0) // Will not have carry
1042+ addbig = addsmall + addsmall2
1043+ This is a slightly modified version of the Karatsuba algorithm.
1044+ */
1045+ // ///////////// Addsmall /////////////////////
1046+ addsmall = allocate_nnode (node->loc );
1047+ addsmall->name = (char *)vtr::malloc (strlen (node->name ) + 6 );
1048+ strcpy (addsmall->name , node->name );
1049+ strcat (addsmall->name , " -add0" );
1050+ init_multiplier_adder (addsmall, a1b0, a1b0->num_output_pins + a0 + 1 , a0b1->num_output_pins + b0 + 1 );
1051+
1052+ // The first a0 pins of addsmall input connecting to a1b0 are connected to zero
1053+ for (int i = 0 ; i < a0; i++) {
1054+ add_input_pin_to_node (addsmall, get_zero_pin (netlist), i);
1055+ }
1056+
1057+ // connect inputs to port a of addsmall
1058+ for (int i = 0 ; i < a1b0->num_output_pins ; i++) {
1059+ connect_nodes (a1b0, i, addsmall, i + a0);
1060+ }
1061+
1062+ // add zero pin for carry
1063+ add_input_pin_to_node (addsmall, get_zero_pin (netlist), a1b0->num_output_pins + a0);
1064+
1065+ // The first b0 pins of addsmall input connecting to a0b1 are connected to zero
1066+ for (int i = 0 ; i < b0; i++) {
1067+ add_input_pin_to_node (addsmall, get_zero_pin (netlist), i + addsmall->input_port_sizes [0 ]);
1068+ }
1069+
1070+ // connect inputs to port b of addsmall
1071+ for (int i = 0 ; i < a0b1->num_output_pins ; i++) {
1072+ connect_nodes (a0b1, i, addsmall, i + addsmall->input_port_sizes [0 ] + b0);
1073+ }
1074+
1075+ // add zero pin for carry
1076+ add_input_pin_to_node (addsmall, get_zero_pin (netlist), a0b1->num_output_pins + addsmall->input_port_sizes [0 ] + b0);
1077+
1078+ // ///////////// Addsmall2 /////////////////////
1079+ addsmall2 = allocate_nnode (node->loc );
1080+ addsmall2->name = (char *)vtr::malloc (strlen (node->name ) + 6 );
1081+ strcpy (addsmall2->name , node->name );
1082+ strcat (addsmall2->name , " -add1" );
1083+ init_multiplier_adder (addsmall2, a1b1, a1b1->num_output_pins + a0 + b0, a0b0->num_output_pins );
10191084
1020- // remap the multiplier outputs coming from addbig
1021- for (int i = 0 ; i < addbig->num_output_pins ; i++) {
1022- remap_pin_to_new_node (node->output_pins [i + b0], addbig, i);
1085+ // connect first a0+ b0 pins of addsmall2 to zero
1086+ for (int i = 0 ; i < a0 + b0; i++) {
1087+ add_input_pin_to_node (addsmall2, get_zero_pin (netlist), i);
1088+ }
1089+
1090+ // connect inputs to port a of addsmall2
1091+ for (int i = 0 ; i < a1b1->num_output_pins ; i++) {
1092+ connect_nodes (a1b1, i, addsmall2, i + a0 + b0);
1093+ }
1094+
1095+ // connect inputs to port b of addsmall2
1096+ for (int i = 0 ; i < a0b0->output_port_sizes [0 ]; i++) {
1097+ connect_nodes (a0b0, i, addsmall2, i + addsmall2->input_port_sizes [0 ]);
1098+ }
1099+
1100+ // ///////////// Addbig /////////////////////
1101+ addbig = allocate_nnode (node->loc );
1102+ addbig->name = (char *)vtr::malloc (strlen (node->name ) + 6 );
1103+ strcpy (addbig->name , node->name );
1104+ strcat (addbig->name , " -add2" );
1105+ init_multiplier_adder (addbig, addsmall, addsmall->num_output_pins , addsmall2->num_output_pins );
1106+ // Here the final addition can have a carry out in the worst case, however,
1107+ // our final product will always only be the length of the longest input port so regardless of the carry the
1108+ // final add's carry will always drop out.
1109+
1110+ // connect inputs to port a of addbig
1111+ for (int i = 0 ; i < addsmall->num_output_pins ; i++) {
1112+ connect_nodes (addsmall, i, addbig, i);
1113+ }
1114+
1115+ // connect inputs to port b of addbig
1116+ for (int i = 0 ; i < addsmall2->num_output_pins ; i++) {
1117+ connect_nodes (addsmall2, i, addbig, i + addbig->input_port_sizes [0 ]);
1118+ }
1119+
1120+ // remap the multiplier outputs coming from addbig
1121+ for (int i = 0 ; i < addbig->num_output_pins ; i++) {
1122+ remap_pin_to_new_node (node->output_pins [i], addbig, i);
1123+ }
10231124 }
10241125
10251126 // CLEAN UP
@@ -1060,7 +1161,6 @@ void split_multiplier_a(nnode_t *node, int a0, int a1, int b)
10601161 strcat (a0b->name , " -0" );
10611162 init_split_multiplier (node, a0b, 0 , a0, 0 , b, nullptr , nullptr );
10621163 mult_list = insert_in_vptr_list (mult_list, a0b);
1063-
10641164 /* New node for a1b multiply */
10651165 a1b = allocate_nnode (node->loc );
10661166 a1b->name = (char *)vtr::malloc (strlen (node->name ) + 3 );
@@ -1184,7 +1284,6 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist)
11841284
11851285 oassert (node->type == MULTIPLY);
11861286 oassert (hard_multipliers != NULL );
1187-
11881287 sizea = node->input_port_sizes [0 ];
11891288 sizeb = node->input_port_sizes [1 ];
11901289 sizeout = node->output_port_sizes [0 ];
@@ -1199,6 +1298,13 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist)
11991298 }
12001299 diffa = ina - sizea;
12011300 diffb = inb - sizeb;
1301+ // the input multiplier size is in the middle range of an unequal hard block size (e.g. mul_size > 18 && mul_size < 25): swap the ports when port b does not fit
1302+ if (diffb < 0 ) {
1303+ std::swap (ina, inb);
1304+ diffa = ina - sizea;
1305+ diffb = inb - sizeb;
1306+ }
1307+
12021308 diffout = hard_multipliers->outputs ->size - sizeout;
12031309
12041310 if (configuration.split_hard_multiplier == 1 ) {
@@ -1281,11 +1387,9 @@ void iterate_multipliers(netlist_t *netlist)
12811387 int mula, mulb;
12821388 int a0, a1, b0, b1;
12831389 nnode_t *node;
1284-
12851390 /* Can only perform the optimisation if hard multipliers exist! */
12861391 if (hard_multipliers == NULL )
12871392 return ;
1288-
12891393 sizea = hard_multipliers->inputs ->size ;
12901394 sizeb = hard_multipliers->inputs ->next ->size ;
12911395 if (sizea < sizeb) {
@@ -1313,7 +1417,6 @@ void iterate_multipliers(netlist_t *netlist)
13131417 sizea = sizeb;
13141418 sizeb = swap;
13151419 }
1316-
13171420 /* Do I need to split the multiplier on both inputs? */
13181421 if ((mula > sizea) && (mulb > sizeb)) {
13191422 a0 = sizea;
@@ -1890,4 +1993,4 @@ void free_multipliers()
18901993
18911994 hard_multipliers->instances = NULL ;
18921995 }
1893- }
1996+ }
0 commit comments