001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.linear;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.Field;
023    import org.apache.commons.math.FieldElement;
024    import org.apache.commons.math.MathRuntimeException;
025    
026    /**
027     * Cache-friendly implementation of FieldMatrix using a flat arrays to store
028     * square blocks of the matrix.
029     * <p>
030     * This implementation is specially designed to be cache-friendly. Square blocks are
031     * stored as small arrays and allow efficient traversal of data both in row major direction
032     * and columns major direction, one block at a time. This greatly increases performances
033     * for algorithms that use crossed directions loops like multiplication or transposition.
034     * </p>
035     * <p>
036     * The size of square blocks is a static parameter. It may be tuned according to the cache
037     * size of the target computer processor. As a rule of thumbs, it should be the largest
038     * value that allows three blocks to be simultaneously cached (this is necessary for example
039     * for matrix multiplication). The default value is to use 36x36 blocks.
040     * </p>
041     * <p>
042     * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
043     * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
044     * blocks are flattened in row major order in single dimension arrays which are therefore
045     * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
046     * organized in row major order.
047     * </p>
048     * <p>
049     * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
050     * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
051     * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
052     * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
053     * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
054     * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
055     * rectangle.
056     * </p>
057     * <p>
058     * The layout complexity overhead versus simple mapping of matrices to java
059     * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
060     * to up to 3-fold improvements for matrices of moderate to large size.
061     * </p>
062     * @param <T> the type of the field elements
063     * @version $Revision: 825919 $ $Date: 2009-10-16 10:51:55 -0400 (Fri, 16 Oct 2009) $
064     * @since 2.0
065     */
066    public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
067    
068        /** Block size. */
069        public static final int BLOCK_SIZE = 36;
070    
071        /** Serializable version identifier */
072        private static final long serialVersionUID = -4602336630143123183L;
073    
074        /** Blocks of matrix entries. */
075        private final T blocks[][];
076    
077        /** Number of rows of the matrix. */
078        private final int rows;
079    
080        /** Number of columns of the matrix. */
081        private final int columns;
082    
083        /** Number of block rows of the matrix. */
084        private final int blockRows;
085    
086        /** Number of block columns of the matrix. */
087        private final int blockColumns;
088    
089        /**
090         * Create a new matrix with the supplied row and column dimensions.
091         *
092         * @param field field to which the elements belong
093         * @param rows  the number of rows in the new matrix
094         * @param columns  the number of columns in the new matrix
095         * @throws IllegalArgumentException if row or column dimension is not
096         *  positive
097         */
098        public BlockFieldMatrix(final Field<T> field, final int rows, final int columns)
099            throws IllegalArgumentException {
100    
101            super(field, rows, columns);
102            this.rows    = rows;
103            this.columns = columns;
104    
105            // number of blocks
106            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
107            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
108    
109            // allocate storage blocks, taking care of smaller ones at right and bottom
110            blocks = createBlocksLayout(field, rows, columns);
111    
112        }
113    
114        /**
115         * Create a new dense matrix copying entries from raw layout data.
116         * <p>The input array <em>must</em> already be in raw layout.</p>
117         * <p>Calling this constructor is equivalent to call:
118         * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
119         *                                   toBlocksLayout(rawData), false);</pre>
120         * </p>
121         * @param rawData data for new matrix, in raw layout
122         *
123         * @exception IllegalArgumentException if <code>blockData</code> shape is
124         * inconsistent with block layout
125         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
126         */
127        public BlockFieldMatrix(final T[][] rawData)
128            throws IllegalArgumentException {
129            this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
130        }
131    
132        /**
133         * Create a new dense matrix copying entries from block layout data.
134         * <p>The input array <em>must</em> already be in blocks layout.</p>
135         * @param rows  the number of rows in the new matrix
136         * @param columns  the number of columns in the new matrix
137         * @param blockData data for new matrix
138         * @param copyArray if true, the input array will be copied, otherwise
139         * it will be referenced
140         *
141         * @exception IllegalArgumentException if <code>blockData</code> shape is
142         * inconsistent with block layout
143         * @see #createBlocksLayout(Field, int, int)
144         * @see #toBlocksLayout(FieldElement[][])
145         * @see #BlockFieldMatrix(FieldElement[][])
146         */
147        public BlockFieldMatrix(final int rows, final int columns,
148                                final T[][] blockData, final boolean copyArray)
149            throws IllegalArgumentException {
150    
151            super(extractField(blockData), rows, columns);
152            this.rows    = rows;
153            this.columns = columns;
154    
155            // number of blocks
156            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
157            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
158    
159            if (copyArray) {
160                // allocate storage blocks, taking care of smaller ones at right and bottom
161                blocks = buildArray(getField(), blockRows * blockColumns, -1);
162            } else {
163                // reference existing array
164                blocks = blockData;
165            }
166    
167            int index = 0;
168            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
169                final int iHeight = blockHeight(iBlock);
170                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
171                    if (blockData[index].length != iHeight * blockWidth(jBlock)) {
172                        throw MathRuntimeException.createIllegalArgumentException(
173                                "wrong array shape (block length = {0}, expected {1})",
174                                blockData[index].length, iHeight * blockWidth(jBlock));
175                    }
176                    if (copyArray) {
177                        blocks[index] = blockData[index].clone();
178                    }
179                }
180            }
181    
182        }
183    
184        /**
185         * Convert a data array from raw layout to blocks layout.
186         * <p>
187         * Raw layout is the straightforward layout where element at row i and
188         * column j is in array element <code>rawData[i][j]</code>. Blocks layout
189         * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
190         * is split in square blocks (except at right and bottom side where blocks may
191         * be rectangular to fit matrix size) and each block is stored in a flattened
192         * one-dimensional array.
193         * </p>
194         * <p>
195         * This method creates an array in blocks layout from an input array in raw layout.
196         * It can be used to provide the array argument of the {@link
197         * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
198         * constructor.
199         * </p>
200         * @param <T> the type of the field elements
201         * @param rawData data array in raw layout
202         * @return a new data array containing the same entries but in blocks layout
203         * @exception IllegalArgumentException if <code>rawData</code> is not rectangular
204         *  (not all rows have the same length)
205         * @see #createBlocksLayout(Field, int, int)
206         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
207         */
208        public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
209            throws IllegalArgumentException {
210    
211            final int rows         = rawData.length;
212            final int columns      = rawData[0].length;
213            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
214            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
215    
216            // safety checks
217            for (int i = 0; i < rawData.length; ++i) {
218                final int length = rawData[i].length;
219                if (length != columns) {
220                    throw MathRuntimeException.createIllegalArgumentException(
221                            "some rows have length {0} while others have length {1}",
222                            columns, length);
223                }
224            }
225    
226            // convert array
227            final Field<T> field = extractField(rawData);
228            final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
229            int blockIndex = 0;
230            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
231                final int pStart  = iBlock * BLOCK_SIZE;
232                final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
233                final int iHeight = pEnd - pStart;
234                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
235                    final int qStart = jBlock * BLOCK_SIZE;
236                    final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
237                    final int jWidth = qEnd - qStart;
238    
239                    // allocate new block
240                    final T[] block = buildArray(field, iHeight * jWidth);
241                    blocks[blockIndex] = block;
242    
243                    // copy data
244                    int index = 0;
245                    for (int p = pStart; p < pEnd; ++p) {
246                        System.arraycopy(rawData[p], qStart, block, index, jWidth);
247                        index += jWidth;
248                    }
249    
250                    ++blockIndex;
251    
252                }
253            }
254    
255            return blocks;
256    
257        }
258    
259        /**
260         * Create a data array in blocks layout.
261         * <p>
262         * This method can be used to create the array argument of the {@link
263         * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
264         * constructor.
265         * </p>
266         * @param <T> the type of the field elements
267         * @param field field to which the elements belong
268         * @param rows  the number of rows in the new matrix
269         * @param columns  the number of columns in the new matrix
270         * @return a new data array in blocks layout
271         * @see #toBlocksLayout(FieldElement[][])
272         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
273         */
274        public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
275                                                                           final int rows, final int columns) {
276    
277            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
278            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
279    
280            final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
281            int blockIndex = 0;
282            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
283                final int pStart  = iBlock * BLOCK_SIZE;
284                final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
285                final int iHeight = pEnd - pStart;
286                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
287                    final int qStart = jBlock * BLOCK_SIZE;
288                    final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
289                    final int jWidth = qEnd - qStart;
290                    blocks[blockIndex] = buildArray(field, iHeight * jWidth);
291                    ++blockIndex;
292                }
293            }
294    
295            return blocks;
296    
297        }
298    
299        /** {@inheritDoc} */
300        @Override
301        public FieldMatrix<T> createMatrix(final int rowDimension, final int columnDimension)
302            throws IllegalArgumentException {
303            return new BlockFieldMatrix<T>(getField(), rowDimension, columnDimension);
304        }
305    
306        /** {@inheritDoc} */
307        @Override
308        public FieldMatrix<T> copy() {
309    
310            // create an empty matrix
311            BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
312    
313            // copy the blocks
314            for (int i = 0; i < blocks.length; ++i) {
315                System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
316            }
317    
318            return copied;
319    
320        }
321    
322        /** {@inheritDoc} */
323        @Override
324        public FieldMatrix<T> add(final FieldMatrix<T> m)
325            throws IllegalArgumentException {
326            try {
327                return add((BlockFieldMatrix<T>) m);
328            } catch (ClassCastException cce) {
329    
330                // safety check
331                checkAdditionCompatible(m);
332    
333                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
334    
335                // perform addition block-wise, to ensure good cache behavior
336                int blockIndex = 0;
337                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
338                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
339    
340                        // perform addition on the current block
341                        final T[] outBlock = out.blocks[blockIndex];
342                        final T[] tBlock   = blocks[blockIndex];
343                        final int      pStart   = iBlock * BLOCK_SIZE;
344                        final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
345                        final int      qStart   = jBlock * BLOCK_SIZE;
346                        final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
347                        int k = 0;
348                        for (int p = pStart; p < pEnd; ++p) {
349                            for (int q = qStart; q < qEnd; ++q) {
350                                outBlock[k] = tBlock[k].add(m.getEntry(p, q));
351                                ++k;
352                            }
353                        }
354    
355                        // go to next block
356                        ++blockIndex;
357    
358                    }
359                }
360    
361                return out;
362    
363            }
364        }
365    
366        /**
367         * Compute the sum of this and <code>m</code>.
368         *
369         * @param m    matrix to be added
370         * @return     this + m
371         * @throws  IllegalArgumentException if m is not the same size as this
372         */
373        public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
374            throws IllegalArgumentException {
375    
376            // safety check
377            checkAdditionCompatible(m);
378    
379            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
380    
381            // perform addition block-wise, to ensure good cache behavior
382            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
383                final T[] outBlock = out.blocks[blockIndex];
384                final T[] tBlock   = blocks[blockIndex];
385                final T[] mBlock   = m.blocks[blockIndex];
386                for (int k = 0; k < outBlock.length; ++k) {
387                    outBlock[k] = tBlock[k].add(mBlock[k]);
388                }
389            }
390    
391            return out;
392    
393        }
394    
395        /** {@inheritDoc} */
396        @Override
397        public FieldMatrix<T> subtract(final FieldMatrix<T> m)
398            throws IllegalArgumentException {
399            try {
400                return subtract((BlockFieldMatrix<T>) m);
401            } catch (ClassCastException cce) {
402    
403                // safety check
404                checkSubtractionCompatible(m);
405    
406                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
407    
408                // perform subtraction block-wise, to ensure good cache behavior
409                int blockIndex = 0;
410                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
411                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
412    
413                        // perform subtraction on the current block
414                        final T[] outBlock = out.blocks[blockIndex];
415                        final T[] tBlock   = blocks[blockIndex];
416                        final int      pStart   = iBlock * BLOCK_SIZE;
417                        final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
418                        final int      qStart   = jBlock * BLOCK_SIZE;
419                        final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
420                        int k = 0;
421                        for (int p = pStart; p < pEnd; ++p) {
422                            for (int q = qStart; q < qEnd; ++q) {
423                                outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
424                                ++k;
425                            }
426                        }
427    
428                        // go to next block
429                        ++blockIndex;
430    
431                    }
432                }
433    
434                return out;
435    
436            }
437        }
438    
439        /**
440         * Compute this minus <code>m</code>.
441         *
442         * @param m    matrix to be subtracted
443         * @return     this - m
444         * @throws  IllegalArgumentException if m is not the same size as this
445         */
446        public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m)
447            throws IllegalArgumentException {
448    
449            // safety check
450            checkSubtractionCompatible(m);
451    
452            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
453    
454            // perform subtraction block-wise, to ensure good cache behavior
455            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
456                final T[] outBlock = out.blocks[blockIndex];
457                final T[] tBlock   = blocks[blockIndex];
458                final T[] mBlock   = m.blocks[blockIndex];
459                for (int k = 0; k < outBlock.length; ++k) {
460                    outBlock[k] = tBlock[k].subtract(mBlock[k]);
461                }
462            }
463    
464            return out;
465    
466        }
467    
468        /** {@inheritDoc} */
469        @Override
470        public FieldMatrix<T> scalarAdd(final T d)
471            throws IllegalArgumentException {
472    
473            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
474    
475            // perform subtraction block-wise, to ensure good cache behavior
476            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
477                final T[] outBlock = out.blocks[blockIndex];
478                final T[] tBlock   = blocks[blockIndex];
479                for (int k = 0; k < outBlock.length; ++k) {
480                    outBlock[k] = tBlock[k].add(d);
481                }
482            }
483    
484            return out;
485    
486        }
487    
488        /** {@inheritDoc} */
489        @Override
490        public FieldMatrix<T> scalarMultiply(final T d)
491            throws IllegalArgumentException {
492    
493            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
494    
495            // perform subtraction block-wise, to ensure good cache behavior
496            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
497                final T[] outBlock = out.blocks[blockIndex];
498                final T[] tBlock   = blocks[blockIndex];
499                for (int k = 0; k < outBlock.length; ++k) {
500                    outBlock[k] = tBlock[k].multiply(d);
501                }
502            }
503    
504            return out;
505    
506        }
507    
508        /** {@inheritDoc} */
509        @Override
510        public FieldMatrix<T> multiply(final FieldMatrix<T> m)
511            throws IllegalArgumentException {
512            try {
513                return multiply((BlockFieldMatrix<T>) m);
514            } catch (ClassCastException cce) {
515    
516                // safety check
517                checkMultiplicationCompatible(m);
518    
519                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
520                final T zero = getField().getZero();
521    
522                // perform multiplication block-wise, to ensure good cache behavior
523                int blockIndex = 0;
524                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
525    
526                    final int pStart = iBlock * BLOCK_SIZE;
527                    final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
528    
529                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
530    
531                        final int qStart = jBlock * BLOCK_SIZE;
532                        final int qEnd   = Math.min(qStart + BLOCK_SIZE, m.getColumnDimension());
533    
534                        // select current block
535                        final T[] outBlock = out.blocks[blockIndex];
536    
537                        // perform multiplication on current block
538                        for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
539                            final int kWidth      = blockWidth(kBlock);
540                            final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
541                            final int rStart      = kBlock * BLOCK_SIZE;
542                            int k = 0;
543                            for (int p = pStart; p < pEnd; ++p) {
544                                final int lStart = (p - pStart) * kWidth;
545                                final int lEnd   = lStart + kWidth;
546                                for (int q = qStart; q < qEnd; ++q) {
547                                    T sum = zero;
548                                    int r = rStart;
549                                    for (int l = lStart; l < lEnd; ++l) {
550                                        sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
551                                        ++r;
552                                    }
553                                    outBlock[k] = outBlock[k].add(sum);
554                                    ++k;
555                                }
556                            }
557                        }
558    
559                        // go to next block
560                        ++blockIndex;
561    
562                    }
563                }
564    
565                return out;
566    
567            }
568        }
569    
570        /**
571         * Returns the result of postmultiplying this by m.
572         *
573         * @param m    matrix to postmultiply by
574         * @return     this * m
575         * @throws     IllegalArgumentException
576         *             if columnDimension(this) != rowDimension(m)
577         */
578        public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m) throws IllegalArgumentException {
579    
580            // safety check
581            checkMultiplicationCompatible(m);
582    
583            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
584            final T zero = getField().getZero();
585    
586            // perform multiplication block-wise, to ensure good cache behavior
587            int blockIndex = 0;
588            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
589    
590                final int pStart = iBlock * BLOCK_SIZE;
591                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
592    
593                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
594                    final int jWidth = out.blockWidth(jBlock);
595                    final int jWidth2 = jWidth  + jWidth;
596                    final int jWidth3 = jWidth2 + jWidth;
597                    final int jWidth4 = jWidth3 + jWidth;
598    
599                    // select current block
600                    final T[] outBlock = out.blocks[blockIndex];
601    
602                    // perform multiplication on current block
603                    for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
604                        final int kWidth = blockWidth(kBlock);
605                        final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
606                        final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
607                        int k = 0;
608                        for (int p = pStart; p < pEnd; ++p) {
609                            final int lStart = (p - pStart) * kWidth;
610                            final int lEnd   = lStart + kWidth;
611                            for (int nStart = 0; nStart < jWidth; ++nStart) {
612                                T sum = zero;
613                                int l = lStart;
614                                int n = nStart;
615                                while (l < lEnd - 3) {
616                                    sum = sum.
617                                          add(tBlock[l].multiply(mBlock[n])).
618                                          add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
619                                          add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
620                                          add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
621                                    l += 4;
622                                    n += jWidth4;
623                                }
624                                while (l < lEnd) {
625                                    sum = sum.add(tBlock[l++].multiply(mBlock[n]));
626                                    n += jWidth;
627                                }
628                                outBlock[k] = outBlock[k].add(sum);
629                                ++k;
630                            }
631                        }
632                    }
633    
634                    // go to next block
635                    ++blockIndex;
636    
637                }
638            }
639    
640            return out;
641    
642        }
643    
644        /** {@inheritDoc} */
645        @Override
646        public T[][] getData() {
647    
648            final T[][] data = buildArray(getField(), getRowDimension(), getColumnDimension());
649            final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
650    
651            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
652                final int pStart = iBlock * BLOCK_SIZE;
653                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
654                int regularPos   = 0;
655                int lastPos      = 0;
656                for (int p = pStart; p < pEnd; ++p) {
657                    final T[] dataP = data[p];
658                    int blockIndex = iBlock * blockColumns;
659                    int dataPos    = 0;
660                    for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
661                        System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
662                        dataPos += BLOCK_SIZE;
663                    }
664                    System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
665                    regularPos += BLOCK_SIZE;
666                    lastPos    += lastColumns;
667                }
668            }
669    
670            return data;
671    
672        }
673    
674        /** {@inheritDoc} */
675        @Override
676        public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
677                                       final int startColumn, final int endColumn)
678            throws MatrixIndexException {
679    
680            // safety checks
681            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
682    
683            // create the output matrix
684            final BlockFieldMatrix<T> out =
685                new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
686    
687            // compute blocks shifts
688            final int blockStartRow    = startRow    / BLOCK_SIZE;
689            final int rowsShift        = startRow    % BLOCK_SIZE;
690            final int blockStartColumn = startColumn / BLOCK_SIZE;
691            final int columnsShift     = startColumn % BLOCK_SIZE;
692    
693            // perform extraction block-wise, to ensure good cache behavior
694            int pBlock = blockStartRow;
695            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
696                final int iHeight = out.blockHeight(iBlock);
697                int qBlock = blockStartColumn;
698                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
699                    final int jWidth = out.blockWidth(jBlock);
700    
701                    // handle one block of the output matrix
702                    final int      outIndex = iBlock * out.blockColumns + jBlock;
703                    final T[] outBlock = out.blocks[outIndex];
704                    final int      index    = pBlock * blockColumns + qBlock;
705                    final int      width    = blockWidth(qBlock);
706    
707                    final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
708                    final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
709                    if (heightExcess > 0) {
710                        // the submatrix block spans on two blocks rows from the original matrix
711                        if (widthExcess > 0) {
712                            // the submatrix block spans on two blocks columns from the original matrix
713                            final int width2 = blockWidth(qBlock + 1);
714                            copyBlockPart(blocks[index], width,
715                                          rowsShift, BLOCK_SIZE,
716                                          columnsShift, BLOCK_SIZE,
717                                          outBlock, jWidth, 0, 0);
718                            copyBlockPart(blocks[index + 1], width2,
719                                          rowsShift, BLOCK_SIZE,
720                                          0, widthExcess,
721                                          outBlock, jWidth, 0, jWidth - widthExcess);
722                            copyBlockPart(blocks[index + blockColumns], width,
723                                          0, heightExcess,
724                                          columnsShift, BLOCK_SIZE,
725                                          outBlock, jWidth, iHeight - heightExcess, 0);
726                            copyBlockPart(blocks[index + blockColumns + 1], width2,
727                                          0, heightExcess,
728                                          0, widthExcess,
729                                          outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
730                        } else {
731                            // the submatrix block spans on one block column from the original matrix
732                            copyBlockPart(blocks[index], width,
733                                          rowsShift, BLOCK_SIZE,
734                                          columnsShift, jWidth + columnsShift,
735                                          outBlock, jWidth, 0, 0);
736                            copyBlockPart(blocks[index + blockColumns], width,
737                                          0, heightExcess,
738                                          columnsShift, jWidth + columnsShift,
739                                          outBlock, jWidth, iHeight - heightExcess, 0);
740                        }
741                    } else {
742                        // the submatrix block spans on one block row from the original matrix
743                        if (widthExcess > 0) {
744                            // the submatrix block spans on two blocks columns from the original matrix
745                            final int width2 = blockWidth(qBlock + 1);
746                            copyBlockPart(blocks[index], width,
747                                          rowsShift, iHeight + rowsShift,
748                                          columnsShift, BLOCK_SIZE,
749                                          outBlock, jWidth, 0, 0);
750                            copyBlockPart(blocks[index + 1], width2,
751                                          rowsShift, iHeight + rowsShift,
752                                          0, widthExcess,
753                                          outBlock, jWidth, 0, jWidth - widthExcess);
754                        } else {
755                            // the submatrix block spans on one block column from the original matrix
756                            copyBlockPart(blocks[index], width,
757                                          rowsShift, iHeight + rowsShift,
758                                          columnsShift, jWidth + columnsShift,
759                                          outBlock, jWidth, 0, 0);
760                        }
761                   }
762    
763                    ++qBlock;
764                }
765    
766                ++pBlock;
767    
768            }
769    
770            return out;
771    
772        }
773    
774        /**
775         * Copy a part of a block into another one
776         * <p>This method can be called only when the specified part fits in both
777         * blocks, no verification is done here.</p>
778         * @param srcBlock source block
779         * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
780         * @param srcStartRow start row in the source block
781         * @param srcEndRow end row (exclusive) in the source block
782         * @param srcStartColumn start column in the source block
783         * @param srcEndColumn end column (exclusive) in the source block
784         * @param dstBlock destination block
785         * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
786         * @param dstStartRow start row in the destination block
787         * @param dstStartColumn start column in the destination block
788         */
789        private void copyBlockPart(final T[] srcBlock, final int srcWidth,
790                                   final int srcStartRow, final int srcEndRow,
791                                   final int srcStartColumn, final int srcEndColumn,
792                                   final T[] dstBlock, final int dstWidth,
793                                   final int dstStartRow, final int dstStartColumn) {
794            final int length = srcEndColumn - srcStartColumn;
795            int srcPos = srcStartRow * srcWidth + srcStartColumn;
796            int dstPos = dstStartRow * dstWidth + dstStartColumn;
797            for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
798                System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
799                srcPos += srcWidth;
800                dstPos += dstWidth;
801            }
802        }
803    
804        /** {@inheritDoc} */
805        @Override
806        public void setSubMatrix(final T[][] subMatrix, final int row, final int column)
807            throws MatrixIndexException {
808    
809            // safety checks
810            final int refLength = subMatrix[0].length;
811            if (refLength < 1) {
812                throw MathRuntimeException.createIllegalArgumentException("matrix must have at least one column");
813            }
814            final int endRow    = row + subMatrix.length - 1;
815            final int endColumn = column + refLength - 1;
816            checkSubMatrixIndex(row, endRow, column, endColumn);
817            for (final T[] subRow : subMatrix) {
818                if (subRow.length != refLength) {
819                    throw MathRuntimeException.createIllegalArgumentException(
820                            "some rows have length {0} while others have length {1}",
821                            refLength, subRow.length);
822                }
823            }
824    
825            // compute blocks bounds
826            final int blockStartRow    = row / BLOCK_SIZE;
827            final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
828            final int blockStartColumn = column / BLOCK_SIZE;
829            final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
830    
831            // perform copy block-wise, to ensure good cache behavior
832            for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
833                final int iHeight  = blockHeight(iBlock);
834                final int firstRow = iBlock * BLOCK_SIZE;
835                final int iStart   = Math.max(row,    firstRow);
836                final int iEnd     = Math.min(endRow + 1, firstRow + iHeight);
837    
838                for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
839                    final int jWidth      = blockWidth(jBlock);
840                    final int firstColumn = jBlock * BLOCK_SIZE;
841                    final int jStart      = Math.max(column,    firstColumn);
842                    final int jEnd        = Math.min(endColumn + 1, firstColumn + jWidth);
843                    final int jLength     = jEnd - jStart;
844    
845                    // handle one block, row by row
846                    final T[] block = blocks[iBlock * blockColumns + jBlock];
847                    for (int i = iStart; i < iEnd; ++i) {
848                        System.arraycopy(subMatrix[i - row], jStart - column,
849                                         block, (i - firstRow) * jWidth + (jStart - firstColumn),
850                                         jLength);
851                    }
852    
853                }
854            }
855        }
856    
857        /** {@inheritDoc} */
858        @Override
859        public FieldMatrix<T> getRowMatrix(final int row)
860            throws MatrixIndexException {
861    
862            checkRowIndex(row);
863            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
864    
865            // perform copy block-wise, to ensure good cache behavior
866            final int iBlock  = row / BLOCK_SIZE;
867            final int iRow    = row - iBlock * BLOCK_SIZE;
868            int outBlockIndex = 0;
869            int outIndex      = 0;
870            T[] outBlock = out.blocks[outBlockIndex];
871            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
872                final int jWidth     = blockWidth(jBlock);
873                final T[] block = blocks[iBlock * blockColumns + jBlock];
874                final int available  = outBlock.length - outIndex;
875                if (jWidth > available) {
876                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
877                    outBlock = out.blocks[++outBlockIndex];
878                    System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
879                    outIndex = jWidth - available;
880                } else {
881                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
882                    outIndex += jWidth;
883                }
884            }
885    
886            return out;
887    
888        }
889    
890        /** {@inheritDoc} */
891        @Override
892        public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
893            throws MatrixIndexException, InvalidMatrixException {
894            try {
895                setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
896            } catch (ClassCastException cce) {
897                super.setRowMatrix(row, matrix);
898            }
899        }
900    
901        /**
902         * Sets the entries in row number <code>row</code>
903         * as a row matrix.  Row indices start at 0.
904         *
905         * @param row the row to be set
906         * @param matrix row matrix (must have one row and the same number of columns
907         * as the instance)
908         * @throws MatrixIndexException if the specified row index is invalid
909         * @throws InvalidMatrixException if the matrix dimensions do not match one
910         * instance row
911         */
912        public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
913            throws MatrixIndexException, InvalidMatrixException {
914    
915            checkRowIndex(row);
916            final int nCols = getColumnDimension();
917            if ((matrix.getRowDimension() != 1) ||
918                (matrix.getColumnDimension() != nCols)) {
919                throw new InvalidMatrixException(
920                        "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
921                        matrix.getRowDimension(), matrix.getColumnDimension(),
922                        1, nCols);
923            }
924    
925            // perform copy block-wise, to ensure good cache behavior
926            final int iBlock = row / BLOCK_SIZE;
927            final int iRow   = row - iBlock * BLOCK_SIZE;
928            int mBlockIndex  = 0;
929            int mIndex       = 0;
930            T[] mBlock  = matrix.blocks[mBlockIndex];
931            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
932                final int jWidth     = blockWidth(jBlock);
933                final T[] block = blocks[iBlock * blockColumns + jBlock];
934                final int available  = mBlock.length - mIndex;
935                if (jWidth > available) {
936                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
937                    mBlock = matrix.blocks[++mBlockIndex];
938                    System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
939                    mIndex = jWidth - available;
940                } else {
941                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
942                    mIndex += jWidth;
943               }
944            }
945    
946        }
947    
948        /** {@inheritDoc} */
949        @Override
950        public FieldMatrix<T> getColumnMatrix(final int column)
951            throws MatrixIndexException {
952    
953            checkColumnIndex(column);
954            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
955    
956            // perform copy block-wise, to ensure good cache behavior
957            final int jBlock  = column / BLOCK_SIZE;
958            final int jColumn = column - jBlock * BLOCK_SIZE;
959            final int jWidth  = blockWidth(jBlock);
960            int outBlockIndex = 0;
961            int outIndex      = 0;
962            T[] outBlock = out.blocks[outBlockIndex];
963            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
964                final int iHeight = blockHeight(iBlock);
965                final T[] block = blocks[iBlock * blockColumns + jBlock];
966                for (int i = 0; i < iHeight; ++i) {
967                    if (outIndex >= outBlock.length) {
968                        outBlock = out.blocks[++outBlockIndex];
969                        outIndex = 0;
970                    }
971                    outBlock[outIndex++] = block[i * jWidth + jColumn];
972                }
973            }
974    
975            return out;
976    
977        }
978    
979        /** {@inheritDoc} */
980        @Override
981        public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
982            throws MatrixIndexException, InvalidMatrixException {
983            try {
984                setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
985            } catch (ClassCastException cce) {
986                super.setColumnMatrix(column, matrix);
987            }
988        }
989    
990        /**
991         * Sets the entries in column number <code>column</code>
992         * as a column matrix.  Column indices start at 0.
993         *
994         * @param column the column to be set
995         * @param matrix column matrix (must have one column and the same number of rows
996         * as the instance)
997         * @throws MatrixIndexException if the specified column index is invalid
998         * @throws InvalidMatrixException if the matrix dimensions do not match one
999         * instance column
1000         */
1001        void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
1002            throws MatrixIndexException, InvalidMatrixException {
1003    
1004            checkColumnIndex(column);
1005            final int nRows = getRowDimension();
1006            if ((matrix.getRowDimension() != nRows) ||
1007                (matrix.getColumnDimension() != 1)) {
1008                throw new InvalidMatrixException(
1009                        "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1010                        matrix.getRowDimension(), matrix.getColumnDimension(),
1011                        nRows, 1);
1012            }
1013    
1014            // perform copy block-wise, to ensure good cache behavior
1015            final int jBlock  = column / BLOCK_SIZE;
1016            final int jColumn = column - jBlock * BLOCK_SIZE;
1017            final int jWidth  = blockWidth(jBlock);
1018            int mBlockIndex = 0;
1019            int mIndex      = 0;
1020            T[] mBlock = matrix.blocks[mBlockIndex];
1021            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1022                final int iHeight = blockHeight(iBlock);
1023                final T[] block = blocks[iBlock * blockColumns + jBlock];
1024                for (int i = 0; i < iHeight; ++i) {
1025                    if (mIndex >= mBlock.length) {
1026                        mBlock = matrix.blocks[++mBlockIndex];
1027                        mIndex = 0;
1028                    }
1029                    block[i * jWidth + jColumn] = mBlock[mIndex++];
1030                }
1031            }
1032    
1033        }
1034    
1035        /** {@inheritDoc} */
1036        @Override
1037        public FieldVector<T> getRowVector(final int row)
1038            throws MatrixIndexException {
1039    
1040            checkRowIndex(row);
1041            final T[] outData = buildArray(getField(), columns);
1042    
1043            // perform copy block-wise, to ensure good cache behavior
1044            final int iBlock  = row / BLOCK_SIZE;
1045            final int iRow    = row - iBlock * BLOCK_SIZE;
1046            int outIndex      = 0;
1047            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1048                final int jWidth     = blockWidth(jBlock);
1049                final T[] block = blocks[iBlock * blockColumns + jBlock];
1050                System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1051                outIndex += jWidth;
1052            }
1053    
1054            return new ArrayFieldVector<T>(outData, false);
1055    
1056        }
1057    
1058        /** {@inheritDoc} */
1059        @Override
1060        public void setRowVector(final int row, final FieldVector<T> vector)
1061            throws MatrixIndexException, InvalidMatrixException {
1062            try {
1063                setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1064            } catch (ClassCastException cce) {
1065                super.setRowVector(row, vector);
1066            }
1067        }
1068    
1069        /** {@inheritDoc} */
1070        @Override
1071        public FieldVector<T> getColumnVector(final int column)
1072            throws MatrixIndexException {
1073    
1074            checkColumnIndex(column);
1075            final T[] outData = buildArray(getField(), rows);
1076    
1077            // perform copy block-wise, to ensure good cache behavior
1078            final int jBlock  = column / BLOCK_SIZE;
1079            final int jColumn = column - jBlock * BLOCK_SIZE;
1080            final int jWidth  = blockWidth(jBlock);
1081            int outIndex      = 0;
1082            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1083                final int iHeight = blockHeight(iBlock);
1084                final T[] block = blocks[iBlock * blockColumns + jBlock];
1085                for (int i = 0; i < iHeight; ++i) {
1086                    outData[outIndex++] = block[i * jWidth + jColumn];
1087                }
1088            }
1089    
1090            return new ArrayFieldVector<T>(outData, false);
1091    
1092        }
1093    
1094        /** {@inheritDoc} */
1095        @Override
1096        public void setColumnVector(final int column, final FieldVector<T> vector)
1097            throws MatrixIndexException, InvalidMatrixException {
1098            try {
1099                setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1100            } catch (ClassCastException cce) {
1101                super.setColumnVector(column, vector);
1102            }
1103        }
1104    
1105        /** {@inheritDoc} */
1106        @Override
1107        public T[] getRow(final int row)
1108            throws MatrixIndexException {
1109    
1110            checkRowIndex(row);
1111            final T[] out = buildArray(getField(), columns);
1112    
1113            // perform copy block-wise, to ensure good cache behavior
1114            final int iBlock  = row / BLOCK_SIZE;
1115            final int iRow    = row - iBlock * BLOCK_SIZE;
1116            int outIndex      = 0;
1117            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1118                final int jWidth     = blockWidth(jBlock);
1119                final T[] block = blocks[iBlock * blockColumns + jBlock];
1120                System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1121                outIndex += jWidth;
1122            }
1123    
1124            return out;
1125    
1126        }
1127    
1128        /** {@inheritDoc} */
1129        @Override
1130        public void setRow(final int row, final T[] array)
1131            throws MatrixIndexException, InvalidMatrixException {
1132    
1133            checkRowIndex(row);
1134            final int nCols = getColumnDimension();
1135            if (array.length != nCols) {
1136                throw new InvalidMatrixException(
1137                        "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1138                        1, array.length, 1, nCols);
1139            }
1140    
1141            // perform copy block-wise, to ensure good cache behavior
1142            final int iBlock  = row / BLOCK_SIZE;
1143            final int iRow    = row - iBlock * BLOCK_SIZE;
1144            int outIndex      = 0;
1145            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1146                final int jWidth     = blockWidth(jBlock);
1147                final T[] block = blocks[iBlock * blockColumns + jBlock];
1148                System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1149                outIndex += jWidth;
1150            }
1151    
1152        }
1153    
1154        /** {@inheritDoc} */
1155        @Override
1156        public T[] getColumn(final int column)
1157            throws MatrixIndexException {
1158    
1159            checkColumnIndex(column);
1160            final T[] out = buildArray(getField(), rows);
1161    
1162            // perform copy block-wise, to ensure good cache behavior
1163            final int jBlock  = column / BLOCK_SIZE;
1164            final int jColumn = column - jBlock * BLOCK_SIZE;
1165            final int jWidth  = blockWidth(jBlock);
1166            int outIndex      = 0;
1167            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1168                final int iHeight = blockHeight(iBlock);
1169                final T[] block = blocks[iBlock * blockColumns + jBlock];
1170                for (int i = 0; i < iHeight; ++i) {
1171                    out[outIndex++] = block[i * jWidth + jColumn];
1172                }
1173            }
1174    
1175            return out;
1176    
1177        }
1178    
1179        /** {@inheritDoc} */
1180        @Override
1181        public void setColumn(final int column, final T[] array)
1182            throws MatrixIndexException, InvalidMatrixException {
1183    
1184            checkColumnIndex(column);
1185            final int nRows = getRowDimension();
1186            if (array.length != nRows) {
1187                throw new InvalidMatrixException(
1188                        "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1189                        array.length, 1, nRows, 1);
1190            }
1191    
1192            // perform copy block-wise, to ensure good cache behavior
1193            final int jBlock  = column / BLOCK_SIZE;
1194            final int jColumn = column - jBlock * BLOCK_SIZE;
1195            final int jWidth  = blockWidth(jBlock);
1196            int outIndex      = 0;
1197            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1198                final int iHeight = blockHeight(iBlock);
1199                final T[] block = blocks[iBlock * blockColumns + jBlock];
1200                for (int i = 0; i < iHeight; ++i) {
1201                    block[i * jWidth + jColumn] = array[outIndex++];
1202                }
1203            }
1204    
1205        }
1206    
1207        /** {@inheritDoc} */
1208        @Override
1209        public T getEntry(final int row, final int column)
1210            throws MatrixIndexException {
1211            try {
1212                final int iBlock = row    / BLOCK_SIZE;
1213                final int jBlock = column / BLOCK_SIZE;
1214                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1215                                   (column - jBlock * BLOCK_SIZE);
1216                return blocks[iBlock * blockColumns + jBlock][k];
1217            } catch (ArrayIndexOutOfBoundsException e) {
1218                throw new MatrixIndexException(
1219                        "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1220                        row, column, getRowDimension(), getColumnDimension());
1221            }
1222        }
1223    
1224        /** {@inheritDoc} */
1225        @Override
1226        public void setEntry(final int row, final int column, final T value)
1227            throws MatrixIndexException {
1228            try {
1229                final int iBlock = row    / BLOCK_SIZE;
1230                final int jBlock = column / BLOCK_SIZE;
1231                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1232                                   (column - jBlock * BLOCK_SIZE);
1233                blocks[iBlock * blockColumns + jBlock][k] = value;
1234            } catch (ArrayIndexOutOfBoundsException e) {
1235                throw new MatrixIndexException(
1236                        "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1237                        row, column, getRowDimension(), getColumnDimension());
1238            }
1239        }
1240    
1241        /** {@inheritDoc} */
1242        @Override
1243        public void addToEntry(final int row, final int column, final T increment)
1244            throws MatrixIndexException {
1245            try {
1246                final int iBlock = row    / BLOCK_SIZE;
1247                final int jBlock = column / BLOCK_SIZE;
1248                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1249                                   (column - jBlock * BLOCK_SIZE);
1250                final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1251                blockIJ[k] = blockIJ[k].add(increment);
1252            } catch (ArrayIndexOutOfBoundsException e) {
1253                throw new MatrixIndexException(
1254                        "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1255                        row, column, getRowDimension(), getColumnDimension());
1256            }
1257        }
1258    
1259        /** {@inheritDoc} */
1260        @Override
1261        public void multiplyEntry(final int row, final int column, final T factor)
1262            throws MatrixIndexException {
1263            try {
1264                final int iBlock = row    / BLOCK_SIZE;
1265                final int jBlock = column / BLOCK_SIZE;
1266                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1267                                   (column - jBlock * BLOCK_SIZE);
1268                final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1269                blockIJ[k] = blockIJ[k].multiply(factor);
1270            } catch (ArrayIndexOutOfBoundsException e) {
1271                throw new MatrixIndexException(
1272                        "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1273                        row, column, getRowDimension(), getColumnDimension());
1274            }
1275        }
1276    
1277        /** {@inheritDoc} */
1278        @Override
1279        public FieldMatrix<T> transpose() {
1280    
1281            final int nRows = getRowDimension();
1282            final int nCols = getColumnDimension();
1283            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1284    
1285            // perform transpose block-wise, to ensure good cache behavior
1286            int blockIndex = 0;
1287            for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1288                for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1289    
1290                    // transpose current block
1291                    final T[] outBlock = out.blocks[blockIndex];
1292                    final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1293                    final int      pStart   = iBlock * BLOCK_SIZE;
1294                    final int      pEnd     = Math.min(pStart + BLOCK_SIZE, columns);
1295                    final int      qStart   = jBlock * BLOCK_SIZE;
1296                    final int      qEnd     = Math.min(qStart + BLOCK_SIZE, rows);
1297                    int k = 0;
1298                    for (int p = pStart; p < pEnd; ++p) {
1299                        final int lInc = pEnd - pStart;
1300                        int l = p - pStart;
1301                        for (int q = qStart; q < qEnd; ++q) {
1302                            outBlock[k] = tBlock[l];
1303                            ++k;
1304                            l+= lInc;
1305                        }
1306                    }
1307    
1308                    // go to next block
1309                    ++blockIndex;
1310    
1311                }
1312            }
1313    
1314            return out;
1315    
1316        }
1317    
1318        /** {@inheritDoc} */
1319        @Override
1320        public int getRowDimension() {
1321            return rows;
1322        }
1323    
1324        /** {@inheritDoc} */
1325        @Override
1326        public int getColumnDimension() {
1327            return columns;
1328        }
1329    
1330        /** {@inheritDoc} */
1331        @Override
1332        public T[] operate(final T[] v)
1333            throws IllegalArgumentException {
1334    
1335            if (v.length != columns) {
1336                throw MathRuntimeException.createIllegalArgumentException(
1337                        "vector length mismatch: got {0} but expected {1}",
1338                        v.length, columns);
1339            }
1340            final T[] out = buildArray(getField(), rows);
1341            final T zero = getField().getZero();
1342    
1343            // perform multiplication block-wise, to ensure good cache behavior
1344            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1345                final int pStart = iBlock * BLOCK_SIZE;
1346                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1347                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1348                    final T[] block  = blocks[iBlock * blockColumns + jBlock];
1349                    final int      qStart = jBlock * BLOCK_SIZE;
1350                    final int      qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1351                    int k = 0;
1352                    for (int p = pStart; p < pEnd; ++p) {
1353                        T sum = zero;
1354                        int q = qStart;
1355                        while (q < qEnd - 3) {
1356                            sum = sum.
1357                                  add(block[k].multiply(v[q])).
1358                                  add(block[k + 1].multiply(v[q + 1])).
1359                                  add(block[k + 2].multiply(v[q + 2])).
1360                                  add(block[k + 3].multiply(v[q + 3]));
1361                            k += 4;
1362                            q += 4;
1363                        }
1364                        while (q < qEnd) {
1365                            sum = sum.add(block[k++].multiply(v[q++]));
1366                        }
1367                        out[p] = out[p].add(sum);
1368                    }
1369                }
1370            }
1371    
1372            return out;
1373    
1374        }
1375    
1376        /** {@inheritDoc} */
1377        @Override
1378        public T[] preMultiply(final T[] v)
1379            throws IllegalArgumentException {
1380    
1381            if (v.length != rows) {
1382                throw MathRuntimeException.createIllegalArgumentException(
1383                        "vector length mismatch: got {0} but expected {1}",
1384                        v.length, rows);
1385            }
1386            final T[] out = buildArray(getField(), columns);
1387            final T zero = getField().getZero();
1388    
1389            // perform multiplication block-wise, to ensure good cache behavior
1390            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1391                final int jWidth  = blockWidth(jBlock);
1392                final int jWidth2 = jWidth  + jWidth;
1393                final int jWidth3 = jWidth2 + jWidth;
1394                final int jWidth4 = jWidth3 + jWidth;
1395                final int qStart = jBlock * BLOCK_SIZE;
1396                final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1397                for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1398                    final T[] block  = blocks[iBlock * blockColumns + jBlock];
1399                    final int      pStart = iBlock * BLOCK_SIZE;
1400                    final int      pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1401                    for (int q = qStart; q < qEnd; ++q) {
1402                        int k = q - qStart;
1403                        T sum = zero;
1404                        int p = pStart;
1405                        while (p < pEnd - 3) {
1406                            sum = sum.
1407                                  add(block[k].multiply(v[p])).
1408                                  add(block[k + jWidth].multiply(v[p + 1])).
1409                                  add(block[k + jWidth2].multiply(v[p + 2])).
1410                                  add(block[k + jWidth3].multiply(v[p + 3]));
1411                            k += jWidth4;
1412                            p += 4;
1413                        }
1414                        while (p < pEnd) {
1415                            sum = sum.add(block[k].multiply(v[p++]));
1416                            k += jWidth;
1417                        }
1418                        out[q] = out[q].add(sum);
1419                    }
1420                }
1421            }
1422    
1423            return out;
1424    
1425        }
1426    
1427        /** {@inheritDoc} */
1428        @Override
1429        public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor)
1430            throws MatrixVisitorException {
1431            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1432            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1433                final int pStart = iBlock * BLOCK_SIZE;
1434                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1435                for (int p = pStart; p < pEnd; ++p) {
1436                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1437                        final int jWidth = blockWidth(jBlock);
1438                        final int qStart = jBlock * BLOCK_SIZE;
1439                        final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1440                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1441                        int k = (p - pStart) * jWidth;
1442                        for (int q = qStart; q < qEnd; ++q) {
1443                            block[k] = visitor.visit(p, q, block[k]);
1444                            ++k;
1445                        }
1446                    }
1447                 }
1448            }
1449            return visitor.end();
1450        }
1451    
1452        /** {@inheritDoc} */
1453        @Override
1454        public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor)
1455            throws MatrixVisitorException {
1456            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1457            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1458                final int pStart = iBlock * BLOCK_SIZE;
1459                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1460                for (int p = pStart; p < pEnd; ++p) {
1461                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1462                        final int jWidth = blockWidth(jBlock);
1463                        final int qStart = jBlock * BLOCK_SIZE;
1464                        final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1465                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1466                        int k = (p - pStart) * jWidth;
1467                        for (int q = qStart; q < qEnd; ++q) {
1468                            visitor.visit(p, q, block[k]);
1469                            ++k;
1470                        }
1471                    }
1472                 }
1473            }
1474            return visitor.end();
1475        }
1476    
1477        /** {@inheritDoc} */
1478        @Override
1479        public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1480                                     final int startRow, final int endRow,
1481                                     final int startColumn, final int endColumn)
1482            throws MatrixIndexException, MatrixVisitorException {
1483            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1484            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1485            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1486                final int p0     = iBlock * BLOCK_SIZE;
1487                final int pStart = Math.max(startRow, p0);
1488                final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1489                for (int p = pStart; p < pEnd; ++p) {
1490                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1491                        final int jWidth = blockWidth(jBlock);
1492                        final int q0     = jBlock * BLOCK_SIZE;
1493                        final int qStart = Math.max(startColumn, q0);
1494                        final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1495                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1496                        int k = (p - p0) * jWidth + qStart - q0;
1497                        for (int q = qStart; q < qEnd; ++q) {
1498                            block[k] = visitor.visit(p, q, block[k]);
1499                            ++k;
1500                        }
1501                    }
1502                 }
1503            }
1504            return visitor.end();
1505        }
1506    
1507        /** {@inheritDoc} */
1508        @Override
1509        public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1510                                     final int startRow, final int endRow,
1511                                     final int startColumn, final int endColumn)
1512            throws MatrixIndexException, MatrixVisitorException {
1513            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1514            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1515            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1516                final int p0     = iBlock * BLOCK_SIZE;
1517                final int pStart = Math.max(startRow, p0);
1518                final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1519                for (int p = pStart; p < pEnd; ++p) {
1520                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1521                        final int jWidth = blockWidth(jBlock);
1522                        final int q0     = jBlock * BLOCK_SIZE;
1523                        final int qStart = Math.max(startColumn, q0);
1524                        final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1525                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1526                        int k = (p - p0) * jWidth + qStart - q0;
1527                        for (int q = qStart; q < qEnd; ++q) {
1528                            visitor.visit(p, q, block[k]);
1529                            ++k;
1530                        }
1531                    }
1532                 }
1533            }
1534            return visitor.end();
1535        }
1536    
1537        /** {@inheritDoc} */
1538        @Override
1539        public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor)
1540            throws MatrixVisitorException {
1541            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1542            int blockIndex = 0;
1543            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1544                final int pStart = iBlock * BLOCK_SIZE;
1545                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1546                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1547                    final int qStart = jBlock * BLOCK_SIZE;
1548                    final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1549                    final T[] block = blocks[blockIndex];
1550                    int k = 0;
1551                    for (int p = pStart; p < pEnd; ++p) {
1552                        for (int q = qStart; q < qEnd; ++q) {
1553                            block[k] = visitor.visit(p, q, block[k]);
1554                            ++k;
1555                        }
1556                    }
1557                    ++blockIndex;
1558                }
1559            }
1560            return visitor.end();
1561        }
1562    
1563        /** {@inheritDoc} */
1564        @Override
1565        public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor)
1566            throws MatrixVisitorException {
1567            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1568            int blockIndex = 0;
1569            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1570                final int pStart = iBlock * BLOCK_SIZE;
1571                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1572                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1573                    final int qStart = jBlock * BLOCK_SIZE;
1574                    final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1575                    final T[] block = blocks[blockIndex];
1576                    int k = 0;
1577                    for (int p = pStart; p < pEnd; ++p) {
1578                        for (int q = qStart; q < qEnd; ++q) {
1579                            visitor.visit(p, q, block[k]);
1580                            ++k;
1581                        }
1582                    }
1583                    ++blockIndex;
1584                }
1585            }
1586            return visitor.end();
1587        }
1588    
1589        /** {@inheritDoc} */
1590        @Override
1591        public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1592                                           final int startRow, final int endRow,
1593                                           final int startColumn, final int endColumn)
1594            throws MatrixIndexException, MatrixVisitorException {
1595            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1596            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1597            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1598                final int p0     = iBlock * BLOCK_SIZE;
1599                final int pStart = Math.max(startRow, p0);
1600                final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1601                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1602                    final int jWidth = blockWidth(jBlock);
1603                    final int q0     = jBlock * BLOCK_SIZE;
1604                    final int qStart = Math.max(startColumn, q0);
1605                    final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1606                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1607                    for (int p = pStart; p < pEnd; ++p) {
1608                        int k = (p - p0) * jWidth + qStart - q0;
1609                        for (int q = qStart; q < qEnd; ++q) {
1610                            block[k] = visitor.visit(p, q, block[k]);
1611                            ++k;
1612                        }
1613                    }
1614                }
1615            }
1616            return visitor.end();
1617        }
1618    
1619        /** {@inheritDoc} */
1620        @Override
1621        public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1622                                           final int startRow, final int endRow,
1623                                           final int startColumn, final int endColumn)
1624            throws MatrixIndexException, MatrixVisitorException {
1625            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1626            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1627            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1628                final int p0     = iBlock * BLOCK_SIZE;
1629                final int pStart = Math.max(startRow, p0);
1630                final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1631                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1632                    final int jWidth = blockWidth(jBlock);
1633                    final int q0     = jBlock * BLOCK_SIZE;
1634                    final int qStart = Math.max(startColumn, q0);
1635                    final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1636                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1637                    for (int p = pStart; p < pEnd; ++p) {
1638                        int k = (p - p0) * jWidth + qStart - q0;
1639                        for (int q = qStart; q < qEnd; ++q) {
1640                            visitor.visit(p, q, block[k]);
1641                            ++k;
1642                        }
1643                    }
1644                }
1645            }
1646            return visitor.end();
1647        }
1648    
1649        /**
1650         * Get the height of a block.
1651         * @param blockRow row index (in block sense) of the block
1652         * @return height (number of rows) of the block
1653         */
1654        private int blockHeight(final int blockRow) {
1655            return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1656        }
1657    
1658        /**
1659         * Get the width of a block.
1660         * @param blockColumn column index (in block sense) of the block
1661         * @return width (number of columns) of the block
1662         */
1663        private int blockWidth(final int blockColumn) {
1664            return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1665        }
1666    
1667    }