Coding – Page 15 – The Research Kitchen

Problem 11 on Project Euler involves calculating the maximum product of four adjacent numbers, in any direction (horizontal, vertical or diagonal), in a 20×20 matrix.

The solution below takes advantage of the symmetry of calculations to cut down on unnecessary loop operations:


# Project Euler problem 11: largest product of `n` adjacent numbers lying in
# a straight line (row, column or either diagonal) of a 20-row grid.
#
# Arguments:
#   file  path of a whitespace-separated file holding the grid values in
#         row-major order (read with scan()).
#   n     length of the run of adjacent numbers to multiply.
#
# Returns the largest product found. The running maximum starts at 0, so 0
# is returned when no run of length `n` fits or every product is negative.
#
# Only four of the eight directions are evaluated from each cell (right,
# down, down-right, down-left): the opposite directions yield the same
# products from the other end of the run.
problem11 <- function(file = "problem11.dat", n = 4) {
    numbers <- scan(file)
    m <- matrix(as.numeric(numbers), 20, byrow = TRUE)
    dims <- dim(m)
    a <- n - 1      # offset from the first to the last cell of a run
    x <- 0:a        # offsets of every cell in a run
    maxprd <- 0
    for (i in seq_len(dims[1])) {
        for (j in seq_len(dims[2])) {
            # Which directions have room for a full run starting at (i, j)?
            fits_down  <- i <= dims[1] - a
            fits_right <- j <= dims[2] - a
            fits_left  <- j > a
            # Plain if/else guarantees the out-of-range index expressions
            # are never evaluated when the run does not fit.
            prd1 <- if (fits_right) prod(m[i, j + x]) else 0   # row prod
            prd2 <- if (fits_down) prod(m[i + x, j]) else 0    # column prod
            # lower right diagonal (two-column index picks cells pairwise)
            prd3 <- if (fits_down && fits_right) prod(m[cbind(i + x, j + x)]) else 0
            # lower left diagonal
            prd4 <- if (fits_down && fits_left) prod(m[cbind(i + x, j - x)]) else 0
            maxprd <- max(prd1, prd2, prd3, prd4, maxprd)
        }
    }
    maxprd
}

Here is an example of using the SVNKit API to crawl an SVN repository and pick up the commit sizes. It uses a very simple (and incorrect) heuristic for estimating the number of lines changed per commit – it just takes the absolute value of the difference between the number of lines added and the number of lines removed in each commit (plus one, so that no commit is recorded with a size of zero).

The code below will produce a comma-separated values file containing the revision number, author, commit time, and line change count estimate.

Loading the resulting file into R allows us to apply some analysis. We can plot the total number of commits per committer:

Or look at the total number of lines committed on each commit:

And look at some summary stats (again, per author):

$user1
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.0 1.0 5.0 439.3 45.5 45100.0

$user2
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.0 3.0 26.0 294.9 105.5 62700.0

$user3
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 1.00 1.00 46.64 5.00 22300.00

$user4
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.0 5.5 51.0 225.5 166.0 1882.0

$user5
Min. 1st Qu. Median Mean 3rd Qu. Max.
39.0 108.0 267.0 231.4 298.0 445.0

$user6
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.0 2.0 7.0 181.3 41.0 21170.0

$user7
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.0 5.0 34.5 164.8 136.0 3066.0

You can see from the entries for the first couple of authors above that the mean is skewed by some very large commits – making the median a much more robust measure of average lines per commit.

package com.researchkitchen.svn; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.text.SimpleDateFormat; import java.util.ArrayList; import org.tmatesoft.svn.core.SVNException; import org.tmatesoft.svn.core.SVNLogEntry; import org.tmatesoft.svn.core.SVNURL; import org.tmatesoft.svn.core.auth.ISVNAuthenticationManager; import org.tmatesoft.svn.core.internal.io.svn.SVNRepositoryFactoryImpl; import org.tmatesoft.svn.core.io.SVNRepository; import org.tmatesoft.svn.core.io.SVNRepositoryFactory; import org.tmatesoft.svn.core.wc.SVNClientManager; import org.tmatesoft.svn.core.wc.SVNDiffClient; import org.tmatesoft.svn.core.wc.SVNRevision; import org.tmatesoft.svn.core.wc.SVNWCUtil; public class SVNClient { @SuppressWarnings("unchecked") public static void main(String[] args) throws IOException { final String url = "svn://myserver/myproject/trunk"; final String name = "rory"; final String pass = "password"; BufferedWriter writer = new BufferedWriter(new FileWriter(new File("svn-stats.dat"))); SimpleDateFormat formatter = new SimpleDateFormat("dd/M/yyyy HH:mm:ss"); try { SVNRepositoryFactoryImpl.setup(); SVNURL svnUrl = SVNURL.parseURIDecoded(url); ISVNAuthenticationManager authManager = SVNWCUtil.createDefaultAuthenticationManager(name, pass); SVNRepository repo = SVNRepositoryFactory.create(svnUrl); repo.setAuthenticationManager(authManager); // Create a diff client SVNClientManager clientManager = SVNClientManager.newInstance(); SVNDiffClient diffClient = clientManager.getDiffClient(); writer.write("Revision,Author,Date,LinesChanged\n"); // Get svn log for entire repo history long currentRev = repo.getLatestRevision(); ArrayList<SVNLogEntry> entries = new ArrayList<SVNLogEntry>(repo.log(new String[] {""}, null, 1, currentRev, 
true, true)); // Diff all subsequent revisions for (int i = 1; i < entries.size(); ++i) { int changedThisCommit = 0; SVNLogEntry current = entries.get(i); SVNLogEntry prev = entries.get(i-1); System.out.println("Revision " + current.getRevision() + " committed by " + current.getAuthor()); ByteArrayOutputStream io = new ByteArrayOutputStream(); System.out.println("Diff between " + current.getRevision() + "=>" + prev.getRevision() + ":"); diffClient.doDiff(svnUrl, SVNRevision.HEAD, SVNRevision.create(prev.getRevision()), SVNRevision.create(current.getRevision()), true, false,io); // Very basic (and probably wrong) changed lines metric // see http://en.wikipedia.org/wiki/Diff#Unified_format BufferedReader br = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(io.toByteArray()))); String line = null; while((line = br.readLine()) != null) { if (line.matches("^\\+([^\\+]).*")) changedThisCommit++; else if (line.matches("^\\-([^\\-]).*")) changedThisCommit--; } changedThisCommit = (changedThisCommit < 0 ? -changedThisCommit : changedThisCommit) + 1; System.out.println("Lines changed this commit:" + changedThisCommit); br.close(); writer.write(current.getRevision() + "," + current.getAuthor() + "," + formatter.format(current.getDate()) + "," + changedThisCommit + "\n"); } writer.close(); } catch (SVNException se) { se.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } } }