/* NOTE: This doesn't work perfectly, but it's pretty darn close. Feel free to debug it and send me your results. I would be grateful and heap praise upon you. Adam Shaw, 5/16/06, U Chicago: CS 102 */ package scraper; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.ArrayList; import java.util.List; public class PageGrabber { private String webaddr; public PageGrabber(String a) { this.webaddr = a; } public List getContents() throws IOException { List ss = new ArrayList(); URL url = new URL(webaddr); InputStream is = url.openStream(); InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr); while (br.ready()) { String s = br.readLine(); ss.add(s); } br.close(); isr.close(); is.close(); return ss; } } /* ************************* */ package scraper; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.List; public class NBARosterScraper { private String teamname; private String webaddr; private PageGrabber g; public NBARosterScraper(String t) { this.teamname = t; this.webaddr = "http://www.nba.com/" + teamname + "/roster/index.html"; g = new PageGrabber(webaddr); } private boolean isPlayerBlock(String s) { String t = s.trim(); if (t.startsWith(""); int secondLT = h.indexOf("<",firstGT); String contents = h.substring(firstGT+1,secondLT); return contents.trim(); } private static String dropLastTag(String h) { h = h.trim(); int firstLT = h.indexOf("<"); String s = h.substring(0,firstLT); return s.trim(); } private static int inches(String ht) { int firstHyphen = ht.indexOf('-'); String ft = ht.substring(0, firstHyphen); String in = ht.substring(firstHyphen+1); int i = Integer.parseInt(ft) * 12 + Integer.parseInt(in); return i; } // takes a date like "06/27/1987" and reformats it to "'1987-06-27'" private static String mySQLDate(String dt) { int firstSlash = dt.indexOf('/'); int secondSlash = dt.indexOf("/",firstSlash+1); String m = dt.substring(0,firstSlash); String d = dt.substring(firstSlash+1,secondSlash); String y = dt.substring(secondSlash+1); return ("'" + y + '-' + m + '-' + d + "'"); } private static int yrsToInt(String y) { if (y.equals("R")) return 0; return Integer.parseInt(y); } public void scrape(PrintWriter pw) throws IOException { List lines = g.getContents(); for (int i=0; i