processLDDtoMapJson.py

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.

##### JSON maker from London Development Database #####
# This is intended to be a legible script to produce #
# an json output for the CA LDD datamap. If it makes  #
# no sense please ask!                               #
#######################xxx############################

import io
import numbers
import sys
import pyproj
import psycopg2
import psycopg2.extras
import string
import re
from dicttoxml import dicttoxml
import xmltodict #I wrote this for xml, then shifted to json part way through. Ideally it should be rewritten to be json native.
import json
import getpass

if len(sys.argv) != 5:
  print "Please run this script as follows:\n"
  print "\tpython " + sys.argv[0] + " <file out path> <sql query file> <impact field> <impact cat>\n"
  print "file out path: the directory and filename you want to output to (note that the script will provide a minified json under this file name followed by .min)"
  print "sql query file: an sql file with the query that will be run on the database. Please use existing queries as templates -- they are located in the 'q' directory."
  print "impact field:  either 'tenure_type_rc' (for housing tenure) or 'unit_provider_rc' (for who is responsible for the property e.g. council or private)"
  print "impact cat: 'S' for Social (with tenure_type_rc), 'L' for Local Authority (with unit_provider_rc), 'total' for all housing values (all lower case)\n"
  print "Example:\n"
  print "\tpython " + sys.argv[0] + " existingsocialhousing.json q/existsocialhousing.sql tenure_type_rc S\n"
  print "See README.md for more info\n"
  sys.exit()
fileOut = sys.argv[1] # where would you like the file to be outputted to?
baseQueryFn = sys.argv[2] # location of a text file with the sql query you would like to use. Use an existing example as basis; this will have requisite fields.
impactField = sys.argv[3] #e.g. tenure_type_rc or unit_provider_rc
impactCat = sys.argv[4] #e.g. What housing value should the map used for impact? 'S' for Social (with tenure_type_rc), 'L' for Local Authority (with unit_provider_rc), 'total' for all housing values (all lower case)
print "Please login to PSQL..."
username = raw_input('Username: ')
pswd = getpass.getpass(prompt='Password: ')
permittedImpactField = ("tenure_type_rc", "unit_provider_rc")
while impactField not in permittedImpactField:
  print "Please enter an appropriate impact field - either 'tenure_type_rc' (for housing tenure) or 'unit_provider_rc' (for who is responsible for the property e.g. council or private)"
  impactField = raw_input()
#Connect to the database and make the cursor
permittedImpactCat = ("L", "S", "total")
while impactCat not in permittedImpactCat:
  print "Please enter an appropriate impact category - 'S' for Social (with tenure_type_rc), 'L' for Local Authority (with unit_provider_rc), 'total' for all housing values (all lower case)"
  impactCat = raw_input()
dbConnect = psycopg2.connect("dbname='ldd' user='" + username + "' host='localhost' password='" + pswd + "'")
cursor = dbConnect.cursor(cursor_factory=psycopg2.extras.DictCursor)
bng = pyproj.Proj(init='epsg:27700')
wgs84 = pyproj.Proj(init='epsg:4326')

# Let's start making functions
# function to render SQL result as a python dictionary
def sendSQLgetDict(sql, cursor):
  cursor.execute (sql)
  ans =cursor.fetchall()
  dict_result = []
  for row in ans:
    dict_result.append(dict(row))
  return dict_result

def sendSQLgetList(sql, cursor):
  cursor.execute(sql)
  ans = cursor.fetchall()
  return ans

def sendSQLgetVal(sql, cursor):
  cursor.execute(sql)
  ans = cursor.fetchone()
  return ans

# function to covert easting/northing to lat/lon xml
def getLatLon(permId, cursor):
  paSQL = "SELECT easting, northing FROM app_ldd.ld_permissions WHERE permission_id = " + str(permId) +";"
  pa = sendSQLgetDict(paSQL, cursor)
  if isinstance(pa[0]["easting"], numbers.Number) and isinstance(pa[0]["northing"], numbers.Number): 
    lon,lat = pyproj.transform(bng, wgs84, pa[0]['easting'], pa[0]['northing'])
    slat = str(lat)
    slon = str(lon)
    return "<lat>" + slat + "</lat><lon>" + slon + "</lon>";
  else:
    return "FAIL"


def getPoly(permId, cursor):
  polytext = sendSQLgetVal("SELECT ST_AsText(the_geom) FROM app_ldd.mi_ld_permissions_polygons WHERE objectid = " + str(permId) + ";", cursor);
  out = "<geoPoly>"
  if polytext is not None:
    if polytext[0] is not None:
      m = re.match("POLYGON\(\((.*)\)\)", polytext[0], re.M|re.I)
    else: m = False
    if m:
      pt = m.group(1)
      pa = pt.split(',')
      out += ""
      for coord in pa:
        ca = coord.split(' ')
        easting = float(ca[0])
        northing = float(ca[1])
        lon,lat = pyproj.transform(bng, wgs84, easting, northing)
        out += "<point><lat>" + str(lat) + "</lat><lon>" + str(lon) + "</lon></point>"
  out += "</geoPoly>"
  return out

def getExistingHousing(permId, impactField, cursor):
  erl = sendSQLgetDict("SELECT " + impactField + " AS housingType, SUM(number_of_units) AS units FROM app_ldd.ld_exist_res_lines WHERE permission_id = " + str(permId) + " GROUP BY " + impactField + ";", cursor)
  xc_erl = "<existingHousing>"
  for rerl in erl:
    xml_erl = dicttoxml(rerl, attr_type=False, root=False)
    xc_erl += "<line>" + xml_erl + "</line>"
  xc_erl += "</existingHousing>"
  return xc_erl

def getExistingOpenSpace(permId, cursor):
  esl = sendSQLgetDict("SELECT open_space_type_rc AS spaceType, public_access_rc as accessType, SUM(area) AS hectares FROM app_ldd.ld_exist_open_space_lines WHERE permission_id = " + str(permId) + " GROUP BY open_space_type_rc, public_access_rc;", cursor)
  xc_esl = "<existingOpenSpace>"
  for resl in esl:
    #for key, value in resl.iteritems() :
     #print key, value
    xml_esl = dicttoxml(resl, attr_type=False, root=False)
    xc_esl += "<line>" + xml_esl + "</line>"
  xc_esl += "</existingOpenSpace>"
  return xc_esl

def getExistingFloorspace(permId, cursor):
  esl = sendSQLgetDict("SELECT planning_use_class_rc AS planningUseClass, SUM(floorspace) AS floorspace FROM app_ldd.ca_exist_non_res_floorspace WHERE permission_id = " + str(permId) + " GROUP BY planning_use_class_rc;", cursor)
  xc_esl = "<existNonResFloorspace>"
  for resl in esl:
    #for key, value in resl.iteritems() :
     #print key, value
    xml_esl = dicttoxml(resl, attr_type=False, root=False)
    xc_esl += "<line>" + xml_esl + "</line>"
  xc_esl += "</existNonResFloorspace>"
  return xc_esl

def getExistingNonResAccom(permId, cursor):
  esl = sendSQLgetDict("SELECT planning_use_class_rc AS planningUseClass, accomtype AS accomType, SUM(accom) AS accom FROM app_ldd.ca_exist_non_res_accom WHERE permission_id = " + str(permId) + " GROUP BY planning_use_class_rc, accomtype;", cursor)
  xc_esl = "<existNonResAccom>"
  for resl in esl:
    #for key, value in resl.iteritems() :
     #print key, value
    xml_esl = dicttoxml(resl, attr_type=False, root=False)
    xc_esl += "<line>" + xml_esl + "</line>"
  xc_esl += "</existNonResAccom>"
  return xc_esl

def getAllAppStages(rbp, impactField, cursor):
  #the first query gets the proposed housing from the initial application
  xc_prl = "<appStage>\n<appStageId>" + str(rbp["permission_id"]) + "</appStageId><permission_date>" + str(rbp["permission_date"]) + "</permission_date>\n<borough_ref>" + rbp["borough_ref"] + "</borough_ref><appStageDescr>Original Application</appStageDescr>"
  xc_prl += getProposedHousingBase(rbp["permission_id"], impactField, cursor)
  xc_prl += getProposedOpenSpaceBase(rbp["permission_id"], cursor)
  xc_prl += getProposedFloorspaceBase(rbp["permission_id"], cursor)
  xc_prl += "</appStage>"
  #the next query gets all the superseding permission ids from prop_res_lines, and works through them.
  tables = ["app_ldd.ld_prop_res_lines", "app_ldd.ld_prop_open_space_lines", "app_ldd.ca_prop_non_res_floorspace", "app_ldd.ca_prop_non_res_accom"]
  fields = "superseded_permission_id, superseded_date "
  where = " WHERE superseded_permission_id IS NOT NULL AND permission_id = " + str(rbp["permission_id"]) 
  superSetSQL = ""
  i = 0
  for t in tables:
    if i>0:
      superSetSQL += " UNION "
    superSetSQL += "SELECT " + fields + " FROM " + t + where
    i = 1 
  superSetSQL += " GROUP BY " + fields + " ORDER BY superseded_date;"
#  superSetSQL = "SELECT superseded_permission_id AS id FROM app_ldd.ld_prop_res_lines WHERE superseded_permission_id IS NOT NULL AND permission_id = " + str(rbp["permission_id"]) + " GROUP BY superseded_permission_id;"
  superSet = sendSQLgetDict(superSetSQL, cursor)
  for ss in superSet:
    print "Superseding app: " + str(ss["superseded_permission_id"])
    #this query then get the base details of the superseding app
    superDetailsSQL = "SELECT permission_id AS appStageId, permission_date, borough_ref, descr AS appStageDescr FROM app_ldd.ld_permissions WHERE permission_id = " + str(ss["superseded_permission_id"])
    sd = sendSQLgetDict(superDetailsSQL, cursor)
    xml_sd = dicttoxml(sd[0], attr_type=False, root=False)
    xc_prl += "<appStage>\n" + xml_sd 
    superIdSQL = "SELECT superseded_permission_id FROM app_ldd.ld_prop_res_lines WHERE permission_id = " + str(rbp["permission_id"]) + " AND superseded_date <= '" + str(ss["superseded_date"]) +  "' GROUP BY superseded_permission_id;"
    superIds = sendSQLgetDict(superIdSQL, cursor)
    superWhere = ""
    for s in superIds:
      superWhere += " OR permission_id = " + str(s['superseded_permission_id'])
    xc_prl += getProposedHousingForAppStage(rbp["permission_id"], ss["superseded_date"], superWhere, impactField, cursor)
    xc_prl += getProposedOpenSpaceForAppStage(rbp["permission_id"], ss["superseded_date"], superWhere, cursor)
    xc_prl += getProposedFloorspaceForAppStage(rbp["permission_id"], ss["superseded_date"], superWhere, cursor)
    xc_prl += "</appStage>"
  return xc_prl

def getProposedHousingBase(permId, impactField, cursor):
  prlSQL = "SELECT " + impactField + " AS housingType, SUM(number_of_units) AS units FROM app_ldd.ld_prop_res_lines WHERE permission_id = " + str(permId) + " GROUP BY " + impactField + ";"
  prl = sendSQLgetDict(prlSQL, cursor)
  xc_ph = "<proposedHousing>" 
  for rprl in prl:
    xml_prl = dicttoxml(rprl, attr_type=False, root=False)
    xc_ph += "<line>" + xml_prl + "</line>"
  xc_ph += "</proposedHousing>"
  return xc_ph

def getProposedOpenSpaceBase(permId, cursor):
  psl = sendSQLgetDict("SELECT open_space_type_rc AS spaceType, public_access_rc as accessType, SUM(area) AS hectares FROM app_ldd.ld_prop_open_space_lines WHERE permission_id = " + str(permId) + " GROUP BY open_space_type_rc, public_access_rc;", cursor)
  xc_psl = "<proposedOpenSpace>"
  for rpsl in psl:
    xml_psl = dicttoxml(rpsl, attr_type=False, root=False)
    xc_psl += "<line>" + xml_psl + "</line>"
  xc_psl += "</proposedOpenSpace>"
  return xc_psl

def getProposedFloorspaceBase(permId, cursor):
  psl = sendSQLgetDict("SELECT planning_use_class_rc AS planningUseClass, SUM(floorspace) AS floorspace FROM app_ldd.ca_prop_non_res_floorspace WHERE permission_id = " + str(permId) + " GROUP BY planning_use_class_rc;", cursor)
  xc_psl = "<propNonResFloorspace>"
  for rpsl in psl:
    xml_psl = dicttoxml(rpsl, attr_type=False, root=False)
    xc_psl += "<line>" + xml_psl + "</line>"
  xc_psl += "</propNonResFloorspace>"
  return xc_psl

def getProposedNonResAccomBase(permId, cursor):
  psl = sendSQLgetDict("SELECT planning_use_class_rc AS planningUseClass, accomtype AS accomType SUM(accom) AS accom FROM app_ldd.ca_prop_non_res_floorspace WHERE permission_id = " + str(permId) + " GROUP BY planning_use_class_rc, accomtype;", cursor)
  xc_psl = "<propNonResAccom>"
  for rpsl in psl:
    xml_psl = dicttoxml(rpsl, attr_type=False, root=False)
    xc_psl += "<line>" + xml_psl + "</line>"
  xc_psl += "</propNonResAccom>"
  return xc_psl

def getProposedHousingForAppStage(permId, superDate, superWhere, impactField, cursor):
  superLinesSQL = "SELECT " + impactField + " AS housingType, SUM(number_of_units) AS units FROM app_ldd.ld_prop_res_lines WHERE (permission_id = " + str(permId) + " AND (superseded_permission_id IS NULL OR superseded_date > '" + str(superDate) + "')) " + superWhere + " GROUP BY " + impactField + ";" 
  superLines = sendSQLgetDict(superLinesSQL, cursor)
  xc_ph = "<proposedHousing>"
  #And then we need to get every line that is replaced i.e. the sum total of every line that is superseded.
  for srl in superLines:
    xml_srl = dicttoxml(srl, attr_type=False, root=False)
    xc_ph += "<line>" + xml_srl + "</line>"
  xc_ph += "</proposedHousing>"
  return xc_ph

def getProposedOpenSpaceForAppStage(permId, superDate, superWhere, cursor):
  superLinesSQL = "SELECT open_space_type_rc AS spaceType, public_access_rc as accessType, SUM(area) AS hectares FROM app_ldd.ld_prop_open_space_lines WHERE (permission_id = " + str(permId) + " AND (superseded_permission_id IS NULL OR superseded_date > '" + str(superDate) + "')) " + superWhere + " GROUP BY open_space_type_rc, public_access_rc;" 
  superLines = sendSQLgetDict(superLinesSQL, cursor)
  xc_ps = "<proposedOpenSpace>"
  #And then we need to get every line that is replaced i.e. the sum total of every line that is superseded.
  for srl in superLines:
    xml_srl = dicttoxml(srl, attr_type=False, root=False)
    xc_ps += "<line>" + xml_srl + "</line>"
  xc_ps += "</proposedOpenSpace>"
  return xc_ps

def getProposedFloorspaceForAppStage(permId, superDate, superWhere, cursor):
  superLinesSQL = "SELECT planning_use_class_rc AS planningUseClass, SUM(floorspace) AS floorspace FROM app_ldd.ca_prop_non_res_floorspace WHERE (permission_id = " + str(permId) + " AND (superseded_permission_id IS NULL OR superseded_date > '" + str(superDate) + "')) " + superWhere + " GROUP BY planning_use_class_rc;" 
  superLines = sendSQLgetDict(superLinesSQL, cursor)
  xc_pfs = "<propNonResFloorspace>"
  #And then we need to get every line that is replaced i.e. the sum total of every line that is superseded.
  for srl in superLines:
    xml_srl = dicttoxml(srl, attr_type=False, root=False)
    xc_pfs += "<line>" + xml_srl + "</line>"
  xc_pfs += "</propNonResFloorspace>"
  return xc_pfs

def getProposedAccomForAppStage(permId, superDate, superWhere, cursor):
  superLinesSQL = "SELECT planning_use_class_rc AS planningUseClass, accomtype AS accomType, SUM(accom) AS accom FROM app_ldd.ca_prop_non_res_accom WHERE (permission_id = " + str(permId) + " AND (superseded_permission_id IS NULL OR superseded_date > '" + str(superDate) + "')) " + superWhere + " GROUP BY planning_use_class_rc;" 
  superLines = sendSQLgetDict(superLinesSQL, cursor)
  xc_pa = "<propNonResAccom>"
  #And then we need to get every line that is replaced i.e. the sum total of every line that is superseded.
  for srl in superLines:
    xml_srl = dicttoxml(srl, attr_type=False, root=False)
    xc_pa += "<line>" + xml_srl + "</line>"
  xc_pa += "</propNonResAccom>"
  return xc_pa


def getHousingImpact(rbp, impactField, impactCat, cursor):
  where = ""
  if impactCat != "total":
    where = "AND " + impactField + " = '" + impactCat + "'"
  esh = sendSQLgetVal("SELECT SUM(number_of_units) AS units FROM app_ldd.ld_exist_res_lines WHERE permission_id = " + str(rbp["permission_id"]) + where, cursor)
  psh = sendSQLgetVal("SELECT SUM(number_of_units) AS units FROM app_ldd.ld_prop_res_lines WHERE permission_id = " + str(rbp["permission_id"]) + where, cursor)
  existtotalhousing = sendSQLgetVal("SELECT SUM(number_of_units) AS units FROM app_ldd.ld_exist_res_lines WHERE permission_id = " + str(rbp["permission_id"]) , cursor)
  proptotalhousing = sendSQLgetVal("SELECT SUM(number_of_units) AS units FROM app_ldd.ld_prop_res_lines WHERE permission_id = " + str(rbp["permission_id"]), cursor)
  #print rbp["permission_id"]
  if type(psh[0]) is long:
    pshval = psh[0]
    if type(proptotalhousing[0]) is long and proptotalhousing[0] != 0:
      #print str(float(pshval)) + " divided by " + str(float(proptotalhousing[0]))
      pshpc = float((float(pshval) / float(proptotalhousing[0])))
      #print "equals " + str(pshpc)
    else: pshpc = 0.0
  else: 
    pshval = 0
    pshpc = 0.0
  if type(esh[0]) is long:
    eshval = esh[0]
    if type(existtotalhousing[0]) is long and existtotalhousing[0] != 0:
      #print str(float(eshval)) + " divided by " + str(float(existtotalhousing[0]))
      eshpc = float((float(eshval) / float(existtotalhousing[0])))
    else: eshpc = 0.0
  else: 
    eshval = 0
    eshpc = 0.0
  #shi = psh[0] - esh[0]
  shi = {"number": pshval - eshval, "pcshift": pshpc - eshpc}
  return shi

def getSpaceImpact(rbp, cursor):
  es = sendSQLgetVal("SELECT SUM(area) AS hectares FROM app_ldd.ld_exist_open_space_lines WHERE permission_id = " + str(rbp["permission_id"]) + ";", cursor)
  ps = sendSQLgetVal("SELECT SUM(area) AS hectares FROM app_ldd.ld_prop_open_space_lines WHERE permission_id = " + str(rbp["permission_id"]) + ";", cursor)
#  print type(ps[0])
  if type(ps[0]) is long:
    psval = ps[0]
  else: psval = 0;
  if type(es[0]) is long:
    esval = es[0]
  else: esval = 0;
#  print str(ps[0]) + "\n"
#  print type(es[0])
#  print str(es[0]) + "\n"
  #shi = psh[0] - esh[0]
  si = psval - esval  
  return si

def getMetaInfo(impactField, impactCat, cursor):
  domain = ""
  if impactField == "unit_provider_rc":
    resDomain = "UNIT_PROVIDER"
  elif impactField == "tenure_type_rc":
    resDomain = "TENURE_TYPE"
  xc_rc = ""
  cats = [ {"cn":"resCat", "domain": resDomain},
           {"cn":"osCat", "domain": "OPEN_SPACE_TYPE"},
           {"cn":"puCat", "domain": "CA_PLANNING_USE_CLASS"}
         ]
  for c in cats:
    xc_rc += getRefDomain(c["cn"], c["domain"], cursor)
  xc_rc += "<housingImpactExamined>" + impactCat + "</housingImpactExamined>"
  xc_rc += "<housingImpactDomain>" + domain + "</housingImpactDomain>"
  xc_rcp = xc_rc.replace("&","&amp;")
  return xc_rcp

def getRefDomain(codename, domain, cursor):
  sql = "SELECT code, descr FROM app_ldd.ld_ref_codes WHERE domain_name = '" + domain + "';"
  refcodes = sendSQLgetDict(sql, cursor)
  xc_rc = "<" + codename + ">"
  for rc in refcodes:
    xc_rc += "<" + rc["code"] + ">" + rc["descr"] + "</" + rc["code"] + ">"
  xc_rc += "</" + codename + ">"
  return xc_rc

def writeListToFile(l, f):
  textFile = open(f, "w")
  for row in l:
    textFile.write(row[0] + "\n")
  textFile.close

def outputJson(fo, out_str, minify):
  if minify:
    sep = (',', ':')
    ind = None
  else:
    sep = (',', ': ')
    ind = 4
  jsonString = json.dumps(xmltodict.parse(out_str), indent=ind, separators=sep)
  print("\nJSON output(" + fo + "):")
  print(jsonString)
  with open(fo, 'w') as f:
      f.write(jsonString)


#.. and back to the main script
# opening the main planning applications query and converting it into xml within xc variable
with open(baseQueryFn, 'r') as myfile:
  sql1=myfile.read().replace('\n', ' ')
basePerm = sendSQLgetDict(sql1, cursor)
xc = "<meta>"
xc += getMetaInfo(impactField, impactCat, cursor)
xc += "</meta>"
for rbp in basePerm:
  xml_loc = getLatLon(rbp["permission_id"], cursor)
  if xml_loc != "FAIL": 
    #this is too long and needs to go in its own set of functions
    rbp["address"] = str(rbp["sec_add_obj_name"]) + str(rbp["prim_add_obj_name"]) +  str(rbp["street"]) + str(rbp["town"]) + str(rbp["post_code"]) 
    xml_bp = dicttoxml(rbp, attr_type=False, root=False)
    xml_loc = getLatLon(rbp["permission_id"], cursor)
    xc_erl = ""
    xc_prl = ""
    xml_shi = ""
    xc_erl += getExistingHousing(rbp["permission_id"], impactField, cursor)
    xc_erl += getExistingOpenSpace(rbp["permission_id"], cursor)
    xc_erl += getExistingFloorspace(rbp["permission_id"], cursor)
    xc_erl += getExistingNonResAccom(rbp["permission_id"], cursor)
#    xc_erl += getExistingHousingByProvider(rbp["permission_id"], cursor)
    xc_prl += "<appStages>" + getAllAppStages(rbp, impactField, cursor)
    xc_prl += "</appStages>"
    shi = getHousingImpact(rbp, impactField, impactCat, cursor)
    xml_shi = "<housingImpact>" + str(shi["number"]) + "</housingImpact>"
    xml_shps = "<housingPcShift>" + str(shi["pcshift"]) + "</housingPcShift>"
    poly = getPoly(rbp["permission_id"], cursor)
    xc += '<planApp id="' + str(rbp["permission_id"]) + '">' + xml_bp + xml_loc + xc_erl + xc_prl + xml_shi + xml_shps + poly + "</planApp>\n"
  else: print str(rbp["permission_id"]) + " has no geo data."
  xhead = '<?xml version="1.0" encoding="UTF-8"?>\n'

#Write the xml file
#xc=xc.decode('utf-8','ignore').encode('utf-8')
out_str = xhead + "<planApps>" + xc + "</planApps>"
printable = set(string.printable)
out_str_p = ''.join(filter(lambda x: x in printable, out_str))
out_u8 = unicode(out_str_p, "utf-8")
print out_u8
outputJson(fileOut, out_u8, False)
outputJson(fileOut + ".min", out_u8, True)
#textFile = io.open(dirOut + "/" + "planapp.xml", mode="w", encoding="utf-8")
#textFile.write(out_u8)