Skip to content

Commit 5f9a4ab

Browse files
committed
merge
2 parents 570c00a + 648bfc1 commit 5f9a4ab

File tree

340 files changed

+191
-164917
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

340 files changed

+191
-164917
lines changed

datasets/business/eures/import/geocoderParser/GGeocoderParserLib.v1.php

100755100644
Lines changed: 44 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,49 +18,71 @@
1818
// !!! no config below this line.................................................
1919

2020
//added by Lucas to return the data for database (this could be better implemented)
21-
/**
 * Geocodes $q and collects the requested address fields for database storage.
 *
 * A field is included only when a key of the same name is present in $_POST.
 * Plain result fields are copied from $ggeo->results; address components are
 * looked up through $ggeo->find_address_components().
 *
 * NOTE(review): selection depends on $_POST, not on what the geocoder
 * returned -- confirm this is intended for callers that do not run behind a
 * form submission (with an empty $_POST this always returns FALSE).
 *
 * @param mixed $q Query passed unchanged to get_ggeocoder_json().
 * @return array|bool Associative array of the selected fields, or FALSE when
 *                    no field was selected.
 */
function get_geocoder_address($q)
{
	$ggeo = get_ggeocoder_json($q);

	// Fields copied straight from $ggeo->results under the same key.
	$result_fields = array(
		'formatted_address',
		'latitude',
		'longitude',
		'viewport_lat_southwest',
		'viewport_lng_southwest',
		'viewport_lat_northeast',
		'viewport_lng_northeast',
		'bounds_lat_southwest',
		'bounds_lng_southwest',
		'bounds_lat_northeast',
		'bounds_lng_northeast',
	);

	// Output key => (component type, component name) for find_address_components().
	$component_fields = array(
		'country'                     => array('country', 'long_name'),
		'country_id'                  => array('country', 'short_name'),
		'administrative_area_level_1' => array('administrative_area_level_1', 'long_name'),
		'administrative_area_level_2' => array('administrative_area_level_2', 'long_name'),
		'locality'                    => array('locality', 'long_name'),
		'postal_code'                 => array('postal_code', 'long_name'),
		'route'                       => array('route', 'long_name'),
		'street_number'               => array('street_number', 'long_name'),
	);

	$address = array();

	foreach ($result_fields as $field)
	{
		if (isset($_POST[$field]))
		{
			$address[$field] = $ggeo->results[$field];
		}
	}

	foreach ($component_fields as $field => $lookup)
	{
		if (isset($_POST[$field]))
		{
			$address[$field] = $ggeo->find_address_components($lookup[0], $lookup[1]);
		}
	}

	// Nothing selected means there is nothing to hand back.
	return count($address) > 0 ? $address : FALSE;
}
6587

6688
//sanitization and encoding functions (this could be better implemented)
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
<?php
2+
######################################
3+
# PHP scraper
4+
# Scraps data from EURES jobs
5+
######################################
6+
7+
// A value of 0 disables PHP's execution time limit for this script.
ini_set('max_execution_time', 0);
8+
9+
// A value of -1 disables PHP's memory limit for this script.
ini_set('memory_limit', '-1');
10+
11+
include("config.php");
12+
13+
require 'scraperwiki/scraperwiki.php';
14+
15+
require 'scraperwiki/simple_html_dom.php';
16+
17+
include 'geocoderParser/GGeocoderParserLib.v1.php';
18+
19+
20+
/**
 * Reads every saved job page of one country, extracts the value next to each
 * "Address:" table header and inserts addresses not yet present into `geo`.
 *
 * Progress is echoed as HTML; any MySQL error aborts the script via die().
 * Uses the legacy mysql_* API on the default connection, which is not opened
 * in this function.
 *
 * @param string $country Sub-directory name under the jobs directory (e.g. "DE").
 * @return void
 */
function insert_address($country)
{
	$dir = '/var/www/latc/datasets/business/eures/import/jobs/'.$country;

	$handle = opendir($dir);
	if ($handle === false)
	{
		echo "Unable to open directory.";
		return;
	}

	while (false !== ($file = readdir($handle)))
	{
		// readdir() also yields ".", ".." and sub-directories; only regular
		// files are job pages worth fetching.
		if ($file === '.' || $file === '..' || !is_file($dir.'/'.$file))
		{
			continue;
		}

		// Encode the file name so spaces, "#", "?" etc. do not break the URL.
		$html = scraperwiki::scrape("http://localhost/latc/datasets/business/eures/import/jobs/".$country."/".rawurlencode($file));
		if (!is_string($html) || $html === '')
		{
			// Fetch failed or page is empty: nothing to parse.
			continue;
		}

		$dom = new simple_html_dom();
		$dom->load($html);

		foreach ($dom->find('th') as $data)
		{
			if (trim($data->plaintext) != 'Address:')
			{
				continue;
			}

			$sibling = $data->next_sibling();
			$value = $sibling ? trim($sibling->plaintext) : '';

			// An empty address would be turned into the SQL literal NULL,
			// never match an existing row and be inserted again and again.
			if ($value === '')
			{
				continue;
			}

			$address = db_prep($value);

			echo "ADDRESS: ".$address."<br />";

			// Lowercase both sides so the duplicate check does not depend on
			// the column collation.
			$sql = mysql_query("SELECT id FROM geo WHERE LCASE(address) = LCASE($address)") or die (mysql_error());

			if (mysql_num_rows($sql) == 0)
			{
				mysql_query("INSERT INTO geo SET address = $address") or die (mysql_error());
			}
			else
			{
				echo "Address already extracted.<br /><br />";
			}
		}

		// simple_html_dom keeps circular references; clear them explicitly so
		// memory does not grow with every processed file.
		$dom->clear();
		unset($dom);
	}

	closedir($handle);
}
67+
68+
/**
 * Geocodes up to 1000 rows of `geo` that have no formatted_address yet and
 * writes the geocoder fields back to the matching row.
 *
 * Rows for which get_geocoder_address() returns no array are left untouched
 * instead of being overwritten with NULLs. Fields missing from the result are
 * stored as NULL. Any MySQL error aborts the script via die().
 *
 * @return void
 */
function update_address()
{
	$sql = mysql_query("SELECT address FROM geo WHERE formatted_address IS NULL LIMIT 0, 1000") or die (mysql_error());
	//improve WHERE

	// Table column => key in the array returned by get_geocoder_address().
	$columns = array(
		'formatted_address'      => 'formatted_address',
		'country_id'             => 'country_id',
		'administrative_area'    => 'administrative_area_level_1',
		'subadministrative_area' => 'administrative_area_level_2',
		'locality'               => 'locality',
		'route'                  => 'route',
		'street_number'          => 'street_number',
		'postal_code'            => 'postal_code',
		'latitude'               => 'latitude',
		'longitude'              => 'longitude',
		'lat_southwest'          => 'viewport_lat_southwest',
		'lng_southwest'          => 'viewport_lng_southwest',
		'lat_northeast'          => 'viewport_lat_northeast',
		'lng_northeast'          => 'viewport_lng_northeast',
	);

	while ($row = mysql_fetch_array($sql))
	{
		$address = $row[0];
		$address_array = get_geocoder_address($address);

		// get_geocoder_address() returns FALSE when it has nothing to report;
		// indexing that would silently set every column to NULL.
		if (!is_array($address_array))
		{
			continue;
		}

		$assignments = array();
		foreach ($columns as $column => $key)
		{
			// Absent keys become NULL without raising an undefined-index notice.
			$value = isset($address_array[$key]) ? $address_array[$key] : null;
			$assignments[] = $column." = ".db_prep($value);
		}

		$query = "UPDATE geo SET ".implode(", ", $assignments)." WHERE address = ".db_prep($address);
		mysql_query($query) or die (mysql_error());
	}
}
96+
97+
/**
 * Turns a value into an SQL literal ready to be concatenated into a query.
 *
 * The value is cast to string and trimmed first. Null, non-scalar, empty and
 * whitespace-only values yield the bare keyword NULL; everything else is
 * escaped with mysql_real_escape_string() and wrapped in single quotes.
 * Numeric zero is kept as '0' rather than being treated as empty.
 *
 * @param mixed $data Value to prepare.
 * @return string Quoted, escaped literal or the string "NULL".
 */
function db_prep($data)
{
	// is_scalar() is false for null, arrays, objects and resources.
	if (!is_scalar($data))
	{
		return "NULL";
	}

	// Trim before testing for emptiness so "   " becomes NULL, and compare
	// strictly so 0 / 0.0 are not mistaken for an empty string.
	$trimmed = trim((string) $data);
	if ($trimmed === '')
	{
		return "NULL";
	}

	return "'" . mysql_real_escape_string($trimmed) . "'";
}
107+
108+
// Script entry point: extract addresses from the saved job pages under the "DE" directory.
insert_address("DE");
109+
110+
// Geocoding pass over rows that still lack a formatted_address; currently disabled.
//update_address();
111+
112+
?>
113+

0 commit comments

Comments
 (0)