94 lines
3.0 KiB
PHP
94 lines
3.0 KiB
PHP
|
<?php
|
||
|
|
||
|
// Result lists that the scraping passes further down append to.
$countries = $states = [];

// Running id counters; the Country and State constructors pre-increment
// these through `global`, so the first id handed out is 1.
$countryId = $stateId = 0;
|
||
|
|
||
|
/**
 * One country record as collected by this script.
 *
 * Each new instance takes the next value of the global $countryId counter as
 * its id, so ids follow construction order. Properties are declared in the
 * order id, code, name, localeName, subname, haveSubregions, which is also the
 * key order json_encode() emits for the object.
 */
class Country {
    public int $id;
    public string $code;
    public string $name;
    public ?string $localeName;
    public ?string $subname;
    public bool $haveSubregions;

    /**
     * @param string      $code           Country code.
     * @param string      $name           Country name.
     * @param string|null $localeName     Name in the local language, if known.
     * @param string|null $subname        Secondary part of the name, if any.
     * @param bool        $haveSubregions Whether subdivisions should be fetched for this country.
     */
    public function __construct(
        string $code,
        string $name,
        ?string $localeName,
        ?string $subname,
        bool $haveSubregions = false
    ) {
        global $countryId;

        $this->code = $code;
        $this->name = $name;
        $this->localeName = $localeName;
        $this->subname = $subname;
        $this->haveSubregions = $haveSubregions;

        // Advance the shared counter first, then adopt its new value as the id.
        ++$countryId;
        $this->id = $countryId;
    }
}
|
||
|
|
||
|
/**
 * One country subdivision record as collected by this script.
 *
 * Each new instance takes the next value of the global $stateId counter as its
 * id, so ids follow construction order. Properties are declared in the order
 * id, code, name, countryCode, countryId, which is also the key order
 * json_encode() emits for the object.
 */
class State {
    public int $id;
    public string $code;
    public string $name;
    public string $countryCode;
    public int $countryId;

    /**
     * @param string $code        Subdivision code.
     * @param string $name        Subdivision name.
     * @param string $countryCode Code of the owning country.
     * @param int    $countryId   Id of the owning Country object.
     */
    public function __construct(
        string $code,
        string $name,
        string $countryCode,
        int $countryId
    ) {
        global $stateId;

        $this->code = $code;
        $this->name = $name;
        $this->countryCode = $countryCode;
        $this->countryId = $countryId;

        // Advance the shared counter first, then adopt its new value as the id.
        ++$stateId;
        $this->id = $stateId;
    }
}
|
||
|
|
||
|
/**
 * Read a URL (or any path file_get_contents() accepts) through an on-disk cache.
 *
 * The cache entry is the file "<cacheDir>/<md5 of $url>". md5 is used only to
 * derive a file name, not for anything security related.
 *
 * A failed download is NOT cached: writing the `false` result would create an
 * empty cache file and every later run would silently get '' back instead of
 * retrying. The cache directory is created on demand; if it cannot be created
 * the content is still returned, just not cached.
 *
 * @param string $url      Location to read.
 * @param string $cacheDir Directory holding the cache files.
 *
 * @return string|false The body, or false when the download failed
 *                      (file_get_contents() has already emitted its warning).
 */
function get_url(string $url, string $cacheDir = './.cache'): string|false {
    $cacheFile = $cacheDir . '/' . md5($url);

    if (is_file($cacheFile)) {
        $cached = file_get_contents($cacheFile);
        if ($cached !== false) {
            return $cached;
        }
        // Unreadable cache entry: fall through and fetch again.
    }

    $content = file_get_contents($url);
    if ($content === false) {
        return false;
    }

    // The second is_dir() covers another process creating the directory
    // between our check and our mkdir().
    if (is_dir($cacheDir) || mkdir($cacheDir, 0777, true) || is_dir($cacheDir)) {
        file_put_contents($cacheFile, $content);
    }

    return $content;
}
|
||
|
// Pass 1: scrape the country table. The pattern captures two adjacent <td>
// cells: a 1-4 letter code (group 2) and the name cell (group 5), which may
// contain markup.
$content = get_url('https://unece.org/trade/uncefact/unlocode-country-subdivisions-iso-3166-2');
if (preg_match_all('/<td ([^>]{1,})>([a-z]{1,4})<\/td>([^<]{0,})<td ([^>]{1,})>(.*?)<\/td>/i', $content, $matches)) {
    foreach ($matches[2] as $mid => $code) {
        $nameCell = $matches[5][$mid];
        $name = strip_tags($nameCell);
        $subname = null;

        // Split "Name, qualifier" at the first comma. strpos() yields a BYTE
        // offset, so both halves are cut with byte-based substr(); mixing it
        // with mb_substr() would misplace the cut whenever a multibyte
        // character precedes the comma. Cutting at an ASCII comma is always
        // safe in UTF-8.
        $a = strpos($name, ',');
        if ($a !== false) {
            // Decode entities here as well, so subname is treated like name.
            $subname = html_entity_decode(trim(substr($name, $a + 1)));
            $name = substr($name, 0, $a);
        }

        $country = new Country(
            code: $code,
            name: html_entity_decode(trim($name)),
            // Only Poland has a hard-coded local name.
            localeName: $code === 'PL' ? 'Polska' : null,
            subname: $subname,
            // A link inside the name cell marks a country whose subdivision
            // page is fetched in the next pass.
            haveSubregions: str_contains($nameCell, '<a href='),
        );

        $countries[] = $country;
    }
}
|
||
|
// Pass 2: for every country flagged with haveSubregions, download its
// subdivision page and turn each matching table row into a State.
// $i counts every country visited (flagged or not), so the percentage printed
// after each download is relative to the whole list.
$count = count($countries);
$i = 0;
foreach ($countries as $country) {
    $i++;

    if (!$country->haveSubregions) {
        continue;
    }

    $code = strtolower($country->code);
    $url = 'https://unece.org/DAM/cefact/locode/Subdivision/' . $code . 'Sub.htm';

    // NOTE(review): as it appears here this replaces a space with a space,
    // i.e. a no-op. Presumably the search string was a non-breaking space or
    // "&nbsp;" before the file was copied - verify against the original.
    $content = str_replace(' ', ' ', get_url($url));

    // Three adjacent <td> cells per row: group 2 is the first cell (2-14
    // characters), group 5 the second cell, group 8 the third cell.
    $rowCount = preg_match_all('/<td ([^>]{1,})>([^>]{2,14})<\/td>([^<]{0,})<td ([^>]{1,})>(.*?)<\/td>([^<]{0,})<td ([^>]{1,})>(.*?)<\/td>/i', $content, $matches);
    if ($rowCount) {
        foreach (array_keys($matches[0]) as $mid) {
            $states[] = new State(
                code: trim($matches[5][$mid]),
                name: html_entity_decode(trim($matches[8][$mid])),
                countryCode: $country->code,
                countryId: $country->id,
            );
        }
    }

    echo round(100 * $i / $count, 2) . "% $country->name\n";
}
|
||
|
|
||
|
// Write both result lists as pretty-printed JSON into src/backend/data, three
// directory levels above this script.
$dir = dirname(__FILE__, 3) . '/src/backend/data';

// JSON_THROW_ON_ERROR: without it json_encode() returns false on failure
// (e.g. malformed UTF-8 in scraped text) and file_put_contents() would then
// overwrite the data file with nothing. Both lists are encoded before either
// file is touched, so an encoding failure leaves the existing files intact.
$jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR;
$countriesJson = json_encode($countries, $jsonFlags);
$statesJson = json_encode($states, $jsonFlags);

file_put_contents($dir . '/countries.json', $countriesJson);
file_put_contents($dir . '/states.json', $statesJson);
|
||
|
|