CureNet/tools/country-list/country-list.php

94 lines
3.0 KiB
PHP
Raw Permalink Normal View History

2021-08-30 14:27:02 +00:00
<?php
// Result lists: filled by the scraping passes below, then exported as JSON.
$countries = $states = [];
// Running id counters; the Country and State constructors advance them through `global`.
$countryId = $stateId = 0;
/**
 * A single country record.
 *
 * All properties are public, so an instance passed to json_encode() is emitted
 * with the keys id, code, name, localeName, subname and haveSubregions
 * (in that order).
 */
class Country {
    /** Sequential identifier, assigned automatically on construction. */
    public int $id;

    /**
     * @param string      $code           Country code as supplied by the caller.
     * @param string      $name           Display name.
     * @param string|null $localeName     Optional native-language name.
     * @param string|null $subname        Optional secondary part of the name.
     * @param bool        $haveSubregions Caller-supplied flag; defaults to false.
     */
    public function __construct(
        public string $code,
        public string $name,
        public ?string $localeName,
        public ?string $subname,
        public bool $haveSubregions = false
    ) {
        // The id sequence lives in the script-level $countryId variable.
        global $countryId;
        $countryId += 1;
        $this->id = $countryId;
    }
}
/**
 * A single subdivision (state/region) record tied to a country.
 *
 * All properties are public, so an instance passed to json_encode() is emitted
 * with the keys id, code, name, countryCode and countryId (in that order).
 */
class State {
    /** Sequential identifier, assigned automatically on construction. */
    public int $id;

    /**
     * @param string $code        Subdivision code as supplied by the caller.
     * @param string $name        Subdivision name.
     * @param string $countryCode Code of the owning country.
     * @param int    $countryId   Id of the owning country.
     */
    public function __construct(
        public string $code,
        public string $name,
        public string $countryCode,
        public int $countryId,
    ) {
        // The id sequence lives in the script-level $stateId variable.
        global $stateId;
        $stateId += 1;
        $this->id = $stateId;
    }
}
/**
 * Download a URL, keeping a copy on disk so repeated runs do not hit the network.
 *
 * Cached copies are stored under ./.cache (relative to the current working
 * directory) in a file named after the MD5 of the URL.
 *
 * @param string $url Address (or any path file_get_contents() accepts) to read.
 *
 * @return string|false The body, or false when the download failed.
 */
function get_url(string $url): string|false {
    $cacheDir = './.cache';
    $cacheFile = $cacheDir . '/' . md5($url);

    if (file_exists($cacheFile)) {
        $cached = file_get_contents($cacheFile);
        // An unreadable or empty entry (e.g. left behind by an earlier failed
        // download) is treated as a miss so that it gets refreshed.
        if ($cached !== false && $cached !== '') {
            return $cached;
        }
    }

    $content = file_get_contents($url);
    if ($content === false) {
        // Never store a failed download: it would be served as an empty
        // document on every later run.
        return false;
    }

    // Create the cache directory on first use; if that is impossible the
    // content is still returned, just not cached.
    if (is_dir($cacheDir) || mkdir($cacheDir, 0777, true) || is_dir($cacheDir)) {
        file_put_contents($cacheFile, $content);
    }

    return $content;
}
// First pass: read the UNECE country index page and build one Country per matching table row.
$content = get_url('https://unece.org/trade/uncefact/unlocode-country-subdivisions-iso-3166-2');
// Capture groups used below: 2 = text of a cell holding 1-4 letters (the code),
// 5 = inner HTML of the cell that follows it (the name, possibly wrapped in a link).
if (preg_match_all('/<td ([^>]{1,})>([a-z]{1,4})<\/td>([^<]{0,})<td ([^>]{1,})>(.*?)<\/td>/i', $content, $matches)) {
    foreach ($matches[5] as $mid => $nameCell) {
        // "Name, qualifier" entries are split at the first comma. explode() works
        // on bytes throughout, so the cut is correct even when multibyte
        // characters precede the comma (mixing strpos() with mb_substr() is not).
        $parts = explode(',', strip_tags($nameCell), 2);
        $name = $parts[0];
        // Decode entities in the qualifier as well, so both halves of the name
        // are stored in the same form.
        $subname = isset($parts[1]) ? html_entity_decode(trim($parts[1])) : null;

        $country = new Country(
            code: $matches[2][$mid],
            name: html_entity_decode(trim($name)),
            localeName: null,
            subname: $subname,
            // A link inside the name cell marks a country that has a subdivision page.
            haveSubregions: str_contains($nameCell, '<a href='),
        );
        // The only locale name currently provided is hard-coded here.
        if ($country->code === 'PL') {
            $country->localeName = 'Polska';
        }
        $countries[] = $country;
    }
}
// Second pass: for every country flagged as having subregions, fetch its
// subdivision page and append one State per matching table row.
$processed = 0;
$total = count($countries);
foreach ($countries as $country) {
    // Every country counts towards the progress figure, fetched or not.
    $processed++;
    if (!$country->haveSubregions) {
        continue;
    }

    $page = get_url('https://unece.org/DAM/cefact/locode/Subdivision/' . strtolower($country->code) . 'Sub.htm');
    $page = str_replace('&nbsp;', ' ', $page);

    // Three consecutive cells per row; group 5 is the second cell (used as the
    // code) and group 8 the third cell (used as the name).
    if (preg_match_all('/<td ([^>]{1,})>([^>]{2,14})<\/td>([^<]{0,})<td ([^>]{1,})>(.*?)<\/td>([^<]{0,})<td ([^>]{1,})>(.*?)<\/td>/i', $page, $cells)) {
        foreach (array_keys($cells[0]) as $row) {
            $states[] = new State(
                code: trim($cells[5][$row]),
                name: html_entity_decode(trim($cells[8][$row])),
                countryCode: $country->code,
                countryId: $country->id,
            );
        }
    }

    // Progress is printed only for countries whose page was actually processed.
    echo round(100 * $processed / $total, 2) . "% {$country->name}\n";
}
// Export both lists as pretty-printed JSON into <three levels above this file>/src/backend/data.
$dir = dirname(__FILE__, 3) . '/src/backend/data';
// JSON_THROW_ON_ERROR: without it a failed encode returns false, which
// file_put_contents() would write as an empty file over the existing data.
$flags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR;
// Encode everything before touching the disk so that an encoding failure
// leaves both target files untouched.
$payloads = [
    'countries.json' => json_encode($countries, $flags),
    'states.json' => json_encode($states, $flags),
];
foreach ($payloads as $file => $json) {
    if (file_put_contents($dir . '/' . $file, $json) === false) {
        throw new RuntimeException("Unable to write {$dir}/{$file}");
    }
}