id = ++$countryId; } }
// NOTE(review): the line above is the tail of a definition that begins outside
// the reviewed text; it is kept verbatim and deliberately left undocumented.

/**
 * One subdivision row scraped from a country's UNECE subdivision page.
 *
 * Each instance takes its id from the global $stateId counter, which is
 * pre-incremented on every construction.
 */
class State
{
    public int $id;

    /**
     * @param string $code        Subdivision code taken from the scraped table row.
     * @param string $name        Subdivision name (HTML entities already decoded by the caller).
     * @param string $countryCode Code of the Country object the row belongs to.
     * @param int    $countryId   Id of the Country object the row belongs to.
     */
    public function __construct(
        public string $code,
        public string $name,
        public string $countryCode,
        public int $countryId,
    ) {
        global $stateId;
        $this->id = ++$stateId;
    }
}

/**
 * Return the body of a URL, using a file cache under ./.cache keyed by md5($url).
 *
 * A failed download is NOT written to the cache. Previously the `false` result
 * was stored as an empty file, so every later run read back an empty string
 * instead of retrying.
 *
 * @param string $url Address to fetch.
 *
 * @return string|false The body, or false when it could not be read
 *                      (file_get_contents() has already raised its warning).
 */
function get_url(string $url): string|false
{
    $cacheDir = './.cache';
    $cacheFile = $cacheDir . '/' . md5($url);

    if (file_exists($cacheFile)) {
        return file_get_contents($cacheFile);
    }

    $content = file_get_contents($url);
    if ($content === false) {
        return false;
    }

    // Without the directory the write below fails and nothing is ever cached.
    if (!is_dir($cacheDir)) {
        mkdir($cacheDir, 0777, true);
    }
    file_put_contents($cacheFile, $content);

    return $content;
}

// NOTE(review): no initialisation of these two lists is visible in the reviewed
// text. `??=` keeps any value assigned earlier in the file and otherwise starts
// from an empty list, so count()/json_encode() below never receive null.
$countries ??= [];
$states ??= [];

$content = get_url('https://unece.org/trade/uncefact/unlocode-country-subdivisions-iso-3166-2');
if ($content === false) {
    throw new \RuntimeException('Unable to download the UNECE country list.');
}

// Groups used below: 2 = country-code cell, 5 = inner HTML of the name cell.
// NOTE(review): the two `<td([^>` openers were missing from this literal in the
// reviewed text (its parentheses were unbalanced as shown). They are restored
// here so that groups 2 and 5 match the way the loop reads them; confirm.
$countryRowPattern = '/<td([^>]{1,})>([a-z]{1,4})<\/td>([^<]{0,})<td([^>]{1,})>(.*?)<\/td>/i';

if (preg_match_all($countryRowPattern, $content, $matches)) {
    foreach (array_keys($matches[0]) as $mid) {
        $nameCell = $matches[5][$mid];

        // Split "Name, sub-name" at the first comma. explode() works on bytes
        // consistently; the earlier strpos() + mb_substr() pair mixed a byte
        // offset with a character offset and cut in the wrong place whenever a
        // multibyte character preceded the comma.
        $parts = explode(',', strip_tags($nameCell), 2);
        $name = $parts[0];
        $subname = isset($parts[1]) ? html_entity_decode(trim($parts[1])) : null;

        $country = new Country(
            code: $matches[2][$mid],
            name: html_entity_decode(trim($name)),
            localeName: null,
            subname: $subname,
            // NOTE(review): the needle and everything up to `localeName = 'Polska'`
            // was lost in the reviewed text. Rebuilt as "the name cell contains a
            // link" plus a Poland-only locale name; confirm both against the
            // original script.
            haveSubregions: str_contains($nameCell, '<a'),
        );
        if ($country->code === 'PL') {
            $country->localeName = 'Polska';
        }

        $countries[] = $country;
    }
}

if ($countries === []) {
    // Stop before the data files are overwritten with empty lists.
    throw new \RuntimeException('No countries could be parsed from the UNECE country list.');
}

// Groups used below: 5 = subdivision code cell, 8 = subdivision name cell.
// NOTE(review): `<td([^>` openers restored as for the country pattern; confirm.
$stateRowPattern = '/<td([^>]{1,})>([^>]{2,14})<\/td>([^<]{0,})<td([^>]{1,})>(.*?)<\/td>([^<]{0,})<td([^>]{1,})>(.*?)<\/td>/i';

$i = 0;
$count = count($countries);
foreach ($countries as $country) {
    $i++;
    if (!$country->haveSubregions) {
        continue;
    }

    $url = 'https://unece.org/DAM/cefact/locode/Subdivision/' . strtolower($country->code) . 'Sub.htm';
    $page = get_url($url);
    if ($page === false) {
        // A single unreachable page should not abort the whole run; the country
        // simply gets no subdivisions and is retried next time (nothing cached).
        $page = '';
    }

    // NOTE(review): in the reviewed text both arguments looked like a plain
    // space, which would be a no-op; presumably the search string was the
    // `&nbsp;` entity; confirm.
    $content = str_replace('&nbsp;', ' ', $page);

    if (preg_match_all($stateRowPattern, $content, $matches)) {
        foreach (array_keys($matches[0]) as $mid) {
            $states[] = new State(
                code: trim($matches[5][$mid]),
                name: html_entity_decode(trim($matches[8][$mid])),
                countryCode: $country->code,
                countryId: $country->id,
            );
        }
    }

    // Progress is reported only for countries whose subdivision page was processed.
    echo round(100 * $i / $count, 2) . "% $country->name\n";
}

$dir = dirname(__FILE__, 3) . '/src/backend/data';

// JSON_THROW_ON_ERROR: an encoding failure raises before file_put_contents()
// runs, instead of replacing the existing file with an empty one.
$jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR;
file_put_contents($dir . '/countries.json', json_encode($countries, $jsonFlags));
file_put_contents($dir . '/states.json', json_encode($states, $jsonFlags));