this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

cleanup

alice 61580d3f 930be258

-1015
-1
.gitignore
··· 1 1 build/ 2 2 .lock* 3 3 .clangd 4 - __pycache__ 5 4 node_modules
-433
scripts/generate_airport_tz_list.py
··· 1 - #!/usr/bin/env python3 2 - """Generate C timezone list for the Top-1000 airports. 3 - 4 - 1. Parse `top1000.html` (downloaded from GetToCenter) to extract IATA codes and 5 - airport names. 6 - 2. Use the `airportsdata` package to obtain the IANA timezone (`tz`) for each 7 - airport. 8 - 3. For each distinct set of (std_offset, dst_offset, dst_start, dst_end) 9 - belonging to that timezone (for the current year), build a bucket of airport 10 - IATA codes. 11 - 4. Emit a C source file (`src/c/airport_tz_list.c`) that mirrors the structure 12 - of `tz_list.c` already used by the Closest-Noon clock, but with **airport 13 - IATA codes** in the pooled name list instead of city names. 14 - 15 - Usage: 16 - # Always parse HTML top1000.html, then fallback for missing offsets 17 - python generate_airport_tz_list.py --html top1000.html --out src/c/airport_tz_list.c --top 10 --max-bucket 1000 18 - """ 19 - from __future__ import annotations 20 - 21 - import argparse 22 - import sys 23 - from pathlib import Path 24 - from datetime import datetime, timezone 25 - from typing import Dict, List, Tuple 26 - from tz_common import find_dst_transitions as _find_dst_transitions 27 - from bs4 import BeautifulSoup # type: ignore 28 - import airportsdata 29 - from timezonefinder import TimezoneFinder 30 - import pandas as pd 31 - 32 - # --------------------------------------------------------------------------- 33 - # Build ranked list of airports with route counts (fallback if HTML omitted) 34 - # --------------------------------------------------------------------------- 35 - 36 - def _download_routes_csv() -> pd.DataFrame: 37 - """Fetch routes.dat from the OpenFlights repo and return a DataFrame.""" 38 - url = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat" 39 - df = pd.read_csv(url, header=None, usecols=[2, 4], names=["src", "dst"], dtype=str) 40 - return df 41 - 42 - def _rank_airports_by_routes(airport_df: pd.DataFrame) -> pd.Series: 43 - 
"""Return Series indexed by IATA with descending route hit counts.""" 44 - routes = _download_routes_csv() 45 - counts = pd.concat([routes["src"], routes["dst"]]).value_counts() 46 - return counts 47 - 48 - def build_topN_per_timezone(top_n: int) -> List[Tuple[str, str]]: 49 - """Return a balanced list covering all timezones with up to top_n airports each. 50 - 51 - The ranking metric is route_hits (descending). Airports lacking route data 52 - default to zero, but they might still be picked to cover empty timezones. 53 - """ 54 - adict = airportsdata.load("IATA") 55 - # Build DataFrame and remove record-level 'iata' column to avoid duplicates 56 - df = pd.DataFrame.from_dict(adict, orient="index") 57 - if 'iata' in df.columns: 58 - df = df.drop(columns=['iata']) 59 - df = df.reset_index().rename(columns={'index': 'iata'}) 60 - 61 - # Add route_hits counts 62 - counts = _rank_airports_by_routes(df) 63 - df["route_hits"] = df["iata"].map(counts).fillna(0).astype(int) 64 - 65 - # Sort by route_hits descending 66 - df_sorted = df.sort_values("route_hits", ascending=False, ignore_index=True) 67 - 68 - tz_to_codes: Dict[str, List[Tuple[str, str]]] = {} 69 - 70 - # First pass: iterate sorted df to fill up to top_n per tz 71 - for _, row in df_sorted.iterrows(): 72 - tz = row["tz"] 73 - if not isinstance(tz, str) or tz == "": 74 - continue 75 - lst = tz_to_codes.setdefault(tz, []) 76 - if len(lst) < top_n: 77 - lst.append((row["iata"], row["name"])) 78 - # Early exit optimisation – if all tz have top_n we can break; but we 79 - # don't know total tz count easily, so skip. 
80 - 81 - # Now build final list 82 - final_list: List[Tuple[str, str]] = [] 83 - for codes in tz_to_codes.values(): 84 - final_list.extend(codes) 85 - return final_list 86 - 87 - # --------------------------------------------------------------------------- 88 - # Parsing HTML (optional) --------------------------------------------------- 89 - # --------------------------------------------------------------------------- 90 - 91 - def _parse_top1000(html_path: Path) -> List[Tuple[str, str]]: 92 - """Return list of (IATA, Airport Name) found in the HTML table.""" 93 - soup = BeautifulSoup(html_path.read_text(encoding="utf-8"), "html.parser") 94 - rows = soup.find_all("tr") 95 - results: List[Tuple[str, str]] = [] 96 - for tr in rows: 97 - tds = tr.find_all("td") 98 - if len(tds) < 3: 99 - continue 100 - iata = tds[2].get_text(strip=True).upper() 101 - if not (iata and len(iata) == 3): 102 - continue # skip ads rows etc. 103 - # Airport name usually inside the 2nd <td>, perhaps in an <h2> 104 - name_cell_text = tds[1].get_text(" ", strip=True) 105 - results.append((iata, name_cell_text)) 106 - return results 107 - 108 - # --------------------------------------------------------------------------- 109 - # Main C-code generation routine 110 - # --------------------------------------------------------------------------- 111 - 112 - def generate_c_code(airports_list: List[Tuple[str, str]], out_path: Path, group_size: int = 0, max_bucket: int = 0) -> None: 113 - """Generate airport_tz_list.c: 114 - 1) Build full buckets for every IATA tz variant (std, dst, transitions). 115 - 2) Pick top group_size codes per std-offset from HTML list. 116 - 3) Fallback for missing offsets (min 1, max max_bucket) using classification + traffic. 117 - 4) Distribute codes evenly across DST buckets, cap each to max_bucket. 
118 - """ 119 - year = datetime.now(timezone.utc).year 120 - airport_db = airportsdata.load("IATA") 121 - 122 - # Ensure unique HTML airport entries by IATA code 123 - seen_iatas: set[str] = set() 124 - unique_airports: List[Tuple[str, str]] = [] 125 - for iata, name in airports_list: 126 - if iata not in seen_iatas: 127 - unique_airports.append((iata, name)) 128 - seen_iatas.add(iata) 129 - airports_list = unique_airports 130 - # Build fallback DataFrame with classification and traffic for missing offsets 131 - df_all = pd.DataFrame.from_dict(airport_db, orient="index") 132 - if 'iata' in df_all.columns: 133 - df_all = df_all.drop(columns=['iata']) 134 - df_all = df_all.reset_index().rename(columns={'index': 'iata'}) 135 - # Recompute tz field from lat/lon to correct misclassified zones _before_ we 136 - # derive any offset‑related columns (important for DUT / America/Adak etc.) 137 - tf = TimezoneFinder() 138 - df_all['tz'] = df_all.apply( 139 - lambda row: tf.timezone_at(lat=row.get('lat'), lng=row.get('lon')) or row.get('tz'), 140 - axis=1, 141 - ) 142 - 143 - # Merge OurAirports classification 144 - try: 145 - oa = pd.read_csv("https://ourairports.com/data/airports.csv", usecols=["iata_code","type","scheduled_service"]) # type: ignore 146 - oa = oa.rename(columns={"iata_code": "iata"}).dropna(subset=["iata"]) 147 - df_all = df_all.merge(oa[['iata','type','scheduled_service']], on='iata', how='left') 148 - except Exception: 149 - df_all['type'] = None 150 - df_all['scheduled_service'] = None 151 - # Add route hit counts 152 - traffic_counts = _rank_airports_by_routes(df_all) 153 - traffic_dict = traffic_counts.to_dict() 154 - # Map route hits using apply to ensure a Series 155 - df_all['route_hits'] = df_all['iata'].apply(lambda x: traffic_dict.get(x, 0)).astype(int) 156 - # Compute standard offset seconds for each record (now that tz is fixed) 157 - df_all['std_offset_s'] = df_all['tz'].apply(lambda tz: _find_dst_transitions(tz, year)[0]) 158 - 159 - # 
Fallback selector for a given std_offset 160 - def _fallback_codes(std_s: int) -> List[str]: 161 - """Fallback hierarchy per std offset: 162 - 1) up to max_bucket (or 3) large/international, 163 - 2) up to 2 medium/regional, 164 - 3) up to 1 small_airport, 165 - 4) fill any remaining to reach at least 1, max_bucket total.""" 166 - seg = df_all[df_all['std_offset_s'] == std_s] 167 - if seg.empty: 168 - return [] 169 - seg_sorted = seg.sort_values('route_hits', ascending=False) 170 - result: List[str] = [] 171 - # 1) large_international 172 - large = seg_sorted[(seg_sorted['type'] == 'large_airport') & (seg_sorted['scheduled_service'] == 'yes')] 173 - if not large.empty: 174 - cap = max_bucket if max_bucket > 0 else 3 175 - result = large['iata'].head(cap).tolist() 176 - # 2) medium_regional 177 - remain = (max_bucket - len(result)) if max_bucket > 0 else (3 - len(result)) 178 - if remain > 0: 179 - medium = seg_sorted[(seg_sorted['type'] == 'medium_airport') & (seg_sorted['scheduled_service'] == 'yes')] 180 - if not medium.empty: 181 - mcap = min(remain, 2) 182 - result.extend(medium['iata'].head(mcap).tolist()) 183 - remain = (max_bucket - len(result)) if max_bucket > 0 else (3 - len(result)) 184 - # 3) small_airport 185 - if remain > 0: 186 - small = seg_sorted[(seg_sorted['type'] == 'small_airport') & (seg_sorted['scheduled_service'] == 'yes')] 187 - if not small.empty: 188 - result.extend(small['iata'].head(1).tolist()) 189 - remain = (max_bucket - len(result)) if max_bucket > 0 else (3 - len(result)) 190 - # 4) any to ensure at least one 191 - if not result: 192 - result = [seg_sorted['iata'].iloc[0]] 193 - # enforce max_bucket hard limit 194 - if max_bucket > 0 and len(result) > max_bucket: 195 - result = result[:max_bucket] 196 - return result 197 - 198 - # 1) Build full buckets from all tz names in df_all 199 - full_buckets: Dict[Tuple[int,int,int,int], Dict[str, object]] = {} 200 - group_keys: Dict[int, List[Tuple[int,int,int,int]]] = {} 201 - for tz_name 
in df_all['tz'].dropna().unique(): 202 - std_s, dst_s, start_ts, end_ts = _find_dst_transitions(tz_name, year) 203 - key = (std_s, dst_s, start_ts, end_ts) 204 - if key not in full_buckets: 205 - full_buckets[key] = { 206 - 'std': std_s, 207 - 'dst': dst_s, 208 - 'start': start_ts, 209 - 'end': end_ts, 210 - 'tz_names': [tz_name], 211 - } 212 - group_keys.setdefault(std_s, []).append(key) 213 - else: 214 - full_buckets[key]['tz_names'].append(tz_name) 215 - 216 - # 2) Collect codes from HTML for each std_offset (popular timezones) 217 - group_codes: Dict[int, List[str]] = {} 218 - for iata, _ in airports_list: 219 - rec = airport_db.get(iata) 220 - if not rec or not rec.get('tz'): 221 - continue 222 - std_s = _find_dst_transitions(rec['tz'], year)[0] 223 - codes = group_codes.setdefault(std_s, []) 224 - if iata not in codes: 225 - codes.append(iata) 226 - # Trim HTML-based codes to group_size for popular timezones 227 - if group_size > 0: 228 - for std_s, codes in list(group_codes.items()): 229 - group_codes[std_s] = codes[:group_size] 230 - 231 - # 3) Fallback for offsets lacking HTML codes (unpopular timezones) 232 - for std_s, keys in group_keys.items(): 233 - # ensure at least one code per std_offset 234 - if not group_codes.get(std_s): 235 - group_codes[std_s] = _fallback_codes(std_s) 236 - 237 - # 4) Assign popular & fallback codes to their actual DST buckets 238 - # initialize codes list for each bucket 239 - for bucket_key, meta in full_buckets.items(): 240 - meta['codes'] = [] 241 - used_codes: set[str] = set() 242 - 243 - def _assign_to_bucket(iata_code: str): 244 - if iata_code in used_codes: 245 - return False 246 - rec = airport_db.get(iata_code) 247 - if rec and rec.get('tz'): 248 - std2, dst2, st2, ed2 = _find_dst_transitions(rec['tz'], year) 249 - key = (std2, dst2, st2, ed2) 250 - if key in full_buckets: 251 - full_buckets[key]['codes'].append(iata_code) 252 - used_codes.add(iata_code) 253 - return True 254 - return False 255 - 256 - for std_s, 
codes in group_codes.items(): 257 - for iata in codes: 258 - _assign_to_bucket(iata) 259 - 260 - # fallback for buckets still empty: only populate the first empty bucket per std-offset 261 - for std_s, keys in group_keys.items(): 262 - assigned = set(group_codes.get(std_s, [])) 263 - fallback_candidates = [c for c in group_codes.get(std_s, []) if c not in used_codes] 264 - # track if we've used fallback for this std-offset 265 - used = False 266 - for bucket_key in keys: 267 - codes_list = full_buckets[bucket_key].get('codes', []) 268 - if not codes_list and not used: 269 - # pick first unassigned fallback candidates 270 - fallback_pool = [c for c in group_codes.get(std_s, []) if c not in used_codes] 271 - for candidate in fallback_pool: 272 - if _assign_to_bucket(candidate): 273 - codes_list = [candidate] 274 - break 275 - else: 276 - codes_list = [] 277 - used = True 278 - full_buckets[bucket_key]['codes'] = codes_list 279 - 280 - # FINAL safety pass: if a bucket is still empty try to grab 1 airport that 281 - # actually sits in *this* timezone (e.g. DUT for America/Adak). This never 282 - # duplicates because we consult used_codes. 
283 - for bucket_key, meta in full_buckets.items(): 284 - if meta['codes']: 285 - continue 286 - tz_names = meta.get('tz_names', []) 287 - if not tz_names: 288 - continue 289 - seg = df_all[df_all['tz'].isin(tz_names)].sort_values('route_hits', ascending=False) 290 - for code in seg['iata']: 291 - if _assign_to_bucket(code): 292 - meta['codes'] = [code] 293 - break 294 - 295 - # build ordered bucket list 296 - buckets_list = [ 297 - full_buckets[k] 298 - for k in sorted( 299 - full_buckets.keys(), 300 - key=lambda k: (full_buckets[k]['std'], full_buckets[k]['dst'], full_buckets[k]['start']) 301 - ) 302 - ] 303 - 304 - # 5) Build flat pool and offsets 305 - code_pool = [] 306 - seen_for_pool: set[str] = set() 307 - for b in buckets_list: 308 - unique_codes = [c for c in b.get('codes', []) if c not in seen_for_pool] 309 - b['offset'] = len(code_pool) 310 - b['count'] = len(unique_codes) 311 - code_pool.extend(unique_codes) 312 - seen_for_pool.update(unique_codes) 313 - 314 - # Build name pool parallel to code_pool 315 - name_pool = [] 316 - for code in code_pool: 317 - rec = airport_db.get(code) 318 - if rec and rec.get('name'): 319 - name = rec['name'] 320 - else: 321 - name = code 322 - # Remove ' International Airport' or ' Airport' from the end 323 - if name.endswith(' International Airport'): 324 - name = name[:-len(' International Airport')] 325 - elif name.endswith(' Airport'): 326 - name = name[:-len(' Airport')] 327 - name = name.rstrip() 328 - name_pool.append(name) 329 - 330 - # Emit C file 331 - with out_path.open("w", encoding="utf-8") as f: 332 - f.write("// Auto-generated by generate_airport_tz_list.py\n") 333 - f.write(f"// Year-specific DST data for {year}\n\n") 334 - f.write("#include <stdint.h>\n\n") 335 - # Code pool 336 - f.write("static const char airport_code_pool[] =\n") 337 - for i, code in enumerate(code_pool): 338 - if i % 8 == 0: 339 - f.write(" ") # Indent new line 340 - f.write(f'"{code}"') 341 - if (i + 1) % 8 == 0 or (i + 1) == 
len(code_pool): 342 - f.write("\n") # Newline every 8 codes or at the end 343 - else: 344 - f.write(" ") # Space between codes on the same line 345 - f.write(";\n\n") 346 - 347 - # Name pool 348 - f.write("static const char* airport_name_pool[] = {\n") 349 - for name in name_pool: 350 - f.write(f" \"{name}\",\n") 351 - f.write("};\n\n") 352 - 353 - # Struct matches TzInfo definition 354 - f.write("typedef struct {\n") 355 - f.write(" float std_offset_hours;\n") 356 - f.write(" float dst_offset_hours;\n") 357 - f.write(" int64_t dst_start_utc;\n") 358 - f.write(" int64_t dst_end_utc;\n") 359 - f.write(" int name_offset;\n") 360 - f.write(" int name_count;\n") 361 - f.write("} TzInfo;\n\n") 362 - 363 - f.write("static const TzInfo airport_tz_list[] = {\n") 364 - for bucket in buckets_list: 365 - std_h = bucket["std"] / 3600.0 366 - dst_h = bucket["dst"] / 3600.0 367 - start = bucket["start"] 368 - end = bucket["end"] 369 - off = bucket["offset"] 370 - cnt = bucket["count"] 371 - f.write(f" {{ {std_h:.2f}f, {dst_h:.2f}f, {start}LL, {end}LL, {off}, {cnt} }},\n") 372 - f.write("};\n\n") 373 - f.write("#define AIRPORT_TZ_LIST_COUNT (sizeof(airport_tz_list)/sizeof(airport_tz_list[0]))\n") 374 - f.write("#define AIRPORT_CODE_POOL_COUNT (sizeof(airport_code_pool)/3)\n") 375 - f.write("#define AIRPORT_NAME_POOL_COUNT (sizeof(airport_name_pool)/sizeof(airport_name_pool[0]))\n") 376 - 377 - print( 378 - f"Generated {out_path} with {len(buckets_list)} tz buckets and {len(code_pool)} airports." 
379 - ) 380 - 381 - # --------------------------------------------------------------------------- 382 - # CLI 383 - # --------------------------------------------------------------------------- 384 - 385 - def main(argv: List[str] | None = None) -> None: 386 - parser = argparse.ArgumentParser( 387 - description="Generate airport_tz_list.c: hybrid grouping by standard offset, split DST buckets, fallback for missing offsets" 388 - ) 389 - 390 - default_html_path = Path(__file__).parent / "top1000.html" 391 - parser.add_argument( 392 - "--html", 393 - type=Path, 394 - default=default_html_path, 395 - help="Path to GetToCenter HTML file (top1000.html)", 396 - ) 397 - 398 - default_out_path = Path(__file__).parent / "../src/c/airport_tz_list.c" 399 - parser.add_argument( 400 - "--out", 401 - type=Path, 402 - default=default_out_path, 403 - help="C output file path" 404 - ) 405 - parser.add_argument( 406 - "--top", 407 - type=int, 408 - default=10, 409 - help="Number of airports to pick per standard offset group before splitting across DST buckets", 410 - ) 411 - parser.add_argument( 412 - "--max-bucket", 413 - type=int, 414 - default=10, 415 - help="Maximum number of airport codes to include per DST bucket (default: 10)", 416 - ) 417 - args = parser.parse_args(argv) 418 - 419 - # Always parse the HTML source for list of top airports 420 - if not args.html.exists(): 421 - print(f"ERROR: HTML file not found: {args.html}", file=sys.stderr) 422 - sys.exit(1) 423 - airports_list = _parse_top1000(args.html) 424 - if not airports_list: 425 - print(f"ERROR: No airports found in HTML: {args.html}", file=sys.stderr) 426 - sys.exit(1) 427 - 428 - # group_size = top N per std-offset, max_bucket = cap per DST bucket 429 - generate_c_code(airports_list, args.out, group_size=args.top, max_bucket=args.max_bucket) 430 - 431 - 432 - if __name__ == "__main__": 433 - main()
-129
scripts/generate_tz_list.py
# Requires Python 3.9+ for zoneinfo
import sys
import zoneinfo
from datetime import datetime, timezone
from os import path

# Use shared DST tools
from tz_common import find_dst_transitions as find_dst_transitions_accurate

# Last path components that name a region rather than a city. Stored
# lower-cased once so the per-zone check is a plain set lookup.
# Comprehensive list based on review of tz_list.c; add or remove names as needed.
_GENERIC_NAMES = frozenset(
    name.lower()
    for name in (
        "Samoa", "Hawaii", "Aleutian", "Alaska", "Pacific", "Arizona", "Yukon",
        "Mountain", "General", "Saskatchewan", "Central", "Knox IN", "EasterIsland",
        "Acre", "Jamaica", "Michigan", "Eastern", "East-Indiana", "Atlantic",
        "Continental", "Newfoundland", "East", "Bahia", "Noronha", "South Georgia",
        "Canary", "Faeroe", "Faroe", "Guernsey", "Isle of Man", "Jersey",
        "Madeira", "Jan Mayen", "West", "North", "South", "ACT", "NSW",
        "Tasmania", "Victoria", "Queensland", "Yap", "South Pole", "Kanton",
    )
)

# Alternate copies of the database that some installations expose
_SKIPPED_PREFIXES = ("right/", "posix/")


def _is_candidate_zone(tz_name):
    """Return True for 'Area/City' style names outside Etc/, right/ and posix/."""
    if "/" not in tz_name or tz_name.startswith("Etc/"):
        return False
    return not tz_name.lower().startswith(_SKIPPED_PREFIXES)


def generate_tz_list_c_code():
    """Generates C code for a static timezone list with DST transition timestamps.

    Zones are grouped by (std_offset_s, dst_offset_s, dst_start_utc,
    dst_end_utc) for the current UTC year; each group lists the city names of
    its zones, sorted alphabetically.

    Returns:
        The complete C source text as a single string.
    """
    # UTC year, so the table does not depend on the build machine's local zone
    target_year = datetime.now(timezone.utc).year
    print(f"Finding DST transitions for year {target_year}...")

    available_zones = zoneinfo.available_timezones()
    print(f"Found {len(available_zones)} available timezones.")

    # Key: (std_offset_s, dst_offset_s, start_utc, end_utc), value: set of city names
    processed_zones = {}

    for tz_name in available_zones:
        if not _is_candidate_zone(tz_name):
            continue

        city_name = tz_name.split('/')[-1].replace('_', ' ')
        # Cheap name filters first, so the transition lookup only runs for
        # zones that will actually be emitted.
        if city_name.lower() in _GENERIC_NAMES:
            continue  # Skip this generic name
        if not (city_name and city_name[0].isupper()):
            continue

        std_offset_s, dst_offset_s, start_utc, end_utc = find_dst_transitions_accurate(tz_name, target_year)
        key_tuple = (std_offset_s, dst_offset_s, start_utc, end_utc)
        processed_zones.setdefault(key_tuple, set()).add(city_name)

    # Keys are unique, so sorting them orders rows by std offset, then DST
    # offset, then DST start/end.
    ordered_keys = sorted(processed_zones)
    print(f"Generated data for {len(ordered_keys)} unique offset/DST rule combinations.")

    # --- C Code Generation: Flatten name pool and tz_list entries ---
    names_pool = []
    rows = []
    for std_offset_s, dst_offset_s, start_utc, end_utc in ordered_keys:
        sorted_names = sorted(processed_zones[(std_offset_s, dst_offset_s, start_utc, end_utc)])
        std_h = std_offset_s / 3600.0
        dst_h = dst_offset_s / 3600.0
        rows.append(
            f"    {{ {std_h:.2f}f, {dst_h:.2f}f, {start_utc}LL, {end_utc}LL, {len(names_pool)}, {len(sorted_names)} }},\n"
        )
        names_pool.extend(sorted_names)

    parts = [
        "// Generated by Python script using zoneinfo\n",
        f"// Contains Standard & DST offsets for {target_year}.\n",
        "// WARNING: DST rules accurate only for the generated year.\n\n",
        "#include <stdint.h>\n\n",
        # Flattened list of all city names
        "static const char* tz_name_pool[] = {\n",
    ]
    parts.extend(f"    \"{name}\",\n" for name in names_pool)
    parts.append("};\n\n")

    # TzInfo struct with name pool indices
    parts.extend([
        "typedef struct {\n",
        "    float std_offset_hours;\n",
        "    float dst_offset_hours;\n",
        "    int64_t dst_start_utc;\n",
        "    int64_t dst_end_utc;\n",
        "    int name_offset;\n",
        "    int name_count;\n",
        "} TzInfo;\n\n",
    ])

    # Main tz_list entries
    parts.append("static const TzInfo tz_list[] = {\n")
    parts.extend(rows)
    parts.append("};\n\n")
    parts.append("#define TZ_LIST_COUNT (sizeof(tz_list)/sizeof(tz_list[0]))\n")
    parts.append("#define TZ_NAME_POOL_COUNT (sizeof(tz_name_pool)/sizeof(tz_name_pool[0]))\n")
    return "".join(parts)


# --- Main execution ---
if __name__ == "__main__":
    c_code_output = generate_tz_list_c_code()
    output_filename = path.join(path.dirname(__file__), "../src/c/tz_list.c")  # Default output path
    try:
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(c_code_output)
    except OSError as e:
        print(f"\nError: Could not write to file {output_filename}: {e}", file=sys.stderr)
        sys.exit(1)  # let callers and build scripts see the failure
    print(f"\nSuccessfully written timezone data (with accurate DST timestamps) to {output_filename}")
-348
scripts/temp_py_out.c
··· 1 - // Auto-generated by generate_airport_tz_list.py 2 - // Year-specific DST data for 2025 3 - 4 - #include <stdint.h> 5 - 6 - static const char airport_code_pool[] = 7 - "PPG" "HNL" "KOA" "LIH" "ITO" "MKK" "JHM" "LNY" 8 - "AKB" "NHV" "GMR" "ANC" "LAX" "SFO" "LAS" "SEA" 9 - "YVR" "SAN" "PDX" "OAK" "SJC" "SMF" "PHX" "DEN" 10 - "SLC" "YYC" "YEG" "ABQ" "ELP" "MEX" "ORD" "DFW" 11 - "IAH" "MSP" "MDW" "DAL" "STL" "BNA" "AUS" "IPC" 12 - "CUN" "HAV" "ATL" "JFK" "YYZ" "CLT" "MCO" "MIA" 13 - "EWR" "BOS" "DTW" "FLL" "MUN" "CGB" "MAO" "SXM" 14 - "CGR" "PVH" "BVB" "YHZ" "SCL" "YYT" "GRU" "CGH" 15 - "BSB" "GIG" "AEP" "EZE" "CNF" "VCP" "SDU" "POA" 16 - "FEN" "FSP" "GOH" "SID" "PDL" "TER" "DKR" "LHR" 17 - "LGW" "DUB" "MAN" "LIS" "STN" "LTN" "EDI" "LPA" 18 - "BHX" "CMN" "ALG" "CDG" "AMS" "FRA" "MAD" "BCN" 19 - "MUC" "FCO" "ZRH" "CPH" "PMI" "JNB" "CPT" "TLV" 20 - "BEY" "BZY" "ATH" "HEL" "OTP" "KBP" "HER" "SOF" 21 - "CAI" "IST" "SVO" "DOH" "JED" "SAW" "DME" "AYT" 22 - "RUH" "VKO" "LED" "THR" "MHD" "IKA" "SYZ" "AWZ" 23 - "KIH" "IFN" "TBZ" "BND" "PGU" "DXB" "AUH" "SHJ" 24 - "MRU" "TBS" "KUF" "RUN" "DWC" "ASF" "BUS" "KBL" 25 - "SVX" "UFA" "TJM" "SGC" "CEK" "PEE" "NUX" "REN" 26 - "NJC" "SLY" "DEL" "BOM" "BLR" "CCU" "MAA" "HYD" 27 - "COK" "PNQ" "AMD" "GOI" "KTM" "OMS" "RGN" "MDL" 28 - "CGK" "BKK" "DMK" "SGN" "HAN" "SUB" "HKT" "KNO" 29 - "DAD" "CNX" "PEK" "HKG" "PVG" "CAN" "SIN" "KUL" 30 - "CTU" "SZX" "TPE" "KMG" "EUC" "HND" "ICN" "NRT" 31 - "CJU" "KIX" "GMP" "FUK" "CTS" "OKA" "PUS" "DRW" 32 - "ASP" "AYQ" "ADL" "PLO" "MGB" "OLP" "BNE" "OOL" 33 - "CNS" "VVO" "KHV" "TSV" "SYD" "MEL" "CBR" "HBA" 34 - "LDH" "VLI" "NLK" "NAN" "PKC" "AKL" "CHC" "WLG" 35 - "ZQN" "NSN" "DUD" "NPE" "PMR" "CHT" "APW" "CXI" 36 - ; 37 - 38 - static const char* airport_name_pool[] = { 39 - "Pago Pago", 40 - "Daniel K Inouye", 41 - "Ellison Onizuka Kona International At Keahole", 42 - "Lihue", 43 - "Hilo", 44 - "Molokai", 45 - "Kapalua", 46 - "Lanai", 47 - "Atka", 48 - "Nuku Hiva", 49 - "Totegegie", 50 - "Ted 
Stevens Anchorage", 51 - "Los Angeles", 52 - "San Francisco", 53 - "Harry Reid", 54 - "Seattle-Tacoma", 55 - "Vancouver", 56 - "San Diego", 57 - "Portland", 58 - "Metro Oakland", 59 - "Norman Y Mineta San Jose", 60 - "Sacramento", 61 - "Phoenix Sky Harbor", 62 - "Denver", 63 - "Salt Lake City", 64 - "Calgary", 65 - "Edmonton", 66 - "Albuquerque International Sunport", 67 - "El Paso", 68 - "Licenciado Benito Juarez", 69 - "Chicago O'Hare", 70 - "Dallas-Fort Worth", 71 - "George Bush Intcntl/Houston", 72 - "Minneapolis-St Paul International/Wold-Chamberlain", 73 - "Chicago Midway", 74 - "Dallas Love Field", 75 - "St Louis Lambert", 76 - "Nashville", 77 - "Austin-Bergstrom", 78 - "Mataveri", 79 - "Cancun", 80 - "Jose Marti", 81 - "Hartsfield - Jackson Atlanta", 82 - "John F Kennedy", 83 - "Toronto Pearson", 84 - "Charlotte/Douglas", 85 - "Orlando", 86 - "Miami", 87 - "Newark Liberty", 88 - "General Edward Lawrence Logan", 89 - "Detroit Metro Wayne County", 90 - "Fort Lauderdale/Hollywood", 91 - "Maturin", 92 - "Marechal Rondon", 93 - "Eduardo Gomes", 94 - "Princess Juliana", 95 - "Campo Grande", 96 - "Governador Jorge Teixeira de Oliveira", 97 - "Atlas Brasil Cantanhede", 98 - "Halifax Robert L. Stanfield", 99 - "Comodoro Arturo Merino Benitez", 100 - "St. 
John's", 101 - "Guarulhos - Governador Andre Franco Montoro", 102 - "Congonhas", 103 - "Presidente Juscelino Kubistschek", 104 - "Galeao - Antonio Carlos Jobim", 105 - "Jorge Newbery Airpark", 106 - "Ministro Pistarini", 107 - "Tancredo Neves", 108 - "Viracopos", 109 - "Santos Dumont", 110 - "Salgado Filho", 111 - "Fernando de Noronha", 112 - "St Pierre", 113 - "Godthaab / Nuuk", 114 - "Amilcar Cabral", 115 - "João Paulo II", 116 - "Lajes Field", 117 - "Leopold Sedar Senghor", 118 - "London Heathrow", 119 - "London Gatwick", 120 - "Dublin", 121 - "Manchester", 122 - "Lisbon Portela", 123 - "London Stansted", 124 - "London Luton", 125 - "Edinburgh", 126 - "Gran Canaria", 127 - "Birmingham", 128 - "Mohammed V", 129 - "Houari Boumediene", 130 - "Charles de Gaulle", 131 - "Amsterdam Airport Schiphol", 132 - "Frankfurt am Main", 133 - "Madrid Barajas", 134 - "Barcelona", 135 - "Munich", 136 - "Leonardo Da Vinci (Fiumicino)", 137 - "Zurich", 138 - "Copenhagen Kastrup", 139 - "Palma De Mallorca", 140 - "O. R. 
Tambo", 141 - "Cape Town", 142 - "Ben Gurion", 143 - "Beirut Rafic Hariri", 144 - "Balti", 145 - "Eleftherios Venizelos", 146 - "Helsinki Vantaa", 147 - "Henri Coanda", 148 - "Boryspil", 149 - "Heraklion International Nikos Kazantzakis", 150 - "Sofia", 151 - "Cairo", 152 - "Istanbul", 153 - "Sheremetyevo", 154 - "Hamad", 155 - "King Abdulaziz", 156 - "Sabiha Gokcen", 157 - "Domodedovo", 158 - "Antalya", 159 - "King Khaled", 160 - "Vnukovo", 161 - "Pulkovo", 162 - "Mehrabad", 163 - "Mashhad", 164 - "Imam Khomeini", 165 - "Shiraz Shahid Dastghaib", 166 - "Ahwaz", 167 - "Kish", 168 - "Esfahan Shahid Beheshti", 169 - "Tabriz", 170 - "Bandar Abbas", 171 - "Persian Gulf", 172 - "Dubai", 173 - "Abu Dhabi", 174 - "Sharjah", 175 - "Sir Seewoosagur Ramgoolam", 176 - "Tbilisi", 177 - "Kurumoch", 178 - "Roland Garros", 179 - "Al Maktoum", 180 - "Astrakhan", 181 - "Batumi", 182 - "Kabul", 183 - "Koltsovo", 184 - "Ufa", 185 - "Roshchino", 186 - "Surgut", 187 - "Chelyabinsk Balandino", 188 - "Bolshoye Savino", 189 - "Novy Urengoy", 190 - "Orenburg Central", 191 - "Nizhnevartovsk", 192 - "Salekhard", 193 - "Indira Gandhi", 194 - "Chhatrapati Shivaji", 195 - "Bengaluru", 196 - "Netaji Subhash Chandra Bose", 197 - "Chennai", 198 - "Rajiv Gandhi International Airport Shamshabad", 199 - "Cochin", 200 - "Pune", 201 - "Sardar Vallabhbhai Patel", 202 - "Dabolim", 203 - "Tribhuvan", 204 - "Omsk Central", 205 - "Yangon", 206 - "Mandalay", 207 - "Soekarno-Hatta", 208 - "Suvarnabhumi", 209 - "Don Mueang", 210 - "Tan Son Nhat", 211 - "Noi Bai", 212 - "Juanda", 213 - "Phuket", 214 - "Polonia", 215 - "Da Nang", 216 - "Chiang Mai", 217 - "Beijing Capital", 218 - "Chek Lap Kok", 219 - "Shanghai Pudong", 220 - "Guangzhou Baiyun", 221 - "Singapore Changi", 222 - "Kuala Lumpur", 223 - "Chengdu Shuangliu", 224 - "Shenzhen Bao'an", 225 - "Taiwan Taoyuan", 226 - "Kunming Wujiaba", 227 - "Eucla", 228 - "Tokyo", 229 - "Incheon", 230 - "Narita", 231 - "Jeju", 232 - "Kansai", 233 - "Gimpo", 234 - 
"Fukuoka", 235 - "New Chitose", 236 - "Naha", 237 - "Gimhae", 238 - "Darwin", 239 - "Alice Springs", 240 - "Ayers Rock Connellan", 241 - "Adelaide", 242 - "Port Lincoln", 243 - "Mount Gambier", 244 - "Olympic Dam", 245 - "Brisbane", 246 - "Gold Coast", 247 - "Cairns", 248 - "Vladivostok", 249 - "Khabarovsk-Novy", 250 - "Townsville", 251 - "Sydney Kingsford Smith", 252 - "Melbourne", 253 - "Canberra", 254 - "Hobart", 255 - "Lord Howe Island", 256 - "Port Vila Bauerfield", 257 - "Norfolk Island", 258 - "Nadi", 259 - "Yelizovo", 260 - "Auckland", 261 - "Christchurch", 262 - "Wellington", 263 - "Queenstown", 264 - "Nelson", 265 - "Dunedin", 266 - "Napier", 267 - "Palmerston North", 268 - "Chatham Islands-Tuuta", 269 - "Faleolo", 270 - "Cassidy", 271 - }; 272 - 273 - typedef struct { 274 - float std_offset_hours; 275 - float dst_offset_hours; 276 - int64_t dst_start_utc; 277 - int64_t dst_end_utc; 278 - int name_offset; 279 - int name_count; 280 - } TzInfo; 281 - 282 - static const TzInfo airport_tz_list[] = { 283 - { -11.00f, -11.00f, 0LL, 0LL, 0, 1 }, 284 - { -10.00f, -10.00f, 0LL, 0LL, 1, 7 }, 285 - { -10.00f, -9.00f, 1741489200LL, 1762048800LL, 8, 1 }, 286 - { -9.50f, -9.50f, 0LL, 0LL, 9, 1 }, 287 - { -9.00f, -9.00f, 0LL, 0LL, 10, 1 }, 288 - { -9.00f, -8.00f, 1741489200LL, 1762048800LL, 11, 1 }, 289 - { -8.00f, -7.00f, 1741489200LL, 1762048800LL, 12, 10 }, 290 - { -7.00f, -7.00f, 0LL, 0LL, 22, 1 }, 291 - { -7.00f, -6.00f, 1741489200LL, 1762048800LL, 23, 6 }, 292 - { -6.00f, -6.00f, 0LL, 0LL, 29, 1 }, 293 - { -6.00f, -5.00f, 1741489200LL, 1762048800LL, 30, 9 }, 294 - { -6.00f, -5.00f, 1757199600LL, 1743890400LL, 39, 1 }, 295 - { -5.00f, -5.00f, 0LL, 0LL, 40, 1 }, 296 - { -5.00f, -4.00f, 1741482000LL, 1762045200LL, 41, 1 }, 297 - { -5.00f, -4.00f, 1741489200LL, 1762048800LL, 42, 10 }, 298 - { -4.00f, -4.00f, 0LL, 0LL, 52, 7 }, 299 - { -4.00f, -3.00f, 1741489200LL, 1762048800LL, 59, 1 }, 300 - { -4.00f, -3.00f, 1757206800LL, 1743897600LL, 60, 1 }, 301 - { -3.50f, 
-2.50f, 1741489200LL, 1762048800LL, 61, 1 }, 302 - { -3.00f, -3.00f, 0LL, 0LL, 62, 11 }, 303 - { -3.00f, -2.00f, 1741489200LL, 1762048800LL, 73, 1 }, 304 - { -2.00f, -2.00f, 0LL, 0LL, 74, 0 }, 305 - { -2.00f, -1.00f, 1743292800LL, 1761436800LL, 74, 1 }, 306 - { -1.00f, -1.00f, 0LL, 0LL, 75, 1 }, 307 - { -1.00f, 0.00f, 1743296400LL, 1761440400LL, 76, 2 }, 308 - { 0.00f, 0.00f, 0LL, 0LL, 78, 1 }, 309 - { 0.00f, 1.00f, 1743300000LL, 1761444000LL, 79, 10 }, 310 - { 0.00f, 1.00f, 1743908400LL, 1740279600LL, 89, 1 }, 311 - { 1.00f, 1.00f, 0LL, 0LL, 90, 1 }, 312 - { 1.00f, 2.00f, 1743303600LL, 1761447600LL, 91, 10 }, 313 - { 2.00f, 2.00f, 0LL, 0LL, 101, 2 }, 314 - { 2.00f, 3.00f, 1743130800LL, 1761444000LL, 103, 1 }, 315 - { 2.00f, 3.00f, 1743296400LL, 1761436800LL, 104, 1 }, 316 - { 2.00f, 3.00f, 1743303600LL, 1761447600LL, 105, 1 }, 317 - { 2.00f, 3.00f, 1743307200LL, 1761451200LL, 106, 6 }, 318 - { 2.00f, 3.00f, 1745542800LL, 1761868800LL, 112, 1 }, 319 - { 3.00f, 3.00f, 0LL, 0LL, 113, 10 }, 320 - { 3.50f, 3.50f, 0LL, 0LL, 123, 10 }, 321 - { 4.00f, 4.00f, 0LL, 0LL, 133, 10 }, 322 - { 4.50f, 4.50f, 0LL, 0LL, 143, 1 }, 323 - { 5.00f, 5.00f, 0LL, 0LL, 144, 10 }, 324 - { 5.50f, 5.50f, 0LL, 0LL, 154, 10 }, 325 - { 5.75f, 5.75f, 0LL, 0LL, 164, 1 }, 326 - { 6.00f, 6.00f, 0LL, 0LL, 165, 1 }, 327 - { 6.50f, 6.50f, 0LL, 0LL, 166, 2 }, 328 - { 7.00f, 7.00f, 0LL, 0LL, 168, 10 }, 329 - { 8.00f, 8.00f, 0LL, 0LL, 178, 10 }, 330 - { 8.75f, 8.75f, 0LL, 0LL, 188, 1 }, 331 - { 9.00f, 9.00f, 0LL, 0LL, 189, 10 }, 332 - { 9.50f, 9.50f, 0LL, 0LL, 199, 3 }, 333 - { 9.50f, 10.50f, 1759633200LL, 1743908400LL, 202, 4 }, 334 - { 10.00f, 10.00f, 0LL, 0LL, 206, 6 }, 335 - { 10.00f, 11.00f, 1759633200LL, 1743908400LL, 212, 4 }, 336 - { 10.50f, 11.00f, 1759633200LL, 1743904800LL, 216, 1 }, 337 - { 11.00f, 11.00f, 0LL, 0LL, 217, 1 }, 338 - { 11.00f, 12.00f, 1759633200LL, 1743908400LL, 218, 1 }, 339 - { 12.00f, 12.00f, 0LL, 0LL, 219, 2 }, 340 - { 12.00f, 13.00f, 1759028400LL, 1743908400LL, 221, 8 }, 
341 - { 12.75f, 13.75f, 1759032000LL, 1743912000LL, 229, 1 }, 342 - { 13.00f, 13.00f, 0LL, 0LL, 230, 1 }, 343 - { 14.00f, 14.00f, 0LL, 0LL, 231, 1 }, 344 - }; 345 - 346 - #define AIRPORT_TZ_LIST_COUNT (sizeof(airport_tz_list)/sizeof(airport_tz_list[0])) 347 - #define AIRPORT_CODE_POOL_COUNT (sizeof(airport_code_pool)/3) 348 - #define AIRPORT_NAME_POOL_COUNT (sizeof(airport_name_pool)/sizeof(airport_name_pool[0]))
-104
scripts/tz_common.py
··· 1 - # tz_common.py 2 - """Common timezone utilities shared by both generators.""" 3 - from datetime import datetime, timedelta, timezone 4 - import zoneinfo 5 - from functools import lru_cache 6 - 7 - 8 - def get_tz_details(tz_name: str, dt_utc: datetime) -> tuple[int, timedelta] | None: 9 - """Return (offset_seconds, dst_timedelta) or None if the timezone is invalid.""" 10 - try: 11 - tz = zoneinfo.ZoneInfo(tz_name) 12 - offset_td = tz.utcoffset(dt_utc) 13 - dst_td = tz.dst(dt_utc) or timedelta(0) 14 - if offset_td is not None: 15 - return int(offset_td.total_seconds()), dst_td 16 - except Exception: 17 - pass 18 - return None 19 - 20 - 21 - @lru_cache(maxsize=None) 22 - def find_dst_transitions(tz_name: str, year: int) -> tuple[int, int, int, int]: 23 - """Return (std_offset_sec, dst_offset_sec, dst_start_utc_ts, dst_end_utc_ts). 24 - If the zone does not observe DST, std == dst and transition timestamps are 0. 25 - """ 26 - std_offset_sec = None 27 - dst_offset_sec = None 28 - start_ts = 0 29 - end_ts = 0 30 - 31 - # Start one hour before the year to catch boundary transitions 32 - current_dt = datetime(year, 1, 1, tzinfo=timezone.utc) - timedelta(hours=1) 33 - initial = get_tz_details(tz_name, current_dt) 34 - if not initial: 35 - return 0, 0, 0, 0 36 - 37 - prev_off, prev_dst = initial 38 - total_hours = (366 * 24) + 3 # cover leap year + buffer 39 - 40 - for _ in range(total_hours): 41 - current_dt += timedelta(hours=1) 42 - details = get_tz_details(tz_name, current_dt) 43 - if not details: 44 - continue 45 - cur_off, cur_dst = details 46 - 47 - # Track seen std/dst offsets 48 - if cur_dst == timedelta(0): 49 - std_offset_sec = cur_off 50 - else: 51 - dst_offset_sec = cur_off 52 - 53 - # Detect DST toggles 54 - if cur_dst != prev_dst: 55 - ts = int(current_dt.timestamp()) 56 - if current_dt.year == year: 57 - if prev_dst == timedelta(0) and cur_dst > timedelta(0): 58 - start_ts = ts 59 - elif prev_dst > timedelta(0) and cur_dst == timedelta(0): 60 - 
end_ts = ts 61 - prev_off, prev_dst = cur_off, cur_dst 62 - 63 - # Fallback if never set 64 - if std_offset_sec is None: 65 - std_offset_sec = prev_off 66 - if dst_offset_sec is None: 67 - dst_offset_sec = std_offset_sec 68 - 69 - # If offsets differ by less than 1 minute, treat as no DST 70 - if abs(std_offset_sec - dst_offset_sec) < 60: 71 - start_ts = 0 72 - end_ts = 0 73 - dst_offset_sec = std_offset_sec 74 - 75 - return std_offset_sec, dst_offset_sec, start_ts, end_ts 76 - 77 - # --- Main execution (for CLI testing) --- 78 - if __name__ == "__main__": 79 - import sys 80 - if len(sys.argv) != 3: 81 - print("Usage: python tz_common.py <timezone_name> <year>", file=sys.stderr) 82 - sys.exit(1) 83 - 84 - tz_name_arg = sys.argv[1] 85 - try: 86 - year_arg = int(sys.argv[2]) 87 - except ValueError: 88 - print(f"Error: Invalid year '{sys.argv[2]}'", file=sys.stderr) 89 - sys.exit(1) 90 - 91 - # Ensure zoneinfo is available if needed by get_tz_details 92 - try: 93 - import zoneinfo 94 - except ImportError: 95 - print("Error: Python 3.9+ with zoneinfo is required to run this script directly.", file=sys.stderr) 96 - sys.exit(1) 97 - 98 - try: 99 - result = find_dst_transitions(tz_name_arg, year_arg) 100 - # Print tuple directly for easy parsing 101 - print(result) 102 - except Exception as e: 103 - print(f"Error processing {tz_name_arg} for {year_arg}: {e}", file=sys.stderr) 104 - sys.exit(1)