30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
class Population:
    """
    A class that represents population data for a specific year and area.

    The population records are fetched through ``fetcher.fetch_population_data``
    and aggregated into NumPy count arrays along the age, sex and household
    positions of each record's ``key`` list.

    Attributes:
        year: Year taken from the first fetched record (``key[4]``).
        area: Area taken from the first fetched record (``key[0]``).
        variables (List[str]): Codes of the columns whose ``type`` is ``'d'``.
        variable_categories (Dict[str, np.ndarray]): For each such column
            code, the sorted unique values found at that column's position
            in the record keys.
        comment (str): The ``comment`` entry of the fourth column, or an empty
            string when it is absent.
        array_age_household_sex (np.ndarray): 3D counts by age, household, sex.
        array_age_sex (np.ndarray): 2D counts by age and sex.
        array_age_household (np.ndarray): 2D counts by age and household.
        array_sex_household (np.ndarray): 2D counts by sex and household.
        array_age_household_female (np.ndarray): ``array_age_household_sex[:, :, 0]``.
        array_age_household_male (np.ndarray): ``array_age_household_sex[:, :, 1]``.
        total_population: Sum of all counts, or ``None`` when the aggregated
            arrays disagree on the total.

    Class Attributes:
        instances (List[Population]): List to keep track of all successfully
            constructed instances of the Population class.

    Example:
        >>> population_2022_haga = Population(2022, "Haga")
        >>> population_2022_haga.array_age_sex.ndim
        2
    """

    instances: List['Population'] = []

    def __init__(self, year: int, area: str) -> None:
        """
        Initializes the Population object by fetching the relevant data.

        Parameters:
        - year (int): The year for which the population data is required.
        - area (str): The geographical area for which the population data is required.

        Returns:
        None
        """
        logger.info("Initializing Population object for year %s and area %s", year, area)
        result: Dict[str, Any] = fetcher.fetch_population_data(year, area)
        (
            data,
            fetched_year,
            fetched_area,
            variables,
            variable_categories,
            comment,
        ) = self._get_population_params(result)

        # The stored year/area are the values found in the fetched records,
        # not the constructor arguments.
        # NOTE(review): they are taken verbatim from the record key, so they
        # may be strings rather than int/str as annotated - confirm.
        self.year: int = fetched_year
        self.area: str = fetched_area
        self.variables: List[str] = variables
        self.variable_categories: Dict[str, np.ndarray] = variable_categories
        self.comment: str = comment

        # Placeholders; the real arrays are built by _instantiate below.
        self.array_age_household_sex: np.ndarray = np.array([])
        self.array_age_sex: np.ndarray = np.array([])
        self.array_age_household: np.ndarray = np.array([])
        self.array_sex_household: np.ndarray = np.array([])
        self.array_age_household_male: np.ndarray = np.array([])
        self.array_age_household_female: np.ndarray = np.array([])
        self.total_population: int = 0  # becomes None if the totals disagree

        self._instantiate(data)
        # Register only after the arrays were built, so a failed construction
        # never leaves a half-initialized object in the registry.
        self.instances.append(self)

    @classmethod
    def clear_instances(cls):
        """Reset the instance registry to a new, empty list."""
        cls.instances = []

    @classmethod
    def info(cls):
        """
        Describe the first registered Population instance.

        Returns:
        dict: A fixed description plus the year, area, variables,
        variable_categories, comment and total_population of ``instances[0]``.

        Raises:
        IndexError: If no instance has been registered.
        """
        if not cls.instances:
            raise IndexError("No Population instances have been created")
        first = cls.instances[0]
        return {
            "description": "A population class with information about the population in a given area and year",
            "year": first.year,
            "area": first.area,
            "variables": first.variables,
            "variable_categories": first.variable_categories,
            "comment": first.comment,
            "total_population": first.total_population,
        }

    def _create_2d_array(self, data, dimension1, dimension2, dimension3=None):
        """
        Aggregate record values into a 2D (or 3D) integer count array.

        Each axis corresponds to one position in the records' ``key`` list;
        the categories along an axis are the sorted unique values found at
        that position. Records that share the same categories are summed.

        Parameters:
        - data: Sequence of records, each with a ``'key'`` sequence and a
          ``'values'`` sequence whose first element is convertible with ``int``.
        - dimension1: Index into ``key`` used for axis 0.
        - dimension2: Index into ``key`` used for axis 1.
        - dimension3: Optional index into ``key`` used for axis 2. ``None``
          (the default) yields a 2D array; any integer, including 0, yields a
          3D array.

        Returns:
        np.ndarray: Integer array with one axis per requested dimension.
        """
        dimensions = [dimension1, dimension2]
        # Compare against None explicitly: 0 is a valid key position.
        if dimension3 is not None:
            dimensions.append(dimension3)

        # One category -> axis-position lookup per axis, replacing a linear
        # np.where scan for every record.
        index_maps = []
        for dimension in dimensions:
            categories = np.unique([item['key'][dimension] for item in data])
            index_maps.append(
                {category: position for position, category in enumerate(categories.tolist())}
            )

        counts = np.zeros(tuple(len(index_map) for index_map in index_maps), dtype=int)
        for item in data:
            key = item['key']
            cell = tuple(
                index_map[key[dimension]]
                for index_map, dimension in zip(index_maps, dimensions)
            )
            counts[cell] += int(item['values'][0])
        return counts

    def _instantiate(self, data: List[Dict[str, Any]]) -> None:
        """
        Build every aggregate array and the total population from the records.

        Key positions 1, 2 and 3 are used as age, sex and household
        respectively.

        Parameters:
        - data (List[Dict[str, Any]]): The fetched population records.

        Raises:
        IndexError: If the records contain fewer than two distinct values at
        key position 2, because the female/male slices cannot be taken.
        """
        self.array_age_sex = self._create_2d_array(data, dimension1=1, dimension2=2)
        self.array_age_household = self._create_2d_array(data, dimension1=1, dimension2=3)
        self.array_sex_household = self._create_2d_array(data, dimension1=2, dimension2=3)
        self.array_age_household_sex = self._create_2d_array(
            data, dimension1=1, dimension2=3, dimension3=2
        )

        # NOTE(review): this assumes the sex category that sorts first is
        # female and the second is male - confirm against the source's codes.
        self.array_age_household_female = self.array_age_household_sex[:, :, 0]
        self.array_age_household_male = self.array_age_household_sex[:, :, 1]

        # All three 2D arrays aggregate the same records, so their sums are
        # expected to agree; the comparison is kept as a sanity check.
        totals = {
            int(np.sum(array))
            for array in (
                self.array_age_sex,
                self.array_age_household,
                self.array_sex_household,
            )
        }
        if len(totals) == 1:
            self.total_population = totals.pop()
        else:
            self.total_population = None
            logger.warning("Total population could not be calculated")

    def _get_population_params(self, result: Dict[str, Any]) -> Tuple:
        """
        Extracts population parameters from the given result.

        Parameters:
        - result (Dict[str, Any]): The fetched result containing population
          data under ``'data'`` and column metadata under ``'columns'``.

        Returns:
        Tuple: Extracted data, year, area, variables, variable_categories, and comment.

        Raises:
        IndexError: If ``result['data']`` is empty.
        """
        data = result["data"]
        columns = result['columns']
        # The full record dump is only useful when debugging.
        logger.debug(data)

        # Year and area are read from the first record only.
        first_key = data[0]['key']
        year = first_key[4]
        area = first_key[0]

        # NOTE(review): type 'd' presumably marks dimension columns - confirm.
        variables = [column['code'] for column in columns if column['type'] == 'd']
        variable_categories = {
            column['code']: np.unique([item['key'][position] for item in data])
            for position, column in enumerate(columns)
            if column['type'] == 'd'
        }

        # The comment is optional metadata on the fourth column; fall back to
        # an empty string instead of failing when it is missing.
        comment = columns[3].get('comment', '') if len(columns) > 3 else ''
        return data, year, area, variables, variable_categories, comment
|