Concept Note on Inclusive Nepal (WIP) and Analysis of usage of Alt text in Nepali newspapers

This is a work in progress

This notebook compiles statistics, resources and insights on:

1. Status of Disability in Nepal
2. Nepali Mobile Apps and Web App penetration
3. Most used Nepali Mobile Apps
4. Accessibility status of the most used Nepali Mobile Apps
5. Comparative analysis with other countries
6. Potential Solutions for a few apps
7. Prototype and Tools suggestions for Accessibility
8. Published work regarding similar research

alt text

Imports

# Pandas
import pandas as pd
#Mapbox
from mapboxgl.utils import *
from mapboxgl.viz import *

Status of Disability in Nepal

# Read data_sources/disability_2011.csv into `df` and preview the first
# five rows.
df = pd.read_csv(filepath_or_buffer="data_sources/disability_2011.csv")
df.head(n=5)

District Male Female Population with Disability Percentage of PWDs
0 Kathmandu 9144 7978 17122 0.98
1 Morang 9494 7559 17053 1.77
2 Kailali 8143 7235 15378 1.98
3 Jhapa 8466 6910 15376 1.89
4 Chitwan 6973 5964 12937 2.23
# Show the first five rows of `df` again.
df.head(n=5)

District Male Female Population with Disability Percentage of PWDs
0 Kathmandu 9144 7978 17122 0.98
1 Morang 9494 7559 17053 1.77
2 Kailali 8143 7235 15378 1.98
3 Jhapa 8466 6910 15376 1.89
4 Chitwan 6973 5964 12937 2.23
# Read the GeoJSON file through pandas: the resulting frame has one row
# per feature, with the feature dict stored in the `features` column.
df_geojson = pd.read_json(path_or_buf='data_sources/map.geojson')
df_geojson.head(n=5)

type features
0 FeatureCollection {'properties': {'name': 'Humla', 'death': 1}, ...
1 FeatureCollection {'properties': {'name': 'Darchula', 'death': 2...
2 FeatureCollection {'properties': {'name': 'Bajhang', 'death': 3}...
3 FeatureCollection {'properties': {'name': 'Mugu'}, 'geometry': {...
4 FeatureCollection {'properties': {'name': 'Bajura'}, 'geometry':...
# Index `df` by district name (in place) so that rows can be looked up
# by the `name` property of each GeoJSON feature.
df.set_index(keys="District", inplace=True)
def add_data(features):
    """Attach disability statistics to a single GeoJSON feature.

    The feature's ``properties['name']`` is looked up in the index of the
    module-level ``df``. When it is found, the feature's ``properties`` are
    replaced by a dict holding ``name``, ``percentage`` (from the
    ``Percentage of PWDs`` column) and ``population`` (from the
    ``Population with Disability`` column). The feature is modified in
    place.

    Args:
        features: One GeoJSON feature dict; it must have a ``properties``
            mapping.

    Returns:
        The same feature dict that was passed in. It is returned for
        unmatched names as well, so that ``Series.apply(add_data)`` never
        yields ``None`` entries.
    """
    name = features.get('properties').get('name')
    if name not in df.index:
        # No statistics for this name: leave the feature untouched.
        return features
    # Fetch the row once instead of once per column.
    row = df.loc[name]
    features['properties'] = {
        'name': name,
        'percentage': row['Percentage of PWDs'],
        'population': row['Population with Disability'],
    }
    return features
# Run add_data over every feature. The feature dicts are updated in
# place; the returned Series is not stored.
df_geojson['features'].apply(add_data)
0     {'properties': {'name': 'Humla', 'percentage':...
1     {'properties': {'name': 'Darchula', 'percentag...
2     {'properties': {'name': 'Bajhang', 'percentage...
3     {'properties': {'name': 'Mugu', 'percentage': ...
4     {'properties': {'name': 'Bajura', 'percentage'...
                            ...                        
70    {'properties': {'name': 'Siraha', 'percentage'...
71    {'properties': {'name': 'Saptari', 'percentage...
72    {'properties': {'name': 'Morang', 'percentage'...
73    {'properties': {'name': 'Sunsari', 'percentage...
74    {'properties': {'name': 'Jhapa', 'percentage':...
Name: features, Length: 75, dtype: object
# Reassemble a GeoJSON FeatureCollection from the feature dicts held in
# the `features` column.
features = df_geojson['features'].tolist()
my_dict = dict(type="FeatureCollection", features=features)

# NOTE(review): the Mapbox access token is hard-coded in the notebook;
# consider loading it from the environment instead of committing it.
token  = "pk................................................................"

# Choropleth coloured by the `population` property of each feature.
viz = ChoroplethViz(
    my_dict,
    access_token=token,
    center=(84, 28.5),
    zoom=6,
    opacity=0.8,
    color_property='population',
    color_function_type='interpolate',
    color_stops=create_color_stops(
        [0, 2500, 5000, 7500, 10000, 12500],
        colors='YlOrRd',
    ),
    line_color='rgb(128,0,38)',
    line_stroke='--',
    line_width=1,
)
viz.show()

Percentage and population of persons with disabilities (PWDs) in each district of Nepal

# Choropleth coloured by the `percentage` property of each feature.
viz = ChoroplethViz(
    my_dict,
    access_token=token,
    center=(84, 28.5),
    zoom=6,
    opacity=0.8,
    color_property='percentage',
    color_function_type='interpolate',
    color_stops=create_color_stops(
        [0, 1, 2, 3, 4],
        colors='YlOrRd',
    ),
    line_color='rgb(128,0,38)',
    line_stroke='--',
    line_width=1,
)
viz.show()

Districts with the largest population with disabilities (top 5)

# Order districts by `Population with Disability`, largest first, and
# keep the first five rows.
df.sort_values(by='Population with Disability', ascending=False).head(n=5)

Male Female Population with Disability Percentage of PWDs
District
Kathmandu 9144 7978 17122 0.98
Morang 9494 7559 17053 1.77
Kailali 8143 7235 15378 1.98
Jhapa 8466 6910 15376 1.89
Chitwan 6973 5964 12937 2.23
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
import seaborn as sns
# Count <img> tags on one page and how many of them carry an `alt`
# attribute, then print both counts and the resulting percentage.
site = "https://www.onlinekhabar.com/"
# NOTE(review): presumably the browser-like User-Agent is needed because
# the site rejects urllib's default one -- confirm.
hdr = {'User-Agent': 'Mozilla/5.0'}
req = Request(site, headers=hdr)
# Close the HTTP response once it has been parsed, and bound the wait so
# an unresponsive server cannot hang the cell indefinitely.
with urlopen(req, timeout=30) as page:
    soup = BeautifulSoup(page, "lxml")
# find_all is the current name of the deprecated findAll alias.
collection = soup.find_all("img")
image_count = len(collection)
# Only the presence of the attribute is checked; an empty alt="" counts.
alt_count = sum(1 for img in collection if 'alt' in img.attrs)
print("image_count", image_count)
print("alt_count", alt_count)
# A page without any <img> tag would otherwise raise ZeroDivisionError.
print(
    "alt_count_percent",
    alt_count / image_count * 100 if image_count else float("nan"),
)
image_count 154
alt_count 1
alt_count_percent 0.6493506493506493
# Load the list of news portals (columns shown below: Portal, Link).
# NOTE: this rebinds `df`, the same global that add_data reads, so the
# map cells above need the disability data reloaded before being re-run.
df = pd.read_csv(filepath_or_buffer='data_sources/news_portals.csv')
df

Portal Link
0 Online Khabar https://www.onlinekhabar.com/
1 eKantipur http://www.ekantipur.com/
2 Setopati https://www.setopati.com/
3 The Himalayan Times https://thehimalayantimes.com/
4 My Republica https://myrepublica.nagariknetwork.com/
5 Nepal News https://www.nepalnews.com/
6 Gorkhapatra http://www.gorkhapatraonline.com/
7 Nepali Times https://www.nepalitimes.com/
def alt_counter(site, timeout=30):
    """Count the images on a page and how many have an ``alt`` attribute.

    Args:
        site: URL of the page to fetch.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        A tuple ``(alt_count, image_count)``. If the page cannot be
        fetched or parsed, ``(None, None)`` is returned and a
        ``RuntimeWarning`` describing the cause is emitted, so a failed
        portal is visible instead of silently showing up as ``None``.
    """
    # Function-scope import: the file's import cells do not provide it.
    import warnings

    hdr = {'User-Agent': 'Mozilla/5.0'}
    try:
        req = Request(site, headers=hdr)
        # The context manager closes the HTTP response after parsing.
        with urlopen(req, timeout=timeout) as page:
            soup = BeautifulSoup(page, "lxml")
    except Exception as exc:
        # Stay best-effort for ordinary failures, but unlike a bare
        # `except:` let KeyboardInterrupt / SystemExit propagate.
        warnings.warn(
            "alt_counter: could not process {!r}: {!r}".format(site, exc),
            RuntimeWarning,
        )
        return (None, None)
    # find_all is the current name of the deprecated findAll alias.
    collection = soup.find_all("img")
    image_count = len(collection)
    # Only the presence of the attribute is checked; an empty alt="" counts.
    alt_count = sum(1 for img in collection if 'alt' in img.attrs)
    return (alt_count, image_count)
# Scan every portal link and split the resulting (alt_count, image_count)
# pairs into two new columns.
df['alt_count'], df['image_count'] = zip(*map(alt_counter, df['Link']))
df

Portal Link alt_count image_count
0 Online Khabar https://www.onlinekhabar.com/ 1 154
1 eKantipur http://www.ekantipur.com/ 129 147
2 Setopati https://www.setopati.com/ 121 122
3 The Himalayan Times https://thehimalayantimes.com/ 137 138
4 My Republica https://myrepublica.nagariknetwork.com/ 94 95
5 Nepal News https://www.nepalnews.com/ 198 212
6 Gorkhapatra http://www.gorkhapatraonline.com/ None None
7 Nepali Times https://www.nepalitimes.com/ 71 74
# Percentage of images that carry an alt attribute.
# Rows where the scan failed hold None, which leaves the count columns as
# object dtype; coercing them to numeric turns None into NaN and makes
# `percent` a proper float column for plotting and aggregation.
df['percent'] = (
    100 * pd.to_numeric(df['alt_count']) / pd.to_numeric(df['image_count'])
)
df

Portal Link alt_count image_count percent
0 Online Khabar https://www.onlinekhabar.com/ 1 154 0.649351
1 eKantipur http://www.ekantipur.com/ 129 147 87.7551
2 Setopati https://www.setopati.com/ 121 122 99.1803
3 The Himalayan Times https://thehimalayantimes.com/ 137 138 99.2754
4 My Republica https://myrepublica.nagariknetwork.com/ 94 95 98.9474
5 Nepal News https://www.nepalnews.com/ 198 212 93.3962
6 Gorkhapatra http://www.gorkhapatraonline.com/ None None NaN
7 Nepali Times https://www.nepalitimes.com/ 71 74 95.9459
# Horizontal bar chart: one bar per portal, bar length = `percent`.
sns.barplot(data=df, x='percent', y='Portal')

output image